lamindb 1.10.1__py3-none-any.whl → 1.11a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. lamindb/__init__.py +89 -49
  2. lamindb/_finish.py +14 -12
  3. lamindb/_tracked.py +2 -4
  4. lamindb/_view.py +1 -1
  5. lamindb/base/__init__.py +2 -1
  6. lamindb/base/dtypes.py +76 -0
  7. lamindb/core/_settings.py +45 -2
  8. lamindb/core/storage/_anndata_accessor.py +118 -26
  9. lamindb/core/storage/_backed_access.py +10 -7
  10. lamindb/core/storage/_spatialdata_accessor.py +15 -4
  11. lamindb/core/storage/_zarr.py +3 -0
  12. lamindb/curators/_legacy.py +16 -3
  13. lamindb/curators/core.py +439 -191
  14. lamindb/examples/cellxgene/__init__.py +8 -3
  15. lamindb/examples/cellxgene/_cellxgene.py +127 -13
  16. lamindb/examples/cellxgene/{cxg_schema_versions.csv → cellxgene_schema_versions.csv} +11 -0
  17. lamindb/examples/croissant/__init__.py +12 -2
  18. lamindb/examples/datasets/__init__.py +2 -2
  19. lamindb/examples/datasets/_core.py +1 -1
  20. lamindb/examples/datasets/_small.py +66 -22
  21. lamindb/examples/datasets/mini_immuno.py +1 -0
  22. lamindb/migrations/0118_alter_recordproject_value_projectrecord.py +99 -0
  23. lamindb/migrations/0119_rename_records_project_linked_in_records.py +26 -0
  24. lamindb/migrations/{0117_squashed.py → 0119_squashed.py} +92 -5
  25. lamindb/migrations/0120_add_record_fk_constraint.py +64 -0
  26. lamindb/migrations/0121_recorduser.py +53 -0
  27. lamindb/models/__init__.py +3 -1
  28. lamindb/models/_describe.py +2 -2
  29. lamindb/models/_feature_manager.py +53 -53
  30. lamindb/models/_from_values.py +2 -2
  31. lamindb/models/_is_versioned.py +4 -4
  32. lamindb/models/_label_manager.py +4 -4
  33. lamindb/models/artifact.py +336 -136
  34. lamindb/models/artifact_set.py +36 -1
  35. lamindb/models/can_curate.py +1 -2
  36. lamindb/models/collection.py +3 -34
  37. lamindb/models/feature.py +111 -7
  38. lamindb/models/has_parents.py +11 -11
  39. lamindb/models/project.py +42 -2
  40. lamindb/models/query_manager.py +16 -7
  41. lamindb/models/query_set.py +59 -34
  42. lamindb/models/record.py +25 -4
  43. lamindb/models/run.py +8 -6
  44. lamindb/models/schema.py +54 -26
  45. lamindb/models/sqlrecord.py +123 -25
  46. lamindb/models/storage.py +59 -14
  47. lamindb/models/transform.py +17 -17
  48. lamindb/models/ulabel.py +6 -1
  49. {lamindb-1.10.1.dist-info → lamindb-1.11a1.dist-info}/METADATA +3 -3
  50. {lamindb-1.10.1.dist-info → lamindb-1.11a1.dist-info}/RECORD +52 -47
  51. {lamindb-1.10.1.dist-info → lamindb-1.11a1.dist-info}/LICENSE +0 -0
  52. {lamindb-1.10.1.dist-info → lamindb-1.11a1.dist-info}/WHEEL +0 -0
lamindb/__init__.py CHANGED
@@ -83,7 +83,7 @@ Curators and integrations.
83
83
  curators
84
84
  integrations
85
85
 
86
- Low-level functionality.
86
+ Examples, errors, and setup.
87
87
 
88
88
  .. autosummary::
89
89
  :toctree: .
@@ -91,6 +91,12 @@ Low-level functionality.
91
91
  examples
92
92
  errors
93
93
  setup
94
+
95
+ Low-level functionality.
96
+
97
+ .. autosummary::
98
+ :toctree: .
99
+
94
100
  base
95
101
  core
96
102
  models
@@ -108,63 +114,97 @@ Backwards compatibility.
108
114
 
109
115
  # ruff: noqa: I001
110
116
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
111
- __version__ = "1.10.1"
117
+ __version__ = "1.11a1"
112
118
 
113
- import warnings
119
+ import warnings as _warnings
114
120
 
115
121
  # through SpatialData
116
- warnings.filterwarnings(
122
+ _warnings.filterwarnings(
117
123
  "ignore", message="The legacy Dask DataFrame implementation is deprecated"
118
124
  )
119
125
 
120
- from lamindb_setup._check_setup import InstanceNotSetupError as _InstanceNotSetupError
121
126
  from lamindb_setup._check_setup import _check_instance_setup
122
127
  from lamindb_setup._connect_instance import connect
123
128
  from lamindb_setup.core.upath import UPath
124
129
 
125
130
  from . import base, errors, setup
126
131
 
127
-
128
- def __getattr__(name):
129
- raise _InstanceNotSetupError()
130
-
131
-
132
- if _check_instance_setup(from_module="lamindb"):
133
- del __getattr__ # so that imports work out
134
- from . import base
135
- from ._tracked import tracked
136
- from ._view import view
137
- from .core._context import context
138
- from .core._settings import settings
139
- from .curators._legacy import CatManager as Curator
140
- from .models import (
141
- Artifact,
142
- Collection,
143
- Feature,
144
- FeatureSet, # backward compat
145
- Person,
146
- Project,
147
- Reference,
148
- Run,
149
- Schema,
150
- Storage,
151
- Transform,
152
- ULabel,
153
- User,
154
- Space,
155
- Branch,
156
- Record,
157
- )
158
- from .models.save import save
159
- from . import core
160
- from . import integrations
161
- from . import curators
162
- from . import examples
163
-
164
- track = context._track
165
- finish = context._finish
166
- settings.__doc__ = """Global live settings (:class:`~lamindb.core.Settings`)."""
167
- context.__doc__ = """Global run context (:class:`~lamindb.core.Context`)."""
168
- from django.db.models import Q
169
-
170
- Param = Feature # backward compat
132
+ _check_instance_setup(from_module="lamindb")
133
+
134
+ from ._tracked import tracked
135
+ from ._view import view
136
+ from .core._context import context
137
+ from .core._settings import settings
138
+ from .curators._legacy import CatManager as Curator
139
+ from .models import (
140
+ Artifact,
141
+ Collection,
142
+ Feature,
143
+ FeatureSet, # backward compat
144
+ Person,
145
+ Project,
146
+ Reference,
147
+ Run,
148
+ Schema,
149
+ Storage,
150
+ Transform,
151
+ ULabel,
152
+ User,
153
+ Space,
154
+ Branch,
155
+ Record,
156
+ )
157
+ from .models.save import save
158
+ from . import core
159
+ from . import integrations
160
+ from . import curators
161
+ from . import examples
162
+
163
+ track = context._track
164
+ finish = context._finish
165
+ settings.__doc__ = """Global live settings (:class:`~lamindb.core.Settings`)."""
166
+ context.__doc__ = """Global run context (:class:`~lamindb.core.Context`)."""
167
+ from django.db.models import Q
168
+
169
+ Param = Feature # backward compat
170
+
171
+ __all__ = [
172
+ # data lineage
173
+ "track",
174
+ "finish",
175
+ "tracked",
176
+ # registries
177
+ "Artifact",
178
+ "Storage",
179
+ "Transform",
180
+ "Run",
181
+ "Feature",
182
+ "ULabel",
183
+ "Schema",
184
+ "Record",
185
+ "User",
186
+ "Collection",
187
+ "Project",
188
+ "Space",
189
+ "Branch",
190
+ "Reference",
191
+ "Person",
192
+ # other
193
+ "connect",
194
+ "view",
195
+ "save",
196
+ "UPath",
197
+ "settings",
198
+ "context",
199
+ # curators and integrations
200
+ "curators",
201
+ "integrations",
202
+ # examples, errors, setup
203
+ "examples",
204
+ "errors",
205
+ "setup",
206
+ # low-level functionality
207
+ "base",
208
+ "core",
209
+ "models",
210
+ ]
lamindb/_finish.py CHANGED
@@ -264,12 +264,14 @@ def save_context_core(
264
264
  if (
265
265
  is_run_from_ipython and notebook_runner != "nbconvert" and filepath.exists()
266
266
  ): # python notebooks in interactive session
267
- import nbproject
268
-
269
- # it might be that the user modifies the title just before ln.finish()
270
- if (nbproject_title := nbproject.meta.live.title) != transform.description:
271
- transform.description = nbproject_title
272
- transform.save()
267
+ if is_ipynb:
268
+ # ignore this for py:percent notebooks
269
+ import nbproject
270
+
271
+ # it might be that the user modifies the title just before ln.finish()
272
+ if (nbproject_title := nbproject.meta.live.title) != transform.description:
273
+ transform.description = nbproject_title
274
+ transform.save()
273
275
  if not ln_setup._TESTING:
274
276
  save_source_code_and_report = check_filepath_recently_saved(
275
277
  filepath, is_retry
@@ -349,7 +351,7 @@ def save_context_core(
349
351
  if transform_hash != transform.hash:
350
352
  response = input(
351
353
  f"You are about to overwrite existing source code (hash '{transform.hash}') for Transform('{transform.uid}')."
352
- f" Proceed? (y/n)"
354
+ f" Proceed? (y/n) "
353
355
  )
354
356
  if response == "y":
355
357
  transform.source_code = source_code_path.read_text()
@@ -365,11 +367,11 @@ def save_context_core(
365
367
 
366
368
  if run is not None:
367
369
  base_path = ln_setup.settings.cache_dir / "environments" / f"run_{run.uid}"
368
- paths = [base_path / "run_env_pip.txt", base_path / "r_pak_lockfile.json"]
370
+ paths = [base_path / "run_env_pip.txt", base_path / "r_environment.txt"]
369
371
  existing_paths = [path for path in paths if path.exists()]
370
372
  if len(existing_paths) == 2:
371
373
  # let's not store the python environment for an R session for now
372
- existing_paths = [base_path / "r_pak_lockfile.json"]
374
+ existing_paths = [base_path / "r_environment.txt"]
373
375
 
374
376
  if existing_paths:
375
377
  overwrite_env = True
@@ -387,8 +389,8 @@ def save_context_core(
387
389
  if len(existing_paths) == 1:
388
390
  if existing_paths[0].name == "run_env_pip.txt":
389
391
  description = "requirements.txt"
390
- elif existing_paths[0].name == "r_pak_lockfile.json":
391
- description = "r_pak_lockfile.json"
392
+ elif existing_paths[0].name == "r_environment.txt":
393
+ description = "r_environment.txt"
392
394
  env_hash, _ = hash_file(artifact_path)
393
395
  else:
394
396
  description = "environments"
@@ -432,7 +434,7 @@ def save_context_core(
432
434
  hash, _ = hash_file(report_path) # ignore hash_type for now
433
435
  if hash != run.report.hash:
434
436
  response = input(
435
- f"You are about to overwrite an existing report (hash '{run.report.hash}') for Run('{run.uid}'). Proceed? (y/n)"
437
+ f"You are about to overwrite an existing report (hash '{run.report.hash}') for Run('{run.uid}'). Proceed? (y/n) "
436
438
  )
437
439
  if response == "y":
438
440
  run.report.replace(report_path)
lamindb/_tracked.py CHANGED
@@ -52,7 +52,7 @@ def tracked(uid: str | None = None) -> Callable[[Callable[P, R]], Callable[P, R]
52
52
  artifact = ln.Artifact.get(key=input_artifact_key)
53
53
  df = artifact.load() # auto-tracked as input
54
54
  new_df = df.iloc[:subset_rows, :subset_cols]
55
- ln.Artifact.from_df(new_df, key=output_artifact_key).save() # auto-tracked as output
55
+ ln.Artifact.from_dataframe(new_df, key=output_artifact_key).save() # auto-tracked as output
56
56
  """
57
57
 
58
58
  def decorator_tracked(func: Callable[P, R]) -> Callable[P, R]:
@@ -104,9 +104,7 @@ def tracked(uid: str | None = None) -> Callable[[Callable[P, R]], Callable[P, R]
104
104
  # Deal with non-trivial parameter values
105
105
  filtered_params = {}
106
106
  for key, value in params.items():
107
- dtype, _, _ = infer_feature_type_convert_json(
108
- key, value, str_as_ulabel=False
109
- )
107
+ dtype, _, _ = infer_feature_type_convert_json(key, value)
110
108
  if (dtype == "?" or dtype.startswith("cat")) and dtype != "cat ? str":
111
109
  continue
112
110
  filtered_params[key] = value
lamindb/_view.py CHANGED
@@ -162,7 +162,7 @@ def view(
162
162
  logger.print(section)
163
163
  logger.print("*" * len(section_no_color))
164
164
  for registry in sorted(filtered_registries, key=lambda x: x.__name__):
165
- df = registry.df(limit=limit)
165
+ df = registry.to_dataframe(limit=limit)
166
166
  if df.shape[0] > 0:
167
167
  logger.print(colors.blue(colors.bold(registry.__name__)))
168
168
  show(df)
lamindb/base/__init__.py CHANGED
@@ -10,6 +10,7 @@ Modules:
10
10
  uids
11
11
  types
12
12
  fields
13
+ dtypes
13
14
 
14
15
  Utils:
15
16
 
@@ -23,4 +24,4 @@ Utils:
23
24
 
24
25
  from lamindb_setup.core import deprecated, doc_args
25
26
 
26
- from . import fields, types, uids
27
+ from . import dtypes, fields, types, uids
lamindb/base/dtypes.py ADDED
@@ -0,0 +1,76 @@
1
+ from datetime import datetime
2
+ from typing import Any, Callable, Iterable
3
+
4
+ import pandas as pd
5
+
6
+
7
def is_list_of_type(value: Any, expected_type: Any) -> bool:
    """Check that a value, or every leaf of a nested iterable, is of `expected_type`.

    Accepts a single value, an iterable of values, or an arbitrarily nested
    mix of both. An empty iterable is accepted because there is no leaf that
    could violate the expectation.

    Args:
        value: The scalar or (nested) iterable to inspect.
        expected_type: A type or tuple of types, as accepted by `isinstance`.

    Returns:
        `True` if every leaf is an instance of `expected_type`, else `False`.
    """
    # str, bytes and bytearray are iterable but represent scalar values here;
    # iterating a bytearray would yield ints and wrongly pass as a list of ints
    if isinstance(value, Iterable) and not isinstance(value, (str, bytes, bytearray)):
        # handle nested iterables recursively
        return all(is_list_of_type(item, expected_type) for item in value)
    return isinstance(value, expected_type)
13
+
14
+
15
def check_dtype(expected_type: Any) -> Callable:
    """Create a check function for Pandera that validates a column's dtype.

    Two kinds of type identifiers are handled:

    - Scalar identifiers ('int', 'float', 'num', 'str', 'path') pass when the
      series dtype as a whole matches the corresponding pandas dtype family.
    - List identifiers ('list[int]', 'list[float]', 'list[num]', 'list[str]',
      'list[path]', 'list[cat[...]]') pass for object-dtype series in which
      every cell is a value of the member type, a (nested) list of such
      values, or a mix of both.

    Args:
        expected_type: String identifier for the expected type.

    Returns:
        A function that takes a series and returns `True` if it conforms to
        `expected_type` and `False` otherwise.
    """
    # whole-column dtype predicates used for the fast path
    dtype_predicates = {
        "int": pd.api.types.is_integer_dtype,
        "float": pd.api.types.is_float_dtype,
        "num": pd.api.types.is_numeric_dtype,
        "str": pd.api.types.is_string_dtype,
        "path": pd.api.types.is_string_dtype,
    }
    # Python types accepted for the members of a list-typed column
    member_python_types = {
        "int": int,
        "float": float,
        # for numeric, accept either int or float
        "num": (int, float),
        "str": str,
        "path": str,
    }

    def check_function(series):
        # a non-string identifier can never match; fail validation instead of
        # raising AttributeError on the string methods used below
        if not isinstance(expected_type, str):
            return False

        # first check if the series is entirely of the expected dtype (fast path)
        dtype_predicate = dtype_predicates.get(expected_type)
        if dtype_predicate is not None and dtype_predicate(series.dtype):
            return True

        # otherwise it might be a mixed column with object dtype, for which
        # each value needs to be checked individually
        if series.dtype == "object" and expected_type.startswith("list"):
            member = expected_type.replace("list[", "").removesuffix("]")
            if member.startswith("cat["):
                # categorical members are validated as plain strings here
                python_type = str
            else:
                python_type = member_python_types.get(member)
            if python_type is not None:
                # convert to a builtin bool so all paths return the same type
                return bool(
                    series.apply(lambda x: is_list_of_type(x, python_type)).all()
                )

        # if we get here, the validation failed
        return False

    return check_function
63
+
64
+
65
+ def is_valid_datetime_str(date_string: str) -> bool | str:
66
+ try:
67
+ dt = datetime.fromisoformat(date_string)
68
+ return dt.isoformat()
69
+ except ValueError:
70
+ return False
71
+
72
+
73
def is_iterable_of_sqlrecord(value: Any) -> bool:
    """Check whether a value is an iterable whose first element is a `SQLRecord`.

    Only the first element is inspected; the remaining elements are assumed to
    be of the same kind. An empty iterable yields `False`. Note that a
    one-shot iterator passed here has its first element consumed.

    Args:
        value: The object to inspect.

    Returns:
        `True` if `value` is a non-empty iterable starting with a `SQLRecord`.
    """
    # imported lazily inside the function, as in the original implementation
    from lamindb.models import SQLRecord

    if not isinstance(value, Iterable):
        return False
    # use a default so that an empty iterable does not raise StopIteration
    first_item = next(iter(value), None)
    return isinstance(first_item, SQLRecord)
lamindb/core/_settings.py CHANGED
@@ -10,6 +10,7 @@ from lamindb_setup import settings as setup_settings
10
10
  from lamindb_setup._set_managed_storage import set_managed_storage
11
11
  from lamindb_setup.core import deprecated
12
12
  from lamindb_setup.core._settings_instance import sanitize_git_repo_url
13
+ from lamindb_setup.core._settings_storage import StorageSettings
13
14
 
14
15
  from .subsettings._annotation_settings import AnnotationSettings, annotation_settings
15
16
  from .subsettings._creation_settings import CreationSettings, creation_settings
@@ -18,7 +19,6 @@ if TYPE_CHECKING:
18
19
  from collections.abc import Mapping
19
20
  from pathlib import Path
20
21
 
21
- from lamindb_setup.core._settings_storage import StorageSettings
22
22
  from upath import UPath
23
23
 
24
24
 
@@ -193,13 +193,39 @@ class Settings:
193
193
 
194
194
  @storage.setter
195
195
  def storage(self, path_kwargs: str | Path | UPath | tuple[str | UPath, Mapping]):
196
+ import lamindb as ln
197
+
196
198
  if isinstance(path_kwargs, tuple):
197
199
  path, kwargs = path_kwargs
200
+ # we should ultimately deprecate passing host here, I think
198
201
  if isinstance(kwargs, str):
199
202
  kwargs = {"host": kwargs}
200
203
  else:
201
204
  path, kwargs = path_kwargs, {}
202
- set_managed_storage(path, **kwargs)
205
+ ssettings = StorageSettings(root=path) # there is no need to pass kwargs here!
206
+ exists = ln.Storage.filter(root=ssettings.root_as_str).one_or_none()
207
+ if exists is None:
208
+ response = input(
209
+ f"Storage location {ssettings.root_as_str} does not yet exist. Do you want to continue with creating it? (y/n) "
210
+ )
211
+ # logger.warning(f"deprecated call because storage location does **not yet** exist; going forward, please create through ln.Storage(root={path}).save() going forward")
212
+ if response != "y":
213
+ return None
214
+ set_managed_storage(path, **kwargs)
215
+ else:
216
+ if exists.instance_uid != ln_setup.settings.instance.uid:
217
+ raise ValueError(
218
+ f"Storage {ssettings.root_as_str} exists in another instance ({exists.instance_uid}), cannot write to it from here."
219
+ )
220
+ ssettings = StorageSettings(
221
+ root=exists.root,
222
+ region=exists.region,
223
+ uid=exists.uid,
224
+ instance_id=ln_setup.settings.instance._id,
225
+ )
226
+ ln_setup.settings.instance._storage = ssettings
227
+ kwargs.pop("host", None) # host is not needed for existing storage
228
+ settings.storage._set_fs_kwargs(**kwargs)
203
229
 
204
230
  @property
205
231
  def instance_uid(self) -> str:
@@ -223,6 +249,23 @@ class Settings:
223
249
 
224
250
  @local_storage.setter
225
251
  def local_storage(self, local_root: Path):
252
+ import lamindb as ln
253
+
254
+ # note duplication with storage setter!
255
+ ssettings = StorageSettings(root=local_root)
256
+ exists = ln.Storage.filter(root=ssettings.root_as_str).one_or_none()
257
+ if exists is None:
258
+ response = input(
259
+ f"Storage location {ssettings.root_as_str} does not yet exist. Do you want to continue with creating it? (y/n) "
260
+ )
261
+ # logger.warning(f"deprecated call because storage location does **not yet** exist; going forward, please create through ln.Storage(root={path}).save() going forward")
262
+ if response != "y":
263
+ return None
264
+ else:
265
+ if exists.instance_uid != ln_setup.settings.instance.uid:
266
+ raise ValueError(
267
+ f"Storage {ssettings.root_as_str} exists in another instance ({exists.instance_uid}), cannot write to it from here."
268
+ )
226
269
  ln_setup.settings.instance.local_storage = local_root
227
270
 
228
271
  @property