lamindb 1.10.1__py3-none-any.whl → 1.11a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +89 -49
- lamindb/_finish.py +14 -12
- lamindb/_tracked.py +2 -4
- lamindb/_view.py +1 -1
- lamindb/base/__init__.py +2 -1
- lamindb/base/dtypes.py +76 -0
- lamindb/core/_settings.py +45 -2
- lamindb/core/storage/_anndata_accessor.py +118 -26
- lamindb/core/storage/_backed_access.py +10 -7
- lamindb/core/storage/_spatialdata_accessor.py +15 -4
- lamindb/core/storage/_zarr.py +3 -0
- lamindb/curators/_legacy.py +16 -3
- lamindb/curators/core.py +439 -191
- lamindb/examples/cellxgene/__init__.py +8 -3
- lamindb/examples/cellxgene/_cellxgene.py +127 -13
- lamindb/examples/cellxgene/{cxg_schema_versions.csv → cellxgene_schema_versions.csv} +11 -0
- lamindb/examples/croissant/__init__.py +12 -2
- lamindb/examples/datasets/__init__.py +2 -2
- lamindb/examples/datasets/_core.py +1 -1
- lamindb/examples/datasets/_small.py +66 -22
- lamindb/examples/datasets/mini_immuno.py +1 -0
- lamindb/migrations/0118_alter_recordproject_value_projectrecord.py +99 -0
- lamindb/migrations/0119_rename_records_project_linked_in_records.py +26 -0
- lamindb/migrations/{0117_squashed.py → 0119_squashed.py} +92 -5
- lamindb/migrations/0120_add_record_fk_constraint.py +64 -0
- lamindb/migrations/0121_recorduser.py +53 -0
- lamindb/models/__init__.py +3 -1
- lamindb/models/_describe.py +2 -2
- lamindb/models/_feature_manager.py +53 -53
- lamindb/models/_from_values.py +2 -2
- lamindb/models/_is_versioned.py +4 -4
- lamindb/models/_label_manager.py +4 -4
- lamindb/models/artifact.py +336 -136
- lamindb/models/artifact_set.py +36 -1
- lamindb/models/can_curate.py +1 -2
- lamindb/models/collection.py +3 -34
- lamindb/models/feature.py +111 -7
- lamindb/models/has_parents.py +11 -11
- lamindb/models/project.py +42 -2
- lamindb/models/query_manager.py +16 -7
- lamindb/models/query_set.py +59 -34
- lamindb/models/record.py +25 -4
- lamindb/models/run.py +8 -6
- lamindb/models/schema.py +54 -26
- lamindb/models/sqlrecord.py +123 -25
- lamindb/models/storage.py +59 -14
- lamindb/models/transform.py +17 -17
- lamindb/models/ulabel.py +6 -1
- {lamindb-1.10.1.dist-info → lamindb-1.11a1.dist-info}/METADATA +3 -3
- {lamindb-1.10.1.dist-info → lamindb-1.11a1.dist-info}/RECORD +52 -47
- {lamindb-1.10.1.dist-info → lamindb-1.11a1.dist-info}/LICENSE +0 -0
- {lamindb-1.10.1.dist-info → lamindb-1.11a1.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
@@ -83,7 +83,7 @@ Curators and integrations.
|
|
83
83
|
curators
|
84
84
|
integrations
|
85
85
|
|
86
|
-
|
86
|
+
Examples, errors, and setup.
|
87
87
|
|
88
88
|
.. autosummary::
|
89
89
|
:toctree: .
|
@@ -91,6 +91,12 @@ Low-level functionality.
|
|
91
91
|
examples
|
92
92
|
errors
|
93
93
|
setup
|
94
|
+
|
95
|
+
Low-level functionality.
|
96
|
+
|
97
|
+
.. autosummary::
|
98
|
+
:toctree: .
|
99
|
+
|
94
100
|
base
|
95
101
|
core
|
96
102
|
models
|
@@ -108,63 +114,97 @@ Backwards compatibility.
|
|
108
114
|
|
109
115
|
# ruff: noqa: I001
|
110
116
|
# denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
|
111
|
-
__version__ = "1.
|
117
|
+
__version__ = "1.11a1"
|
112
118
|
|
113
|
-
import warnings
|
119
|
+
import warnings as _warnings
|
114
120
|
|
115
121
|
# through SpatialData
|
116
|
-
|
122
|
+
_warnings.filterwarnings(
|
117
123
|
"ignore", message="The legacy Dask DataFrame implementation is deprecated"
|
118
124
|
)
|
119
125
|
|
120
|
-
from lamindb_setup._check_setup import InstanceNotSetupError as _InstanceNotSetupError
|
121
126
|
from lamindb_setup._check_setup import _check_instance_setup
|
122
127
|
from lamindb_setup._connect_instance import connect
|
123
128
|
from lamindb_setup.core.upath import UPath
|
124
129
|
|
125
130
|
from . import base, errors, setup
|
126
131
|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
132
|
+
_check_instance_setup(from_module="lamindb")
|
133
|
+
|
134
|
+
from ._tracked import tracked
|
135
|
+
from ._view import view
|
136
|
+
from .core._context import context
|
137
|
+
from .core._settings import settings
|
138
|
+
from .curators._legacy import CatManager as Curator
|
139
|
+
from .models import (
|
140
|
+
Artifact,
|
141
|
+
Collection,
|
142
|
+
Feature,
|
143
|
+
FeatureSet, # backward compat
|
144
|
+
Person,
|
145
|
+
Project,
|
146
|
+
Reference,
|
147
|
+
Run,
|
148
|
+
Schema,
|
149
|
+
Storage,
|
150
|
+
Transform,
|
151
|
+
ULabel,
|
152
|
+
User,
|
153
|
+
Space,
|
154
|
+
Branch,
|
155
|
+
Record,
|
156
|
+
)
|
157
|
+
from .models.save import save
|
158
|
+
from . import core
|
159
|
+
from . import integrations
|
160
|
+
from . import curators
|
161
|
+
from . import examples
|
162
|
+
|
163
|
+
track = context._track
|
164
|
+
finish = context._finish
|
165
|
+
settings.__doc__ = """Global live settings (:class:`~lamindb.core.Settings`)."""
|
166
|
+
context.__doc__ = """Global run context (:class:`~lamindb.core.Context`)."""
|
167
|
+
from django.db.models import Q
|
168
|
+
|
169
|
+
Param = Feature # backward compat
|
170
|
+
|
171
|
+
__all__ = [
|
172
|
+
# data lineage
|
173
|
+
"track",
|
174
|
+
"finish",
|
175
|
+
"tracked",
|
176
|
+
# registries
|
177
|
+
"Artifact",
|
178
|
+
"Storage",
|
179
|
+
"Transform",
|
180
|
+
"Run",
|
181
|
+
"Feature",
|
182
|
+
"ULabel",
|
183
|
+
"Schema",
|
184
|
+
"Record",
|
185
|
+
"User",
|
186
|
+
"Collection",
|
187
|
+
"Project",
|
188
|
+
"Space",
|
189
|
+
"Branch",
|
190
|
+
"Reference",
|
191
|
+
"Person",
|
192
|
+
# other
|
193
|
+
"connect",
|
194
|
+
"view",
|
195
|
+
"save",
|
196
|
+
"UPath",
|
197
|
+
"settings",
|
198
|
+
"context",
|
199
|
+
# curators and integrations
|
200
|
+
"curators",
|
201
|
+
"integrations",
|
202
|
+
# examples, errors, setup
|
203
|
+
"examples",
|
204
|
+
"errors",
|
205
|
+
"setup",
|
206
|
+
# low-level functionality
|
207
|
+
"base",
|
208
|
+
"core",
|
209
|
+
"models",
|
210
|
+
]
|
lamindb/_finish.py
CHANGED
@@ -264,12 +264,14 @@ def save_context_core(
|
|
264
264
|
if (
|
265
265
|
is_run_from_ipython and notebook_runner != "nbconvert" and filepath.exists()
|
266
266
|
): # python notebooks in interactive session
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
transform.
|
267
|
+
if is_ipynb:
|
268
|
+
# ignore this for py:percent notebooks
|
269
|
+
import nbproject
|
270
|
+
|
271
|
+
# it might be that the user modifies the title just before ln.finish()
|
272
|
+
if (nbproject_title := nbproject.meta.live.title) != transform.description:
|
273
|
+
transform.description = nbproject_title
|
274
|
+
transform.save()
|
273
275
|
if not ln_setup._TESTING:
|
274
276
|
save_source_code_and_report = check_filepath_recently_saved(
|
275
277
|
filepath, is_retry
|
@@ -349,7 +351,7 @@ def save_context_core(
|
|
349
351
|
if transform_hash != transform.hash:
|
350
352
|
response = input(
|
351
353
|
f"You are about to overwrite existing source code (hash '{transform.hash}') for Transform('{transform.uid}')."
|
352
|
-
f" Proceed? (y/n)"
|
354
|
+
f" Proceed? (y/n) "
|
353
355
|
)
|
354
356
|
if response == "y":
|
355
357
|
transform.source_code = source_code_path.read_text()
|
@@ -365,11 +367,11 @@ def save_context_core(
|
|
365
367
|
|
366
368
|
if run is not None:
|
367
369
|
base_path = ln_setup.settings.cache_dir / "environments" / f"run_{run.uid}"
|
368
|
-
paths = [base_path / "run_env_pip.txt", base_path / "
|
370
|
+
paths = [base_path / "run_env_pip.txt", base_path / "r_environment.txt"]
|
369
371
|
existing_paths = [path for path in paths if path.exists()]
|
370
372
|
if len(existing_paths) == 2:
|
371
373
|
# let's not store the python environment for an R session for now
|
372
|
-
existing_paths = [base_path / "
|
374
|
+
existing_paths = [base_path / "r_environment.txt"]
|
373
375
|
|
374
376
|
if existing_paths:
|
375
377
|
overwrite_env = True
|
@@ -387,8 +389,8 @@ def save_context_core(
|
|
387
389
|
if len(existing_paths) == 1:
|
388
390
|
if existing_paths[0].name == "run_env_pip.txt":
|
389
391
|
description = "requirements.txt"
|
390
|
-
elif existing_paths[0].name == "
|
391
|
-
description = "
|
392
|
+
elif existing_paths[0].name == "r_environment.txt":
|
393
|
+
description = "r_environment.txt"
|
392
394
|
env_hash, _ = hash_file(artifact_path)
|
393
395
|
else:
|
394
396
|
description = "environments"
|
@@ -432,7 +434,7 @@ def save_context_core(
|
|
432
434
|
hash, _ = hash_file(report_path) # ignore hash_type for now
|
433
435
|
if hash != run.report.hash:
|
434
436
|
response = input(
|
435
|
-
f"You are about to overwrite an existing report (hash '{run.report.hash}') for Run('{run.uid}'). Proceed? (y/n)"
|
437
|
+
f"You are about to overwrite an existing report (hash '{run.report.hash}') for Run('{run.uid}'). Proceed? (y/n) "
|
436
438
|
)
|
437
439
|
if response == "y":
|
438
440
|
run.report.replace(report_path)
|
lamindb/_tracked.py
CHANGED
@@ -52,7 +52,7 @@ def tracked(uid: str | None = None) -> Callable[[Callable[P, R]], Callable[P, R]
|
|
52
52
|
artifact = ln.Artifact.get(key=input_artifact_key)
|
53
53
|
df = artifact.load() # auto-tracked as input
|
54
54
|
new_df = df.iloc[:subset_rows, :subset_cols]
|
55
|
-
ln.Artifact.
|
55
|
+
ln.Artifact.from_dataframe(new_df, key=output_artifact_key).save() # auto-tracked as output
|
56
56
|
"""
|
57
57
|
|
58
58
|
def decorator_tracked(func: Callable[P, R]) -> Callable[P, R]:
|
@@ -104,9 +104,7 @@ def tracked(uid: str | None = None) -> Callable[[Callable[P, R]], Callable[P, R]
|
|
104
104
|
# Deal with non-trivial parameter values
|
105
105
|
filtered_params = {}
|
106
106
|
for key, value in params.items():
|
107
|
-
dtype, _, _ = infer_feature_type_convert_json(
|
108
|
-
key, value, str_as_ulabel=False
|
109
|
-
)
|
107
|
+
dtype, _, _ = infer_feature_type_convert_json(key, value)
|
110
108
|
if (dtype == "?" or dtype.startswith("cat")) and dtype != "cat ? str":
|
111
109
|
continue
|
112
110
|
filtered_params[key] = value
|
lamindb/_view.py
CHANGED
@@ -162,7 +162,7 @@ def view(
|
|
162
162
|
logger.print(section)
|
163
163
|
logger.print("*" * len(section_no_color))
|
164
164
|
for registry in sorted(filtered_registries, key=lambda x: x.__name__):
|
165
|
-
df = registry.
|
165
|
+
df = registry.to_dataframe(limit=limit)
|
166
166
|
if df.shape[0] > 0:
|
167
167
|
logger.print(colors.blue(colors.bold(registry.__name__)))
|
168
168
|
show(df)
|
lamindb/base/__init__.py
CHANGED
lamindb/base/dtypes.py
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
from datetime import datetime
|
2
|
+
from typing import Any, Callable, Iterable
|
3
|
+
|
4
|
+
import pandas as pd
|
5
|
+
|
6
|
+
|
7
|
+
def is_list_of_type(value: Any, expected_type: Any) -> bool:
    """Check that `value`, or every leaf of a nested iterable `value`, is an `expected_type`.

    Strings and bytes are treated as scalars, never as containers. Any other
    iterable is walked recursively, so a nested mix of single values and lists
    passes as long as every leaf passes. An empty iterable passes.
    """
    is_container = isinstance(value, Iterable) and not isinstance(value, (str, bytes))
    if not is_container:
        return isinstance(value, expected_type)
    # descend into each member and stop at the first leaf of the wrong type
    for member in value:
        if not is_list_of_type(member, expected_type):
            return False
    return True
|
13
|
+
|
14
|
+
|
15
|
+
def check_dtype(expected_type: Any) -> Callable:
    """Build a check function for Pandera that validates a column's dtype.

    The returned function first compares the series dtype against the expected
    scalar type. If that does not match and the column has object dtype, it
    validates list-typed columns value by value, accepting single values,
    lists, and nested mixes of both.

    Args:
        expected_type: Type identifier. Scalar identifiers handled here are
            'int', 'float', 'num', 'str' and 'path'. List identifiers have the
            form 'list[<member>]', where the member is 'int', 'float', 'num',
            'str', 'path', or starts with 'cat['.

    Returns:
        A function that takes a series and returns a truthy value if the
        series validates against `expected_type`, and a falsy value otherwise.
    """

    def check_function(series):
        column_dtype = series.dtype
        type_api = pd.api.types

        # fast path: the whole column already carries the expected dtype
        scalar_checks = (
            ("int", type_api.is_integer_dtype),
            ("float", type_api.is_float_dtype),
            ("num", type_api.is_numeric_dtype),
            ("str", type_api.is_string_dtype),
            ("path", type_api.is_string_dtype),
        )
        for type_name, dtype_predicate in scalar_checks:
            if expected_type == type_name and dtype_predicate(column_dtype):
                return True

        # only object-dtype columns can hold the mixed list / single values
        # that the list identifiers describe; everything else fails here
        if not (column_dtype == "object" and expected_type.startswith("list")):
            return False

        member = expected_type.replace("list[", "").removesuffix("]")
        if member == "int":
            leaf_type = int
        elif member == "float":
            leaf_type = float
        elif member == "num":
            # for numeric, accept either int or float
            leaf_type = (int, float)
        elif member in ("str", "path") or member.startswith("cat["):
            leaf_type = str
        else:
            # unknown member type: the validation failed
            return False

        # need to check each value individually
        return series.apply(lambda cell: is_list_of_type(cell, leaf_type)).all()

    return check_function
|
63
|
+
|
64
|
+
|
65
|
+
def is_valid_datetime_str(date_string: str) -> bool | str:
|
66
|
+
try:
|
67
|
+
dt = datetime.fromisoformat(date_string)
|
68
|
+
return dt.isoformat()
|
69
|
+
except ValueError:
|
70
|
+
return False
|
71
|
+
|
72
|
+
|
73
|
+
def is_iterable_of_sqlrecord(value: Any):
    """Return whether `value` is an iterable whose first element is a `SQLRecord`.

    Only the first element is inspected. An empty iterable yields `False`
    rather than letting `StopIteration` escape to the caller. If `value` is a
    one-shot iterator, inspecting it consumes its first element.
    """
    # imported here rather than at module level, as in the original
    from lamindb.models import SQLRecord

    if not isinstance(value, Iterable):
        return False
    # the default keeps an empty iterable from raising StopIteration
    first_item = next(iter(value), None)
    return isinstance(first_item, SQLRecord)
|
lamindb/core/_settings.py
CHANGED
@@ -10,6 +10,7 @@ from lamindb_setup import settings as setup_settings
|
|
10
10
|
from lamindb_setup._set_managed_storage import set_managed_storage
|
11
11
|
from lamindb_setup.core import deprecated
|
12
12
|
from lamindb_setup.core._settings_instance import sanitize_git_repo_url
|
13
|
+
from lamindb_setup.core._settings_storage import StorageSettings
|
13
14
|
|
14
15
|
from .subsettings._annotation_settings import AnnotationSettings, annotation_settings
|
15
16
|
from .subsettings._creation_settings import CreationSettings, creation_settings
|
@@ -18,7 +19,6 @@ if TYPE_CHECKING:
|
|
18
19
|
from collections.abc import Mapping
|
19
20
|
from pathlib import Path
|
20
21
|
|
21
|
-
from lamindb_setup.core._settings_storage import StorageSettings
|
22
22
|
from upath import UPath
|
23
23
|
|
24
24
|
|
@@ -193,13 +193,39 @@ class Settings:
|
|
193
193
|
|
194
194
|
@storage.setter
|
195
195
|
def storage(self, path_kwargs: str | Path | UPath | tuple[str | UPath, Mapping]):
|
196
|
+
import lamindb as ln
|
197
|
+
|
196
198
|
if isinstance(path_kwargs, tuple):
|
197
199
|
path, kwargs = path_kwargs
|
200
|
+
# we should ultimately deprecate passing host here, I think
|
198
201
|
if isinstance(kwargs, str):
|
199
202
|
kwargs = {"host": kwargs}
|
200
203
|
else:
|
201
204
|
path, kwargs = path_kwargs, {}
|
202
|
-
|
205
|
+
ssettings = StorageSettings(root=path) # there is no need to pass kwargs here!
|
206
|
+
exists = ln.Storage.filter(root=ssettings.root_as_str).one_or_none()
|
207
|
+
if exists is None:
|
208
|
+
response = input(
|
209
|
+
f"Storage location {ssettings.root_as_str} does not yet exist. Do you want to continue with creating it? (y/n) "
|
210
|
+
)
|
211
|
+
# logger.warning(f"deprecated call because storage location does **not yet** exist; going forward, please create through ln.Storage(root={path}).save() going forward")
|
212
|
+
if response != "y":
|
213
|
+
return None
|
214
|
+
set_managed_storage(path, **kwargs)
|
215
|
+
else:
|
216
|
+
if exists.instance_uid != ln_setup.settings.instance.uid:
|
217
|
+
raise ValueError(
|
218
|
+
f"Storage {ssettings.root_as_str} exists in another instance ({exists.instance_uid}), cannot write to it from here."
|
219
|
+
)
|
220
|
+
ssettings = StorageSettings(
|
221
|
+
root=exists.root,
|
222
|
+
region=exists.region,
|
223
|
+
uid=exists.uid,
|
224
|
+
instance_id=ln_setup.settings.instance._id,
|
225
|
+
)
|
226
|
+
ln_setup.settings.instance._storage = ssettings
|
227
|
+
kwargs.pop("host", None) # host is not needed for existing storage
|
228
|
+
settings.storage._set_fs_kwargs(**kwargs)
|
203
229
|
|
204
230
|
@property
|
205
231
|
def instance_uid(self) -> str:
|
@@ -223,6 +249,23 @@ class Settings:
|
|
223
249
|
|
224
250
|
@local_storage.setter
|
225
251
|
def local_storage(self, local_root: Path):
|
252
|
+
import lamindb as ln
|
253
|
+
|
254
|
+
# note duplication with storage setter!
|
255
|
+
ssettings = StorageSettings(root=local_root)
|
256
|
+
exists = ln.Storage.filter(root=ssettings.root_as_str).one_or_none()
|
257
|
+
if exists is None:
|
258
|
+
response = input(
|
259
|
+
f"Storage location {ssettings.root_as_str} does not yet exist. Do you want to continue with creating it? (y/n) "
|
260
|
+
)
|
261
|
+
# logger.warning(f"deprecated call because storage location does **not yet** exist; going forward, please create through ln.Storage(root={path}).save() going forward")
|
262
|
+
if response != "y":
|
263
|
+
return None
|
264
|
+
else:
|
265
|
+
if exists.instance_uid != ln_setup.settings.instance.uid:
|
266
|
+
raise ValueError(
|
267
|
+
f"Storage {ssettings.root_as_str} exists in another instance ({exists.instance_uid}), cannot write to it from here."
|
268
|
+
)
|
226
269
|
ln_setup.settings.instance.local_storage = local_root
|
227
270
|
|
228
271
|
@property
|