lamindb 1.2.0__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/core/_context.py +6 -0
- lamindb/core/datasets/__init__.py +1 -0
- lamindb/core/datasets/_core.py +23 -0
- lamindb/core/datasets/_small.py +16 -2
- lamindb/core/storage/objects.py +1 -2
- lamindb/curators/__init__.py +1269 -1513
- lamindb/curators/_cellxgene_schemas/__init__.py +190 -18
- lamindb/curators/_cellxgene_schemas/schema_versions.csv +43 -0
- lamindb/models/_feature_manager.py +65 -14
- lamindb/models/_from_values.py +113 -78
- lamindb/models/artifact.py +138 -95
- lamindb/models/can_curate.py +185 -216
- lamindb/models/feature.py +32 -2
- lamindb/models/project.py +69 -7
- lamindb/models/record.py +43 -25
- lamindb/models/run.py +18 -1
- lamindb/models/schema.py +0 -8
- {lamindb-1.2.0.dist-info → lamindb-1.3.0.dist-info}/METADATA +6 -5
- {lamindb-1.2.0.dist-info → lamindb-1.3.0.dist-info}/RECORD +22 -22
- lamindb/curators/_cellxgene_schemas/schema_versions.yml +0 -104
- {lamindb-1.2.0.dist-info → lamindb-1.3.0.dist-info}/LICENSE +0 -0
- {lamindb-1.2.0.dist-info → lamindb-1.3.0.dist-info}/WHEEL +0 -0
lamindb/models/artifact.py
CHANGED
@@ -127,7 +127,7 @@ if TYPE_CHECKING:
|
|
127
127
|
INCONSISTENT_STATE_MSG = (
|
128
128
|
"Trying to read a folder artifact from an outdated version, "
|
129
129
|
"this can result in an incosistent state.\n"
|
130
|
-
"Read from the latest version: artifact.versions.
|
130
|
+
"Read from the latest version: artifact.versions.get(is_latest=True)"
|
131
131
|
)
|
132
132
|
|
133
133
|
|
@@ -602,10 +602,10 @@ def get_run(run: Run | None) -> Run | None:
|
|
602
602
|
run = context.run
|
603
603
|
if run is None and not settings.creation.artifact_silence_missing_run_warning:
|
604
604
|
# here we check that this is not a read-only connection
|
605
|
-
# normally for our connection strings the read-only role name has
|
605
|
+
# normally for our connection strings the read-only role name has "read" in it
|
606
606
|
# not absolutely safe but the worst case is that the warning is not shown
|
607
607
|
instance = setup_settings.instance
|
608
|
-
if instance.dialect != "postgresql" or "
|
608
|
+
if instance.dialect != "postgresql" or "read" not in instance.db:
|
609
609
|
logger.warning(WARNING_RUN_TRANSFORM)
|
610
610
|
# suppress run by passing False
|
611
611
|
elif not run:
|
@@ -1468,39 +1468,23 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1468
1468
|
def n_objects(self) -> int:
|
1469
1469
|
return self.n_files
|
1470
1470
|
|
1471
|
-
# add the below because this is what people will have in their code
|
1472
|
-
# if they implement the recommended migration strategy
|
1473
|
-
# - FeatureSet -> Schema
|
1474
|
-
# - featureset -> schema
|
1475
|
-
# - feature_set -> schema
|
1476
|
-
# @property
|
1477
|
-
# def schemas(self) -> QuerySet[Schema]:
|
1478
|
-
# """Schemas linked to artifact via many-to-many relationship.
|
1479
|
-
|
1480
|
-
# Is now mediating the private `.feature_sets` relationship during
|
1481
|
-
# a transition period to better schema management.
|
1482
|
-
|
1483
|
-
# .. versionchanged: 1.0
|
1484
|
-
# Was previously called `.feature_sets`.
|
1485
|
-
|
1486
|
-
# """
|
1487
|
-
# return self.feature_sets
|
1488
|
-
|
1489
1471
|
@property
|
1490
1472
|
def path(self) -> Path:
|
1491
1473
|
"""Path.
|
1492
1474
|
|
1493
|
-
|
1475
|
+
Example::
|
1494
1476
|
|
1495
|
-
|
1496
|
-
>>> artifact.path
|
1497
|
-
S3QueryPath('s3://my-bucket/my-file.csv')
|
1477
|
+
import lamindb as ln
|
1498
1478
|
|
1499
|
-
|
1479
|
+
# File in cloud storage, here AWS S3:
|
1480
|
+
artifact = ln.Artifact("s3://my-bucket/my-file.csv").save()
|
1481
|
+
artifact.path
|
1482
|
+
#> S3QueryPath('s3://my-bucket/my-file.csv')
|
1500
1483
|
|
1501
|
-
|
1502
|
-
|
1503
|
-
|
1484
|
+
# File in local storage:
|
1485
|
+
artifact = ln.Artifact("./myfile.csv", key="myfile.csv").save()
|
1486
|
+
artifact.path
|
1487
|
+
#> PosixPath('/home/runner/work/lamindb/lamindb/docs/guide/mydata/myfile.csv')
|
1504
1488
|
"""
|
1505
1489
|
from lamindb import settings
|
1506
1490
|
|
@@ -1520,6 +1504,34 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1520
1504
|
filepath, cache_key=cache_key
|
1521
1505
|
)
|
1522
1506
|
|
1507
|
+
@classmethod
|
1508
|
+
def get(
|
1509
|
+
cls,
|
1510
|
+
idlike: int | str | None = None,
|
1511
|
+
**expressions,
|
1512
|
+
) -> Artifact:
|
1513
|
+
"""Get a single artifact.
|
1514
|
+
|
1515
|
+
Args:
|
1516
|
+
idlike: Either a uid stub, uid or an integer id.
|
1517
|
+
expressions: Fields and values passed as Django query expressions.
|
1518
|
+
|
1519
|
+
Raises:
|
1520
|
+
:exc:`docs:lamindb.errors.DoesNotExist`: In case no matching record is found.
|
1521
|
+
|
1522
|
+
See Also:
|
1523
|
+
- Guide: :doc:`docs:registries`
|
1524
|
+
- Method in `Record` base class: :meth:`~lamindb.models.Record.get`
|
1525
|
+
|
1526
|
+
Examples::
|
1527
|
+
|
1528
|
+
artifact = ln.Artifact.get("tCUkRcaEjTjhtozp0000")
|
1529
|
+
artifact = ln.Artifact.get(key="my_datasets/my_file.parquet")
|
1530
|
+
"""
|
1531
|
+
from .query_set import QuerySet
|
1532
|
+
|
1533
|
+
return QuerySet(model=cls).get(idlike, **expressions)
|
1534
|
+
|
1523
1535
|
@classmethod
|
1524
1536
|
def from_df(
|
1525
1537
|
cls,
|
@@ -1547,17 +1559,19 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1547
1559
|
:class:`~lamindb.Feature`
|
1548
1560
|
Track features.
|
1549
1561
|
|
1550
|
-
|
1551
|
-
|
1552
|
-
|
1553
|
-
|
1554
|
-
|
1555
|
-
|
1556
|
-
|
1557
|
-
|
1558
|
-
|
1559
|
-
|
1560
|
-
|
1562
|
+
Example::
|
1563
|
+
|
1564
|
+
import lamindb as ln
|
1565
|
+
|
1566
|
+
df = ln.core.datasets.df_iris_in_meter_batch1()
|
1567
|
+
df.head()
|
1568
|
+
#> sepal_length sepal_width petal_length petal_width iris_organism_code
|
1569
|
+
#> 0 0.051 0.035 0.014 0.002 0
|
1570
|
+
#> 1 0.049 0.030 0.014 0.002 0
|
1571
|
+
#> 2 0.047 0.032 0.013 0.002 0
|
1572
|
+
#> 3 0.046 0.031 0.015 0.002 0
|
1573
|
+
#> 4 0.050 0.036 0.014 0.002 0
|
1574
|
+
artifact = ln.Artifact.from_df(df, key="iris/result_batch1.parquet").save()
|
1561
1575
|
"""
|
1562
1576
|
artifact = Artifact( # type: ignore
|
1563
1577
|
data=df,
|
@@ -1600,12 +1614,12 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1600
1614
|
:class:`~lamindb.Feature`
|
1601
1615
|
Track features.
|
1602
1616
|
|
1603
|
-
|
1604
|
-
|
1605
|
-
|
1606
|
-
|
1607
|
-
|
1608
|
-
|
1617
|
+
Example::
|
1618
|
+
|
1619
|
+
import lamindb as ln
|
1620
|
+
|
1621
|
+
adata = ln.core.datasets.anndata_with_obs()
|
1622
|
+
artifact = ln.Artifact.from_anndata(adata, key="mini_anndata_with_obs.h5ad").save()
|
1609
1623
|
"""
|
1610
1624
|
if not data_is_anndata(adata):
|
1611
1625
|
raise ValueError(
|
@@ -1662,12 +1676,12 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1662
1676
|
:class:`~lamindb.Feature`
|
1663
1677
|
Track features.
|
1664
1678
|
|
1665
|
-
|
1666
|
-
|
1667
|
-
|
1668
|
-
|
1669
|
-
|
1670
|
-
|
1679
|
+
Example::
|
1680
|
+
|
1681
|
+
import lamindb as ln
|
1682
|
+
|
1683
|
+
mdata = ln.core.datasets.mudata_papalexi21_subset()
|
1684
|
+
artifact = ln.Artifact.from_mudata(mdata, key="mudata_papalexi21_subset.h5mu").save()
|
1671
1685
|
"""
|
1672
1686
|
if not data_is_mudata(mdata):
|
1673
1687
|
raise ValueError("data has to be a MuData object or a path to MuData-like")
|
@@ -1712,8 +1726,11 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1712
1726
|
:class:`~lamindb.Feature`
|
1713
1727
|
Track features.
|
1714
1728
|
|
1715
|
-
|
1716
|
-
|
1729
|
+
Example::
|
1730
|
+
|
1731
|
+
import lamindb as ln
|
1732
|
+
|
1733
|
+
artifact = ln.Artifact.from_spatialdata(sdata, key="my_dataset.zarr").save()
|
1717
1734
|
"""
|
1718
1735
|
if not data_is_spatialdata(sdata):
|
1719
1736
|
raise ValueError(
|
@@ -1754,9 +1771,11 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1754
1771
|
revises: An old version of the artifact.
|
1755
1772
|
run: The run that creates the artifact.
|
1756
1773
|
|
1757
|
-
|
1758
|
-
|
1759
|
-
|
1774
|
+
Example::
|
1775
|
+
|
1776
|
+
import lamindb as ln
|
1777
|
+
|
1778
|
+
artifact = ln.Artifact.from_tiledbsoma("s3://mybucket/store.tiledbsoma", description="a tiledbsoma store").save()
|
1760
1779
|
"""
|
1761
1780
|
if UPath(path).suffix != ".tiledbsoma":
|
1762
1781
|
raise ValueError(
|
@@ -1798,10 +1817,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1798
1817
|
of a registered storage location, the inferred key defaults to `path.name`.
|
1799
1818
|
run: A `Run` object.
|
1800
1819
|
|
1801
|
-
|
1802
|
-
|
1803
|
-
|
1804
|
-
|
1820
|
+
Example::
|
1821
|
+
|
1822
|
+
import lamindb as ln
|
1823
|
+
|
1824
|
+
dir_path = ln.core.datasets.generate_cell_ranger_files("sample_001", ln.settings.storage)
|
1825
|
+
artifacts = ln.Artifact.from_dir(dir_path)
|
1826
|
+
ln.save(artifacts)
|
1805
1827
|
"""
|
1806
1828
|
from lamindb import settings
|
1807
1829
|
|
@@ -2006,19 +2028,24 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2006
2028
|
Args:
|
2007
2029
|
mode: can only be `"w"` (write mode) for `tiledbsoma` stores,
|
2008
2030
|
otherwise should be always `"r"` (read-only mode).
|
2031
|
+
is_run_input: Whether to track this artifact as run input.
|
2032
|
+
**kwargs: Keyword arguments for the accessor, i.e. `h5py` or `zarr` connection,
|
2033
|
+
`pyarrow.dataset.dataset`.
|
2009
2034
|
|
2010
2035
|
Notes:
|
2011
2036
|
For more info, see tutorial: :doc:`/arrays`.
|
2012
2037
|
|
2013
|
-
|
2038
|
+
Example::
|
2039
|
+
|
2040
|
+
import lamindb as ln
|
2014
2041
|
|
2015
|
-
Read AnnData in backed mode from cloud
|
2042
|
+
# Read AnnData in backed mode from cloud
|
2016
2043
|
|
2017
|
-
|
2018
|
-
|
2019
|
-
AnnDataAccessor object with n_obs × n_vars = 70 × 765
|
2020
|
-
|
2021
|
-
|
2044
|
+
artifact = ln.Artifact.get(key="lndb-storage/pbmc68k.h5ad")
|
2045
|
+
artifact.open()
|
2046
|
+
#> AnnDataAccessor object with n_obs × n_vars = 70 × 765
|
2047
|
+
#> constructed for the AnnData object pbmc68k.h5ad
|
2048
|
+
#> ...
|
2022
2049
|
"""
|
2023
2050
|
if self._overwrite_versions and not self.is_latest:
|
2024
2051
|
raise ValueError(INCONSISTENT_STATE_MSG)
|
@@ -2124,6 +2151,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2124
2151
|
|
2125
2152
|
See all :mod:`~lamindb.core.loaders`.
|
2126
2153
|
|
2154
|
+
Args:
|
2155
|
+
is_run_input: Whether to track this artifact as run input.
|
2156
|
+
**kwargs: Keyword arguments for the loader.
|
2157
|
+
|
2127
2158
|
Examples:
|
2128
2159
|
|
2129
2160
|
Load a `DataFrame`-like artifact:
|
@@ -2185,19 +2216,22 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2185
2216
|
_track_run_input(self, is_run_input)
|
2186
2217
|
return access_memory
|
2187
2218
|
|
2188
|
-
def cache(self, is_run_input: bool | None = None) -> Path:
|
2219
|
+
def cache(self, is_run_input: bool | None = None, **kwargs) -> Path:
|
2189
2220
|
"""Download cloud artifact to local cache.
|
2190
2221
|
|
2191
2222
|
Follows synching logic: only caches an artifact if it's outdated in the local cache.
|
2192
2223
|
|
2193
2224
|
Returns a path to a locally cached on-disk object (say a `.jpg` file).
|
2194
2225
|
|
2195
|
-
|
2226
|
+
Args:
|
2227
|
+
is_run_input: Whether to track this artifact as run input.
|
2228
|
+
**kwargs: Keyword arguments for synchronization.
|
2196
2229
|
|
2197
|
-
|
2230
|
+
Example::
|
2198
2231
|
|
2199
|
-
|
2200
|
-
|
2232
|
+
# Sync file from cloud and return the local path of the cache
|
2233
|
+
artifact.cache()
|
2234
|
+
#> PosixPath('/home/runner/work/Caches/lamindb/lamindb-ci/lndb-storage/pbmc68k.h5ad')
|
2201
2235
|
"""
|
2202
2236
|
from lamindb import settings
|
2203
2237
|
|
@@ -2207,7 +2241,9 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2207
2241
|
filepath, cache_key = filepath_cache_key_from_artifact(
|
2208
2242
|
self, using_key=settings._using_key
|
2209
2243
|
)
|
2210
|
-
cache_path = _synchronize_cleanup_on_error(
|
2244
|
+
cache_path = _synchronize_cleanup_on_error(
|
2245
|
+
filepath, cache_key=cache_key, **kwargs
|
2246
|
+
)
|
2211
2247
|
# only call if sync is successful
|
2212
2248
|
_track_run_input(self, is_run_input)
|
2213
2249
|
return cache_path
|
@@ -2232,18 +2268,19 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2232
2268
|
permanent: Permanently delete the artifact (skip trash).
|
2233
2269
|
storage: Indicate whether you want to delete the artifact in storage.
|
2234
2270
|
|
2235
|
-
|
2271
|
+
Example::
|
2236
2272
|
|
2237
|
-
|
2273
|
+
import lamindb as ln
|
2238
2274
|
|
2239
|
-
|
2240
|
-
|
2275
|
+
# For an `Artifact` object `artifact`, call:
|
2276
|
+
artifact = ln.Artifact.get(key="some.csv")
|
2277
|
+
artifact.delete() # delete a single file artifact
|
2241
2278
|
|
2242
|
-
|
2243
|
-
|
2279
|
+
artifact = ln.Artifact.filter(key="some.tiledbsoma", is_latest=False).first()
|
2280
|
+
artifact.delete() # delete an old version, the data will not be deleted
|
2244
2281
|
|
2245
|
-
|
2246
|
-
|
2282
|
+
artifact = ln.Artifact.get(key="some.tiledbsoma", is_latest=True)
|
2283
|
+
artifact.delete() # delete all versions, the data will be deleted or prompted for deletion.
|
2247
2284
|
"""
|
2248
2285
|
# this first check means an invalid delete fails fast rather than cascading through
|
2249
2286
|
# database and storage permission errors
|
@@ -2337,9 +2374,11 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2337
2374
|
Args:
|
2338
2375
|
upload: Trigger upload to cloud storage in instances with hybrid storage mode.
|
2339
2376
|
|
2340
|
-
|
2341
|
-
|
2342
|
-
|
2377
|
+
Example::
|
2378
|
+
|
2379
|
+
import lamindb as ln
|
2380
|
+
|
2381
|
+
artifact = ln.Artifact("./myfile.csv", key="myfile.csv").save()
|
2343
2382
|
"""
|
2344
2383
|
state_was_adding = self._state.adding
|
2345
2384
|
print_progress = kwargs.pop("print_progress", True)
|
@@ -2408,8 +2447,9 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2408
2447
|
def restore(self) -> None:
|
2409
2448
|
"""Restore from trash.
|
2410
2449
|
|
2411
|
-
|
2412
|
-
|
2450
|
+
Example::
|
2451
|
+
|
2452
|
+
artifact.restore()
|
2413
2453
|
"""
|
2414
2454
|
self._branch_code = 1
|
2415
2455
|
self.save()
|
@@ -2417,8 +2457,9 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2417
2457
|
def describe(self) -> None:
|
2418
2458
|
"""Describe relations of record.
|
2419
2459
|
|
2420
|
-
|
2421
|
-
|
2460
|
+
Example::
|
2461
|
+
|
2462
|
+
artifact.describe()
|
2422
2463
|
"""
|
2423
2464
|
return describe_artifact_collection(self)
|
2424
2465
|
|
@@ -2428,11 +2469,12 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2428
2469
|
|
2429
2470
|
# can't really just call .cache in .load because of double tracking
|
2430
2471
|
def _synchronize_cleanup_on_error(
|
2431
|
-
filepath: UPath, cache_key: str | None = None
|
2472
|
+
filepath: UPath, cache_key: str | None = None, **kwargs
|
2432
2473
|
) -> UPath:
|
2433
2474
|
try:
|
2475
|
+
print_progress = kwargs.pop("print_progress", True)
|
2434
2476
|
cache_path = setup_settings.paths.cloud_to_local(
|
2435
|
-
filepath, cache_key=cache_key, print_progress=
|
2477
|
+
filepath, cache_key=cache_key, print_progress=print_progress, **kwargs
|
2436
2478
|
)
|
2437
2479
|
except Exception as e:
|
2438
2480
|
if not isinstance(filepath, LocalPathClasses):
|
@@ -2478,8 +2520,9 @@ class ArtifactParamValue(BasicRecord, LinkORM, TracksRun):
|
|
2478
2520
|
|
2479
2521
|
|
2480
2522
|
def _track_run_input(
|
2481
|
-
data:
|
2482
|
-
|
2523
|
+
data: (
|
2524
|
+
Artifact | Iterable[Artifact]
|
2525
|
+
), # can also be Collection | Iterable[Collection]
|
2483
2526
|
is_run_input: bool | Run | None = None,
|
2484
2527
|
run: Run | None = None,
|
2485
2528
|
):
|
@@ -2543,10 +2586,10 @@ def _track_run_input(
|
|
2543
2586
|
if run is None:
|
2544
2587
|
if settings.track_run_inputs:
|
2545
2588
|
# here we check that this is not a read-only connection
|
2546
|
-
# normally for our connection strings the read-only role name has
|
2589
|
+
# normally for our connection strings the read-only role name has "read" in it
|
2547
2590
|
# not absolutely safe but the worst case is that the warning is not shown
|
2548
2591
|
instance = setup_settings.instance
|
2549
|
-
if instance.dialect != "postgresql" or "
|
2592
|
+
if instance.dialect != "postgresql" or "read" not in instance.db:
|
2550
2593
|
logger.warning(WARNING_NO_INPUT)
|
2551
2594
|
# assume we have a run record
|
2552
2595
|
else:
|