lamindb 1.2a2__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -37,6 +37,7 @@ from lamindb.errors import FieldValidationError
37
37
  from lamindb.models.query_set import QuerySet
38
38
 
39
39
  from ..base.users import current_user_id
40
+ from ..core._compat import is_package_installed
40
41
  from ..core.loaders import load_to_memory
41
42
  from ..core.storage import (
42
43
  LocalPathClasses,
@@ -48,7 +49,6 @@ from ..core.storage import (
48
49
  from ..core.storage._anndata_accessor import _anndata_n_observations
49
50
  from ..core.storage._pyarrow_dataset import PYARROW_SUFFIXES
50
51
  from ..core.storage._tiledbsoma import _soma_n_observations
51
- from ..core.storage.objects import is_package_installed
52
52
  from ..core.storage.paths import (
53
53
  AUTO_KEY_PREFIX,
54
54
  auto_storage_key_from_artifact,
@@ -113,6 +113,7 @@ if TYPE_CHECKING:
113
113
 
114
114
  from lamindb.base.types import StrField
115
115
  from lamindb.core.storage._backed_access import AnnDataAccessor, BackedAccessor
116
+ from lamindb.core.types import ScverseDataStructures
116
117
 
117
118
  from ..base.types import (
118
119
  ArtifactKind,
@@ -126,7 +127,7 @@ if TYPE_CHECKING:
126
127
  INCONSISTENT_STATE_MSG = (
127
128
  "Trying to read a folder artifact from an outdated version, "
128
129
  "this can result in an incosistent state.\n"
129
- "Read from the latest version: artifact.versions.filter(is_latest=True).one()"
130
+ "Read from the latest version: artifact.versions.get(is_latest=True)"
130
131
  )
131
132
 
132
133
 
@@ -364,7 +365,7 @@ def get_relative_path_to_directory(
364
365
 
365
366
  def get_artifact_kwargs_from_data(
366
367
  *,
367
- data: Path | UPath | str | pd.DataFrame | AnnData | MuData,
368
+ data: Path | UPath | str | pd.DataFrame | ScverseDataStructures,
368
369
  key: str | None,
369
370
  run: Run | None,
370
371
  format: str | None,
@@ -554,7 +555,7 @@ def data_is_spatialdata(data: SpatialData | UPathStr) -> bool:
554
555
 
555
556
 
556
557
  def _check_otype_artifact(
557
- data: UPathStr | pd.DataFrame | AnnData | MuData | SpatialData,
558
+ data: UPathStr | pd.DataFrame | ScverseDataStructures,
558
559
  otype: str | None = None,
559
560
  ) -> str:
560
561
  if otype is None:
@@ -601,10 +602,10 @@ def get_run(run: Run | None) -> Run | None:
601
602
  run = context.run
602
603
  if run is None and not settings.creation.artifact_silence_missing_run_warning:
603
604
  # here we check that this is not a read-only connection
604
- # normally for our connection strings the read-only role name has _read in it
605
+ # normally for our connection strings the read-only role name has "read" in it
605
606
  # not absolutely safe but the worst case is that the warning is not shown
606
607
  instance = setup_settings.instance
607
- if instance.dialect != "postgresql" or "_read" not in instance.db:
608
+ if instance.dialect != "postgresql" or "read" not in instance.db:
608
609
  logger.warning(WARNING_RUN_TRANSFORM)
609
610
  # suppress run by passing False
610
611
  elif not run:
@@ -1467,39 +1468,23 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
1467
1468
  def n_objects(self) -> int:
1468
1469
  return self.n_files
1469
1470
 
1470
- # add the below because this is what people will have in their code
1471
- # if they implement the recommended migration strategy
1472
- # - FeatureSet -> Schema
1473
- # - featureset -> schema
1474
- # - feature_set -> schema
1475
- # @property
1476
- # def schemas(self) -> QuerySet[Schema]:
1477
- # """Schemas linked to artifact via many-to-many relationship.
1478
-
1479
- # Is now mediating the private `.feature_sets` relationship during
1480
- # a transition period to better schema management.
1481
-
1482
- # .. versionchanged: 1.0
1483
- # Was previously called `.feature_sets`.
1484
-
1485
- # """
1486
- # return self.feature_sets
1487
-
1488
1471
  @property
1489
1472
  def path(self) -> Path:
1490
1473
  """Path.
1491
1474
 
1492
- File in cloud storage, here AWS S3:
1475
+ Example::
1493
1476
 
1494
- >>> artifact = ln.Artifact("s3://my-bucket/my-file.csv").save()
1495
- >>> artifact.path
1496
- S3QueryPath('s3://my-bucket/my-file.csv')
1477
+ import lamindb as ln
1497
1478
 
1498
- File in local storage:
1479
+ # File in cloud storage, here AWS S3:
1480
+ artifact = ln.Artifact("s3://my-bucket/my-file.csv").save()
1481
+ artifact.path
1482
+ #S3QueryPath('s3://my-bucket/my-file.csv')
1499
1483
 
1500
- >>> ln.Artifact("./myfile.csv", key="myfile.csv").save()
1501
- >>> artifact.path
1502
- PosixPath('/home/runner/work/lamindb/lamindb/docs/guide/mydata/myfile.csv')
1484
+ # File in local storage:
1485
+ ln.Artifact("./myfile.csv", key="myfile.csv").save()
1486
+ artifact.path
1487
+ #> PosixPath('/home/runner/work/lamindb/lamindb/docs/guide/mydata/myfile.csv')
1503
1488
  """
1504
1489
  from lamindb import settings
1505
1490
 
@@ -1519,6 +1504,34 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
1519
1504
  filepath, cache_key=cache_key
1520
1505
  )
1521
1506
 
1507
+ @classmethod
1508
+ def get(
1509
+ cls,
1510
+ idlike: int | str | None = None,
1511
+ **expressions,
1512
+ ) -> Artifact:
1513
+ """Get a single artifact.
1514
+
1515
+ Args:
1516
+ idlike: Either a uid stub, uid or an integer id.
1517
+ expressions: Fields and values passed as Django query expressions.
1518
+
1519
+ Raises:
1520
+ :exc:`docs:lamindb.errors.DoesNotExist`: In case no matching record is found.
1521
+
1522
+ See Also:
1523
+ - Guide: :doc:`docs:registries`
1524
+ - Method in `Record` base class: :meth:`~lamindb.models.Record.get`
1525
+
1526
+ Examples::
1527
+
1528
+ artifact = ln.Artifact.get("tCUkRcaEjTjhtozp0000")
1529
+ artifact = ln.Artifact.get(key="my_datasets/my_file.parquet")
1530
+ """
1531
+ from .query_set import QuerySet
1532
+
1533
+ return QuerySet(model=cls).get(idlike, **expressions)
1534
+
1522
1535
  @classmethod
1523
1536
  def from_df(
1524
1537
  cls,
@@ -1546,17 +1559,19 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
1546
1559
  :class:`~lamindb.Feature`
1547
1560
  Track features.
1548
1561
 
1549
- Examples:
1550
- >>> df = ln.core.datasets.df_iris_in_meter_batch1()
1551
- >>> df.head()
1552
- sepal_length sepal_width petal_length petal_width iris_organism_code
1553
- 0 0.051 0.035 0.014 0.002 0
1554
- 1 0.049 0.030 0.014 0.002 0
1555
- 2 0.047 0.032 0.013 0.002 0
1556
- 3 0.046 0.031 0.015 0.002 0
1557
- 4 0.050 0.036 0.014 0.002 0
1558
- >>> artifact = ln.Artifact.from_df(df, description="Iris flower collection batch1")
1559
- >>> artifact.save()
1562
+ Example::
1563
+
1564
+ import lamindb as ln
1565
+
1566
+ df = ln.core.datasets.df_iris_in_meter_batch1()
1567
+ df.head()
1568
+ #> sepal_length sepal_width petal_length petal_width iris_organism_code
1569
+ #> 0 0.051 0.035 0.014 0.002 0
1570
+ #> 1 0.049 0.030 0.014 0.002 0
1571
+ #> 2 0.047 0.032 0.013 0.002 0
1572
+ #> 3 0.046 0.031 0.015 0.002 0
1573
+ #> 4 0.050 0.036 0.014 0.002 0
1574
+ artifact = ln.Artifact.from_df(df, key="iris/result_batch1.parquet").save()
1560
1575
  """
1561
1576
  artifact = Artifact( # type: ignore
1562
1577
  data=df,
@@ -1599,12 +1614,12 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
1599
1614
  :class:`~lamindb.Feature`
1600
1615
  Track features.
1601
1616
 
1602
- Examples:
1603
- >>> import bionty as bt
1604
- >>> bt.settings.organism = "human"
1605
- >>> adata = ln.core.datasets.anndata_with_obs()
1606
- >>> artifact = ln.Artifact.from_anndata(adata, description="mini anndata with obs")
1607
- >>> artifact.save()
1617
+ Example::
1618
+
1619
+ import lamindb as ln
1620
+
1621
+ adata = ln.core.datasets.anndata_with_obs()
1622
+ artifact = ln.Artifact.from_anndata(adata, key="mini_anndata_with_obs.h5ad").save()
1608
1623
  """
1609
1624
  if not data_is_anndata(adata):
1610
1625
  raise ValueError(
@@ -1661,12 +1676,12 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
1661
1676
  :class:`~lamindb.Feature`
1662
1677
  Track features.
1663
1678
 
1664
- Examples:
1665
- >>> import bionty as bt
1666
- >>> bt.settings.organism = "human"
1667
- >>> mdata = ln.core.datasets.mudata_papalexi21_subset()
1668
- >>> artifact = ln.Artifact.from_mudata(mdata, description="a mudata object")
1669
- >>> artifact.save()
1679
+ Example::
1680
+
1681
+ import lamindb as ln
1682
+
1683
+ mdata = ln.core.datasets.mudata_papalexi21_subset()
1684
+ artifact = ln.Artifact.from_mudata(mdata, key="mudata_papalexi21_subset.h5mu").save()
1670
1685
  """
1671
1686
  if not data_is_mudata(mdata):
1672
1687
  raise ValueError("data has to be a MuData object or a path to MuData-like")
@@ -1711,8 +1726,11 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
1711
1726
  :class:`~lamindb.Feature`
1712
1727
  Track features.
1713
1728
 
1714
- Examples:
1715
- >>> artifact = ln.Artifact.from_spatialdata(sdata, key="my_dataset.zarr")
1729
+ Example::
1730
+
1731
+ import lamindb as ln
1732
+
1733
+ artifact = ln.Artifact.from_spatialdata(sdata, key="my_dataset.zarr").save()
1716
1734
  """
1717
1735
  if not data_is_spatialdata(sdata):
1718
1736
  raise ValueError(
@@ -1753,9 +1771,11 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
1753
1771
  revises: An old version of the artifact.
1754
1772
  run: The run that creates the artifact.
1755
1773
 
1756
- Examples:
1757
- >>> artifact = ln.Artifact.from_tiledbsoma("s3://mybucket/store.tiledbsoma", description="a tiledbsoma store")
1758
- >>> artifact.save()
1774
+ Example::
1775
+
1776
+ import lamindb as ln
1777
+
1778
+ artifact = ln.Artifact.from_tiledbsoma("s3://mybucket/store.tiledbsoma", description="a tiledbsoma store").save()
1759
1779
  """
1760
1780
  if UPath(path).suffix != ".tiledbsoma":
1761
1781
  raise ValueError(
@@ -1797,10 +1817,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
1797
1817
  of a registered storage location, the inferred key defaults to `path.name`.
1798
1818
  run: A `Run` object.
1799
1819
 
1800
- Examples:
1801
- >>> dir_path = ln.core.datasets.generate_cell_ranger_files("sample_001", ln.settings.storage)
1802
- >>> artifacts = ln.Artifact.from_dir(dir_path)
1803
- >>> ln.save(artifacts)
1820
+ Example::
1821
+
1822
+ import lamindb as ln
1823
+
1824
+ dir_path = ln.core.datasets.generate_cell_ranger_files("sample_001", ln.settings.storage)
1825
+ artifacts = ln.Artifact.from_dir(dir_path)
1826
+ ln.save(artifacts)
1804
1827
  """
1805
1828
  from lamindb import settings
1806
1829
 
@@ -2005,19 +2028,24 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2005
2028
  Args:
2006
2029
  mode: can only be `"w"` (write mode) for `tiledbsoma` stores,
2007
2030
  otherwise should be always `"r"` (read-only mode).
2031
+ is_run_input: Whether to track this artifact as run input.
2032
+ **kwargs: Keyword arguments for the accessor, i.e. `h5py` or `zarr` connection,
2033
+ `pyarrow.dataset.dataset`.
2008
2034
 
2009
2035
  Notes:
2010
2036
  For more info, see tutorial: :doc:`/arrays`.
2011
2037
 
2012
- Examples:
2038
+ Example::
2039
+
2040
+ import lamindb as ln
2013
2041
 
2014
- Read AnnData in backed mode from cloud:
2042
+ # Read AnnData in backed mode from cloud
2015
2043
 
2016
- >>> artifact = ln.Artifact.get(key="lndb-storage/pbmc68k.h5ad")
2017
- >>> artifact.open()
2018
- AnnDataAccessor object with n_obs × n_vars = 70 × 765
2019
- constructed for the AnnData object pbmc68k.h5ad
2020
- ...
2044
+ artifact = ln.Artifact.get(key="lndb-storage/pbmc68k.h5ad")
2045
+ artifact.open()
2046
+ #> AnnDataAccessor object with n_obs × n_vars = 70 × 765
2047
+ #> constructed for the AnnData object pbmc68k.h5ad
2048
+ #> ...
2021
2049
  """
2022
2050
  if self._overwrite_versions and not self.is_latest:
2023
2051
  raise ValueError(INCONSISTENT_STATE_MSG)
@@ -2123,6 +2151,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2123
2151
 
2124
2152
  See all :mod:`~lamindb.core.loaders`.
2125
2153
 
2154
+ Args:
2155
+ is_run_input: Whether to track this artifact as run input.
2156
+ **kwargs: Keyword arguments for the loader.
2157
+
2126
2158
  Examples:
2127
2159
 
2128
2160
  Load a `DataFrame`-like artifact:
@@ -2184,19 +2216,22 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2184
2216
  _track_run_input(self, is_run_input)
2185
2217
  return access_memory
2186
2218
 
2187
- def cache(self, is_run_input: bool | None = None) -> Path:
2219
+ def cache(self, is_run_input: bool | None = None, **kwargs) -> Path:
2188
2220
  """Download cloud artifact to local cache.
2189
2221
 
2190
2222
  Follows synching logic: only caches an artifact if it's outdated in the local cache.
2191
2223
 
2192
2224
  Returns a path to a locally cached on-disk object (say a `.jpg` file).
2193
2225
 
2194
- Examples:
2226
+ Args:
2227
+ is_run_input: Whether to track this artifact as run input.
2228
+ **kwargs: Keyword arguments for synchronization.
2195
2229
 
2196
- Sync file from cloud and return the local path of the cache:
2230
+ Example::
2197
2231
 
2198
- >>> artifact.cache()
2199
- PosixPath('/home/runner/work/Caches/lamindb/lamindb-ci/lndb-storage/pbmc68k.h5ad')
2232
+ # Sync file from cloud and return the local path of the cache
2233
+ artifact.cache()
2234
+ #> PosixPath('/home/runner/work/Caches/lamindb/lamindb-ci/lndb-storage/pbmc68k.h5ad')
2200
2235
  """
2201
2236
  from lamindb import settings
2202
2237
 
@@ -2206,7 +2241,9 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2206
2241
  filepath, cache_key = filepath_cache_key_from_artifact(
2207
2242
  self, using_key=settings._using_key
2208
2243
  )
2209
- cache_path = _synchronize_cleanup_on_error(filepath, cache_key=cache_key)
2244
+ cache_path = _synchronize_cleanup_on_error(
2245
+ filepath, cache_key=cache_key, **kwargs
2246
+ )
2210
2247
  # only call if sync is successful
2211
2248
  _track_run_input(self, is_run_input)
2212
2249
  return cache_path
@@ -2231,18 +2268,19 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2231
2268
  permanent: Permanently delete the artifact (skip trash).
2232
2269
  storage: Indicate whether you want to delete the artifact in storage.
2233
2270
 
2234
- Examples:
2271
+ Example::
2235
2272
 
2236
- For an `Artifact` object `artifact`, call:
2273
+ import lamindb as ln
2237
2274
 
2238
- >>> artifact = ln.Artifact.filter(key="some.csv").one()
2239
- >>> artifact.delete() # delete a single file artifact
2275
+ # For an `Artifact` object `artifact`, call:
2276
+ artifact = ln.Artifact.get(key="some.csv")
2277
+ artifact.delete() # delete a single file artifact
2240
2278
 
2241
- >>> artifact = ln.Artifact.filter(key="some.tiledbsoma". is_latest=False).first()
2242
- >>> artiact.delete() # delete an old version, the data will not be deleted
2279
+ artifact = ln.Artifact.filter(key="some.tiledbsoma", is_latest=False).first()
2280
+ artifact.delete() # delete an old version, the data will not be deleted
2243
2281
 
2244
- >>> artifact = ln.Artifact.filter(key="some.tiledbsoma". is_latest=True).one()
2245
- >>> artiact.delete() # delete all versions, the data will be deleted or prompted for deletion.
2282
+ artifact = ln.Artifact.get(key="some.tiledbsoma", is_latest=True)
2283
+ artifact.delete() # delete all versions, the data will be deleted or prompted for deletion.
2246
2284
  """
2247
2285
  # this first check means an invalid delete fails fast rather than cascading through
2248
2286
  # database and storage permission errors
@@ -2336,9 +2374,11 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2336
2374
  Args:
2337
2375
  upload: Trigger upload to cloud storage in instances with hybrid storage mode.
2338
2376
 
2339
- Examples:
2340
- >>> artifact = ln.Artifact("./myfile.csv", description="myfile")
2341
- >>> artifact.save()
2377
+ Example::
2378
+
2379
+ import lamindb as ln
2380
+
2381
+ artifact = ln.Artifact("./myfile.csv", key="myfile.parquet").save()
2342
2382
  """
2343
2383
  state_was_adding = self._state.adding
2344
2384
  print_progress = kwargs.pop("print_progress", True)
@@ -2407,8 +2447,9 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2407
2447
  def restore(self) -> None:
2408
2448
  """Restore from trash.
2409
2449
 
2410
- Examples:
2411
- >>> artifact.restore()
2450
+ Example::
2451
+
2452
+ artifact.restore()
2412
2453
  """
2413
2454
  self._branch_code = 1
2414
2455
  self.save()
@@ -2416,8 +2457,9 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2416
2457
  def describe(self) -> None:
2417
2458
  """Describe relations of record.
2418
2459
 
2419
- Examples:
2420
- >>> artifact.describe()
2460
+ Example::
2461
+
2462
+ artifact.describe()
2421
2463
  """
2422
2464
  return describe_artifact_collection(self)
2423
2465
 
@@ -2427,11 +2469,12 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2427
2469
 
2428
2470
  # can't really just call .cache in .load because of double tracking
2429
2471
  def _synchronize_cleanup_on_error(
2430
- filepath: UPath, cache_key: str | None = None
2472
+ filepath: UPath, cache_key: str | None = None, **kwargs
2431
2473
  ) -> UPath:
2432
2474
  try:
2475
+ print_progress = kwargs.pop("print_progress", True)
2433
2476
  cache_path = setup_settings.paths.cloud_to_local(
2434
- filepath, cache_key=cache_key, print_progress=True
2477
+ filepath, cache_key=cache_key, print_progress=print_progress, **kwargs
2435
2478
  )
2436
2479
  except Exception as e:
2437
2480
  if not isinstance(filepath, LocalPathClasses):
@@ -2477,8 +2520,9 @@ class ArtifactParamValue(BasicRecord, LinkORM, TracksRun):
2477
2520
 
2478
2521
 
2479
2522
  def _track_run_input(
2480
- data: Artifact
2481
- | Iterable[Artifact], # can also be Collection | Iterable[Collection]
2523
+ data: (
2524
+ Artifact | Iterable[Artifact]
2525
+ ), # can also be Collection | Iterable[Collection]
2482
2526
  is_run_input: bool | Run | None = None,
2483
2527
  run: Run | None = None,
2484
2528
  ):
@@ -2542,10 +2586,10 @@ def _track_run_input(
2542
2586
  if run is None:
2543
2587
  if settings.track_run_inputs:
2544
2588
  # here we check that this is not a read-only connection
2545
- # normally for our connection strings the read-only role name has _read in it
2589
+ # normally for our connection strings the read-only role name has "read" in it
2546
2590
  # not absolutely safe but the worst case is that the warning is not shown
2547
2591
  instance = setup_settings.instance
2548
- if instance.dialect != "postgresql" or "_read" not in instance.db:
2592
+ if instance.dialect != "postgresql" or "read" not in instance.db:
2549
2593
  logger.warning(WARNING_NO_INPUT)
2550
2594
  # assume we have a run record
2551
2595
  else: