lamindb 1.2.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -127,7 +127,7 @@ if TYPE_CHECKING:
127
127
  INCONSISTENT_STATE_MSG = (
128
128
  "Trying to read a folder artifact from an outdated version, "
129
129
  "this can result in an inconsistent state.\n"
130
- "Read from the latest version: artifact.versions.filter(is_latest=True).one()"
130
+ "Read from the latest version: artifact.versions.get(is_latest=True)"
131
131
  )
132
132
 
133
133
 
@@ -602,10 +602,10 @@ def get_run(run: Run | None) -> Run | None:
602
602
  run = context.run
603
603
  if run is None and not settings.creation.artifact_silence_missing_run_warning:
604
604
  # here we check that this is not a read-only connection
605
- # normally for our connection strings the read-only role name has _read in it
605
+ # normally for our connection strings the read-only role name has "read" in it
606
606
  # not absolutely safe but the worst case is that the warning is not shown
607
607
  instance = setup_settings.instance
608
- if instance.dialect != "postgresql" or "_read" not in instance.db:
608
+ if instance.dialect != "postgresql" or "read" not in instance.db:
609
609
  logger.warning(WARNING_RUN_TRANSFORM)
610
610
  # suppress run by passing False
611
611
  elif not run:
@@ -1468,39 +1468,23 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
1468
1468
  def n_objects(self) -> int:
1469
1469
  return self.n_files
1470
1470
 
1471
- # add the below because this is what people will have in their code
1472
- # if they implement the recommended migration strategy
1473
- # - FeatureSet -> Schema
1474
- # - featureset -> schema
1475
- # - feature_set -> schema
1476
- # @property
1477
- # def schemas(self) -> QuerySet[Schema]:
1478
- # """Schemas linked to artifact via many-to-many relationship.
1479
-
1480
- # Is now mediating the private `.feature_sets` relationship during
1481
- # a transition period to better schema management.
1482
-
1483
- # .. versionchanged: 1.0
1484
- # Was previously called `.feature_sets`.
1485
-
1486
- # """
1487
- # return self.feature_sets
1488
-
1489
1471
  @property
1490
1472
  def path(self) -> Path:
1491
1473
  """Path.
1492
1474
 
1493
- File in cloud storage, here AWS S3:
1475
+ Example::
1494
1476
 
1495
- >>> artifact = ln.Artifact("s3://my-bucket/my-file.csv").save()
1496
- >>> artifact.path
1497
- S3QueryPath('s3://my-bucket/my-file.csv')
1477
+ import lamindb as ln
1498
1478
 
1499
- File in local storage:
1479
+ # File in cloud storage, here AWS S3:
1480
+ artifact = ln.Artifact("s3://my-bucket/my-file.csv").save()
1481
+ artifact.path
1482
+ #> S3QueryPath('s3://my-bucket/my-file.csv')
1500
1483
 
1501
- >>> ln.Artifact("./myfile.csv", key="myfile.csv").save()
1502
- >>> artifact.path
1503
- PosixPath('/home/runner/work/lamindb/lamindb/docs/guide/mydata/myfile.csv')
1484
+ # File in local storage:
1485
+ ln.Artifact("./myfile.csv", key="myfile.csv").save()
1486
+ artifact.path
1487
+ #> PosixPath('/home/runner/work/lamindb/lamindb/docs/guide/mydata/myfile.csv')
1504
1488
  """
1505
1489
  from lamindb import settings
1506
1490
 
@@ -1520,6 +1504,34 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
1520
1504
  filepath, cache_key=cache_key
1521
1505
  )
1522
1506
 
1507
+ @classmethod
1508
+ def get(
1509
+ cls,
1510
+ idlike: int | str | None = None,
1511
+ **expressions,
1512
+ ) -> Artifact:
1513
+ """Get a single artifact.
1514
+
1515
+ Args:
1516
+ idlike: Either a uid stub, uid or an integer id.
1517
+ expressions: Fields and values passed as Django query expressions.
1518
+
1519
+ Raises:
1520
+ :exc:`docs:lamindb.errors.DoesNotExist`: In case no matching record is found.
1521
+
1522
+ See Also:
1523
+ - Guide: :doc:`docs:registries`
1524
+ - Method in `Record` base class: :meth:`~lamindb.models.Record.get`
1525
+
1526
+ Examples::
1527
+
1528
+ artifact = ln.Artifact.get("tCUkRcaEjTjhtozp0000")
1529
+ artifact = ln.Artifact.get(key="my_datasets/my_file.parquet")
1530
+ """
1531
+ from .query_set import QuerySet
1532
+
1533
+ return QuerySet(model=cls).get(idlike, **expressions)
1534
+
1523
1535
  @classmethod
1524
1536
  def from_df(
1525
1537
  cls,
@@ -1547,17 +1559,19 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
1547
1559
  :class:`~lamindb.Feature`
1548
1560
  Track features.
1549
1561
 
1550
- Examples:
1551
- >>> df = ln.core.datasets.df_iris_in_meter_batch1()
1552
- >>> df.head()
1553
- sepal_length sepal_width petal_length petal_width iris_organism_code
1554
- 0 0.051 0.035 0.014 0.002 0
1555
- 1 0.049 0.030 0.014 0.002 0
1556
- 2 0.047 0.032 0.013 0.002 0
1557
- 3 0.046 0.031 0.015 0.002 0
1558
- 4 0.050 0.036 0.014 0.002 0
1559
- >>> artifact = ln.Artifact.from_df(df, description="Iris flower collection batch1")
1560
- >>> artifact.save()
1562
+ Example::
1563
+
1564
+ import lamindb as ln
1565
+
1566
+ df = ln.core.datasets.df_iris_in_meter_batch1()
1567
+ df.head()
1568
+ #> sepal_length sepal_width petal_length petal_width iris_organism_code
1569
+ #> 0 0.051 0.035 0.014 0.002 0
1570
+ #> 1 0.049 0.030 0.014 0.002 0
1571
+ #> 2 0.047 0.032 0.013 0.002 0
1572
+ #> 3 0.046 0.031 0.015 0.002 0
1573
+ #> 4 0.050 0.036 0.014 0.002 0
1574
+ artifact = ln.Artifact.from_df(df, key="iris/result_batch1.parquet").save()
1561
1575
  """
1562
1576
  artifact = Artifact( # type: ignore
1563
1577
  data=df,
@@ -1600,12 +1614,12 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
1600
1614
  :class:`~lamindb.Feature`
1601
1615
  Track features.
1602
1616
 
1603
- Examples:
1604
- >>> import bionty as bt
1605
- >>> bt.settings.organism = "human"
1606
- >>> adata = ln.core.datasets.anndata_with_obs()
1607
- >>> artifact = ln.Artifact.from_anndata(adata, description="mini anndata with obs")
1608
- >>> artifact.save()
1617
+ Example::
1618
+
1619
+ import lamindb as ln
1620
+
1621
+ adata = ln.core.datasets.anndata_with_obs()
1622
+ artifact = ln.Artifact.from_anndata(adata, key="mini_anndata_with_obs.h5ad").save()
1609
1623
  """
1610
1624
  if not data_is_anndata(adata):
1611
1625
  raise ValueError(
@@ -1662,12 +1676,12 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
1662
1676
  :class:`~lamindb.Feature`
1663
1677
  Track features.
1664
1678
 
1665
- Examples:
1666
- >>> import bionty as bt
1667
- >>> bt.settings.organism = "human"
1668
- >>> mdata = ln.core.datasets.mudata_papalexi21_subset()
1669
- >>> artifact = ln.Artifact.from_mudata(mdata, description="a mudata object")
1670
- >>> artifact.save()
1679
+ Example::
1680
+
1681
+ import lamindb as ln
1682
+
1683
+ mdata = ln.core.datasets.mudata_papalexi21_subset()
1684
+ artifact = ln.Artifact.from_mudata(mdata, key="mudata_papalexi21_subset.h5mu").save()
1671
1685
  """
1672
1686
  if not data_is_mudata(mdata):
1673
1687
  raise ValueError("data has to be a MuData object or a path to MuData-like")
@@ -1712,8 +1726,11 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
1712
1726
  :class:`~lamindb.Feature`
1713
1727
  Track features.
1714
1728
 
1715
- Examples:
1716
- >>> artifact = ln.Artifact.from_spatialdata(sdata, key="my_dataset.zarr")
1729
+ Example::
1730
+
1731
+ import lamindb as ln
1732
+
1733
+ artifact = ln.Artifact.from_spatialdata(sdata, key="my_dataset.zarr").save()
1717
1734
  """
1718
1735
  if not data_is_spatialdata(sdata):
1719
1736
  raise ValueError(
@@ -1754,9 +1771,11 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
1754
1771
  revises: An old version of the artifact.
1755
1772
  run: The run that creates the artifact.
1756
1773
 
1757
- Examples:
1758
- >>> artifact = ln.Artifact.from_tiledbsoma("s3://mybucket/store.tiledbsoma", description="a tiledbsoma store")
1759
- >>> artifact.save()
1774
+ Example::
1775
+
1776
+ import lamindb as ln
1777
+
1778
+ artifact = ln.Artifact.from_tiledbsoma("s3://mybucket/store.tiledbsoma", description="a tiledbsoma store").save()
1760
1779
  """
1761
1780
  if UPath(path).suffix != ".tiledbsoma":
1762
1781
  raise ValueError(
@@ -1798,10 +1817,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
1798
1817
  of a registered storage location, the inferred key defaults to `path.name`.
1799
1818
  run: A `Run` object.
1800
1819
 
1801
- Examples:
1802
- >>> dir_path = ln.core.datasets.generate_cell_ranger_files("sample_001", ln.settings.storage)
1803
- >>> artifacts = ln.Artifact.from_dir(dir_path)
1804
- >>> ln.save(artifacts)
1820
+ Example::
1821
+
1822
+ import lamindb as ln
1823
+
1824
+ dir_path = ln.core.datasets.generate_cell_ranger_files("sample_001", ln.settings.storage)
1825
+ artifacts = ln.Artifact.from_dir(dir_path)
1826
+ ln.save(artifacts)
1805
1827
  """
1806
1828
  from lamindb import settings
1807
1829
 
@@ -2006,19 +2028,24 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2006
2028
  Args:
2007
2029
  mode: can only be `"w"` (write mode) for `tiledbsoma` stores,
2008
2030
  otherwise should be always `"r"` (read-only mode).
2031
+ is_run_input: Whether to track this artifact as run input.
2032
+ **kwargs: Keyword arguments for the accessor, i.e. `h5py` or `zarr` connection,
2033
+ `pyarrow.dataset.dataset`.
2009
2034
 
2010
2035
  Notes:
2011
2036
  For more info, see tutorial: :doc:`/arrays`.
2012
2037
 
2013
- Examples:
2038
+ Example::
2039
+
2040
+ import lamindb as ln
2014
2041
 
2015
- Read AnnData in backed mode from cloud:
2042
+ # Read AnnData in backed mode from cloud
2016
2043
 
2017
- >>> artifact = ln.Artifact.get(key="lndb-storage/pbmc68k.h5ad")
2018
- >>> artifact.open()
2019
- AnnDataAccessor object with n_obs × n_vars = 70 × 765
2020
- constructed for the AnnData object pbmc68k.h5ad
2021
- ...
2044
+ artifact = ln.Artifact.get(key="lndb-storage/pbmc68k.h5ad")
2045
+ artifact.open()
2046
+ #> AnnDataAccessor object with n_obs × n_vars = 70 × 765
2047
+ #> constructed for the AnnData object pbmc68k.h5ad
2048
+ #> ...
2022
2049
  """
2023
2050
  if self._overwrite_versions and not self.is_latest:
2024
2051
  raise ValueError(INCONSISTENT_STATE_MSG)
@@ -2124,6 +2151,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2124
2151
 
2125
2152
  See all :mod:`~lamindb.core.loaders`.
2126
2153
 
2154
+ Args:
2155
+ is_run_input: Whether to track this artifact as run input.
2156
+ **kwargs: Keyword arguments for the loader.
2157
+
2127
2158
  Examples:
2128
2159
 
2129
2160
  Load a `DataFrame`-like artifact:
@@ -2185,19 +2216,22 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2185
2216
  _track_run_input(self, is_run_input)
2186
2217
  return access_memory
2187
2218
 
2188
- def cache(self, is_run_input: bool | None = None) -> Path:
2219
+ def cache(self, is_run_input: bool | None = None, **kwargs) -> Path:
2189
2220
  """Download cloud artifact to local cache.
2190
2221
 
2191
2222
  Follows synching logic: only caches an artifact if it's outdated in the local cache.
2192
2223
 
2193
2224
  Returns a path to a locally cached on-disk object (say a `.jpg` file).
2194
2225
 
2195
- Examples:
2226
+ Args:
2227
+ is_run_input: Whether to track this artifact as run input.
2228
+ **kwargs: Keyword arguments for synchronization.
2196
2229
 
2197
- Sync file from cloud and return the local path of the cache:
2230
+ Example::
2198
2231
 
2199
- >>> artifact.cache()
2200
- PosixPath('/home/runner/work/Caches/lamindb/lamindb-ci/lndb-storage/pbmc68k.h5ad')
2232
+ # Sync file from cloud and return the local path of the cache
2233
+ artifact.cache()
2234
+ #> PosixPath('/home/runner/work/Caches/lamindb/lamindb-ci/lndb-storage/pbmc68k.h5ad')
2201
2235
  """
2202
2236
  from lamindb import settings
2203
2237
 
@@ -2207,7 +2241,9 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2207
2241
  filepath, cache_key = filepath_cache_key_from_artifact(
2208
2242
  self, using_key=settings._using_key
2209
2243
  )
2210
- cache_path = _synchronize_cleanup_on_error(filepath, cache_key=cache_key)
2244
+ cache_path = _synchronize_cleanup_on_error(
2245
+ filepath, cache_key=cache_key, **kwargs
2246
+ )
2211
2247
  # only call if sync is successful
2212
2248
  _track_run_input(self, is_run_input)
2213
2249
  return cache_path
@@ -2232,18 +2268,19 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2232
2268
  permanent: Permanently delete the artifact (skip trash).
2233
2269
  storage: Indicate whether you want to delete the artifact in storage.
2234
2270
 
2235
- Examples:
2271
+ Example::
2236
2272
 
2237
- For an `Artifact` object `artifact`, call:
2273
+ import lamindb as ln
2238
2274
 
2239
- >>> artifact = ln.Artifact.filter(key="some.csv").one()
2240
- >>> artifact.delete() # delete a single file artifact
2275
+ # For an `Artifact` object `artifact`, call:
2276
+ artifact = ln.Artifact.get(key="some.csv")
2277
+ artifact.delete() # delete a single file artifact
2241
2278
 
2242
- >>> artifact = ln.Artifact.filter(key="some.tiledbsoma". is_latest=False).first()
2243
- >>> artiact.delete() # delete an old version, the data will not be deleted
2279
+ artifact = ln.Artifact.filter(key="some.tiledbsoma", is_latest=False).first()
2280
+ artifact.delete() # delete an old version, the data will not be deleted
2244
2281
 
2245
- >>> artifact = ln.Artifact.filter(key="some.tiledbsoma". is_latest=True).one()
2246
- >>> artiact.delete() # delete all versions, the data will be deleted or prompted for deletion.
2282
+ artifact = ln.Artifact.get(key="some.tiledbsoma", is_latest=True)
2283
+ artifact.delete() # delete all versions, the data will be deleted or prompted for deletion.
2247
2284
  """
2248
2285
  # this first check means an invalid delete fails fast rather than cascading through
2249
2286
  # database and storage permission errors
@@ -2337,9 +2374,11 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2337
2374
  Args:
2338
2375
  upload: Trigger upload to cloud storage in instances with hybrid storage mode.
2339
2376
 
2340
- Examples:
2341
- >>> artifact = ln.Artifact("./myfile.csv", description="myfile")
2342
- >>> artifact.save()
2377
+ Example::
2378
+
2379
+ import lamindb as ln
2380
+
2381
+ artifact = ln.Artifact("./myfile.csv", key="myfile.parquet").save()
2343
2382
  """
2344
2383
  state_was_adding = self._state.adding
2345
2384
  print_progress = kwargs.pop("print_progress", True)
@@ -2408,8 +2447,9 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2408
2447
  def restore(self) -> None:
2409
2448
  """Restore from trash.
2410
2449
 
2411
- Examples:
2412
- >>> artifact.restore()
2450
+ Example::
2451
+
2452
+ artifact.restore()
2413
2453
  """
2414
2454
  self._branch_code = 1
2415
2455
  self.save()
@@ -2417,8 +2457,9 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2417
2457
  def describe(self) -> None:
2418
2458
  """Describe relations of record.
2419
2459
 
2420
- Examples:
2421
- >>> artifact.describe()
2460
+ Example::
2461
+
2462
+ artifact.describe()
2422
2463
  """
2423
2464
  return describe_artifact_collection(self)
2424
2465
 
@@ -2428,11 +2469,12 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2428
2469
 
2429
2470
  # can't really just call .cache in .load because of double tracking
2430
2471
  def _synchronize_cleanup_on_error(
2431
- filepath: UPath, cache_key: str | None = None
2472
+ filepath: UPath, cache_key: str | None = None, **kwargs
2432
2473
  ) -> UPath:
2433
2474
  try:
2475
+ print_progress = kwargs.pop("print_progress", True)
2434
2476
  cache_path = setup_settings.paths.cloud_to_local(
2435
- filepath, cache_key=cache_key, print_progress=True
2477
+ filepath, cache_key=cache_key, print_progress=print_progress, **kwargs
2436
2478
  )
2437
2479
  except Exception as e:
2438
2480
  if not isinstance(filepath, LocalPathClasses):
@@ -2478,8 +2520,9 @@ class ArtifactParamValue(BasicRecord, LinkORM, TracksRun):
2478
2520
 
2479
2521
 
2480
2522
  def _track_run_input(
2481
- data: Artifact
2482
- | Iterable[Artifact], # can also be Collection | Iterable[Collection]
2523
+ data: (
2524
+ Artifact | Iterable[Artifact]
2525
+ ), # can also be Collection | Iterable[Collection]
2483
2526
  is_run_input: bool | Run | None = None,
2484
2527
  run: Run | None = None,
2485
2528
  ):
@@ -2543,10 +2586,10 @@ def _track_run_input(
2543
2586
  if run is None:
2544
2587
  if settings.track_run_inputs:
2545
2588
  # here we check that this is not a read-only connection
2546
- # normally for our connection strings the read-only role name has _read in it
2589
+ # normally for our connection strings the read-only role name has "read" in it
2547
2590
  # not absolutely safe but the worst case is that the warning is not shown
2548
2591
  instance = setup_settings.instance
2549
- if instance.dialect != "postgresql" or "_read" not in instance.db:
2592
+ if instance.dialect != "postgresql" or "read" not in instance.db:
2550
2593
  logger.warning(WARNING_NO_INPUT)
2551
2594
  # assume we have a run record
2552
2595
  else: