PyPI - lamindb - Versions diffs - 1.4.0__py3-none-any.whl → 1.5.1__py3-none-any.whl - Mend

lamindb 1.4.0__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57) hide show

lamindb/__init__.py +52 -36
lamindb/_finish.py +17 -10
lamindb/_tracked.py +1 -1
lamindb/base/__init__.py +3 -1
lamindb/base/fields.py +40 -22
lamindb/base/ids.py +1 -94
lamindb/base/types.py +2 -0
lamindb/base/uids.py +117 -0
lamindb/core/_context.py +203 -102
lamindb/core/_settings.py +38 -25
lamindb/core/datasets/__init__.py +11 -4
lamindb/core/datasets/_core.py +5 -5
lamindb/core/datasets/_small.py +0 -93
lamindb/core/datasets/mini_immuno.py +172 -0
lamindb/core/loaders.py +1 -1
lamindb/core/storage/_backed_access.py +100 -6
lamindb/core/storage/_polars_lazy_df.py +51 -0
lamindb/core/storage/_pyarrow_dataset.py +15 -30
lamindb/core/storage/_tiledbsoma.py +29 -13
lamindb/core/storage/objects.py +6 -0
lamindb/core/subsettings/__init__.py +2 -0
lamindb/core/subsettings/_annotation_settings.py +11 -0
lamindb/curators/__init__.py +7 -3349
lamindb/curators/_legacy.py +2056 -0
lamindb/curators/core.py +1534 -0
lamindb/errors.py +11 -0
lamindb/examples/__init__.py +27 -0
lamindb/examples/schemas/__init__.py +12 -0
lamindb/examples/schemas/_anndata.py +25 -0
lamindb/examples/schemas/_simple.py +19 -0
lamindb/integrations/_vitessce.py +8 -5
lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +24 -0
lamindb/migrations/0092_alter_artifactfeaturevalue_artifact_and_more.py +75 -0
lamindb/migrations/0093_alter_schemacomponent_unique_together.py +16 -0
lamindb/models/__init__.py +4 -1
lamindb/models/_describe.py +21 -4
lamindb/models/_feature_manager.py +382 -287
lamindb/models/_label_manager.py +8 -2
lamindb/models/artifact.py +177 -106
lamindb/models/artifact_set.py +122 -0
lamindb/models/collection.py +73 -52
lamindb/models/core.py +1 -1
lamindb/models/feature.py +51 -17
lamindb/models/has_parents.py +69 -14
lamindb/models/project.py +1 -1
lamindb/models/query_manager.py +221 -22
lamindb/models/query_set.py +247 -172
lamindb/models/record.py +65 -247
lamindb/models/run.py +4 -4
lamindb/models/save.py +8 -2
lamindb/models/schema.py +456 -184
lamindb/models/transform.py +2 -2
lamindb/models/ulabel.py +8 -5
{lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/METADATA +6 -6
{lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/RECORD +57 -43
{lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/LICENSE +0 -0
{lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/WHEEL +0 -0

lamindb/__init__.py CHANGED Viewed

@@ -1,43 +1,62 @@
 """A data framework for biology.
-Tracking notebooks, scripts & functions.
+Data lineage
+============
+Track inputs, outputs & environment of a notebook or script run.
 .. autosummary::
    :toctree: .
    track
    finish
+Decorate a function with `@tracked()` to track inputs, outputs & environment of function executions.
+.. autosummary::
+   :toctree: .
    tracked
-Registries.
+Registries
+==========
+Manage artifacts and transforms.
 .. autosummary::
    :toctree: .
    Artifact
+   Storage
    Transform
    Run
+Validate and annotate artifacts.
+.. autosummary::
+   :toctree: .
    ULabel
-   User
-   Storage
    Feature
-   Schema
    Param
+   Schema
+Manage projects.
+.. autosummary::
+   :toctree: .
+   User
    Collection
    Project
    Space
    Reference
    Person
-Curators & integrations.
-.. autosummary::
-   :toctree: .
-   curators
-   integrations
+Other
+=====
-Key functionality.
+Functions and classes.
 .. autosummary::
    :toctree: .
@@ -47,32 +66,35 @@ Key functionality.
    save
    UPath
    settings
+   context
-Low-level functionality.
+Curators and integrations.
 .. autosummary::
    :toctree: .
-   context
-   errors
-   setup
-   base
-   models
-   core
+   curators
+   integrations
-Backward compatibility.
+Low-level functionality.
 .. autosummary::
    :toctree: .
-   FeatureSet
-   Curator
+   examples
+   curators
+   integrations
+   errors
+   setup
+   base
+   core
+   models
 """
 # ruff: noqa: I001
 # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
-__version__ = "1.4.0"
+__version__ = "1.5.1"
 import warnings
@@ -100,7 +122,7 @@ if _check_instance_setup(from_module="lamindb"):
     from ._view import view
     from .core._context import context
     from .core._settings import settings
-    from .curators import CatManager as Curator
+    from .curators._legacy import CatManager as Curator
     from .models import (
         Artifact,
         Collection,
@@ -122,16 +144,10 @@ if _check_instance_setup(from_module="lamindb"):
     from . import core
     from . import integrations
     from . import curators
+    from . import examples
-    track = context.track
-    finish = context.finish
-    settings.__doc__ = """Global settings (:class:`~lamindb.core.Settings`)."""
-    context.__doc__ = """Global run context (:class:`~lamindb.core.Context`).
-    Note that you can access:
-    - `ln.context.track()` as `ln.track()`
-    - `ln.context.finish()` as `ln.finish()`
-    """
+    track = context._track
+    finish = context._finish
+    settings.__doc__ = """Global live settings (:class:`~lamindb.core.Settings`)."""
+    context.__doc__ = """Global run context (:class:`~lamindb.core.Context`)."""
     from django.db.models import Q

lamindb/_finish.py CHANGED Viewed

@@ -160,7 +160,7 @@ def notebook_to_report(notebook_path: Path, output_path: Path) -> None:
 def notebook_to_script(  # type: ignore
-    transform: Transform, notebook_path: Path, script_path: Path | None = None
+    title: str, notebook_path: Path, script_path: Path | None = None
 ) -> None | str:
     import jupytext
@@ -169,7 +169,7 @@ def notebook_to_script(  # type: ignore
     # remove global metadata header
     py_content = re.sub(r"^# ---\n.*?# ---\n\n", "", py_content, flags=re.DOTALL)
     # replace title
-    py_content = py_content.replace(f"# # {transform.description}", "#")
+    py_content = py_content.replace(f"# # {title}", "#")
     if script_path is None:
         return py_content
     else:
@@ -244,6 +244,7 @@ def save_context_core(
     ignore_non_consecutive: bool | None = None,
     from_cli: bool = False,
     is_retry: bool = False,
+    notebook_runner: str | None = None,
 ) -> str | None:
     import lamindb as ln
     from lamindb.models import (
@@ -259,7 +260,9 @@ def save_context_core(
     source_code_path = filepath
     report_path: Path | None = None
     save_source_code_and_report = True
-    if is_run_from_ipython:  # python notebooks in interactive session
+    if (
+        is_run_from_ipython and notebook_runner != "nbconvert"
+    ):  # python notebooks in interactive session
         import nbproject
         # it might be that the user modifies the title just before ln.finish()
@@ -310,7 +313,7 @@ def save_context_core(
         source_code_path = ln_setup.settings.cache_dir / filepath.name.replace(
             ".ipynb", ".py"
         )
-        notebook_to_script(transform, filepath, source_code_path)
+        notebook_to_script(transform.description, filepath, source_code_path)
     elif is_r_notebook:
         if filepath.with_suffix(".nb.html").exists():
             report_path = filepath.with_suffix(".nb.html")
@@ -337,18 +340,18 @@ def save_context_core(
     ln.settings.creation.artifact_silence_missing_run_warning = True
     # save source code
     if save_source_code_and_report:
-        hash, _ = hash_file(source_code_path)  # ignore hash_type for now
+        transform_hash, _ = hash_file(source_code_path)  # ignore hash_type for now
         if transform.hash is not None:
             # check if the hash of the transform source code matches
             # (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
-            if hash != transform.hash:
+            if transform_hash != transform.hash:
                 response = input(
                     f"You are about to overwrite existing source code (hash '{transform.hash}') for Transform('{transform.uid}')."
                     f" Proceed? (y/n)"
                 )
                 if response == "y":
                     transform.source_code = source_code_path.read_text()
-                    transform.hash = hash
+                    transform.hash = transform_hash
                 else:
                     logger.warning("Please re-run `ln.track()` to make a new version")
                     return "rerun-the-notebook"
@@ -356,7 +359,7 @@ def save_context_core(
                 logger.debug("source code is already saved")
         else:
             transform.source_code = source_code_path.read_text()
-            transform.hash = hash
+            transform.hash = transform_hash
     # track run environment
     if run is not None:
@@ -398,7 +401,8 @@ def save_context_core(
     # track report and set is_consecutive
     if save_source_code_and_report:
         if run is not None:
-            if report_path is not None:
+            # do not save a run report if executing through nbconvert
+            if report_path is not None and notebook_runner != "nbconvert":
                 if is_r_notebook:
                     title_text, report_path = clean_r_notebook_html(report_path)
                     if title_text is not None:
@@ -432,6 +436,8 @@ def save_context_core(
                     f"saved transform.latest_run.report: {transform.latest_run.report}"
                 )
             run._is_consecutive = is_consecutive
+        if report_path is not None and notebook_runner == "nbconvert":
+            logger.important(f"to save the notebook html, run: lamin save {filepath}")
     # save both run & transform records if we arrive here
     if run is not None:
@@ -442,9 +448,10 @@ def save_context_core(
         # the hash existed and we're actually back to the previous version
         # hence, this was in fact a run of the previous transform rather than of
         # the new transform
-        # this can happen in interactive notebooks if the user makes no change to the notebook
+        # this can happen in interactively executed notebooks with a pro-active version bump in case it turns out that the user didn't make a change to the notebook
         run.transform = transform
         run.save()
+        ln.Transform.get(transform_id_prior_to_save).delete()
     # finalize
     if not from_cli and run is not None:

lamindb/_tracked.py CHANGED Viewed

@@ -26,7 +26,7 @@ def get_current_tracked_run() -> Run | None:
 def tracked(uid: str | None = None) -> Callable[[Callable[P, R]], Callable[P, R]]:
-    """Mark a function as tracked with this decorator.
+    """Track function runs.
     You will be able to see inputs, outputs, and parameters of the function in the data lineage graph.

lamindb/base/__init__.py CHANGED Viewed

@@ -7,7 +7,9 @@ Modules:
 .. autosummary::
    :toctree: .
+   uids
    types
+   fields
 Utils:
@@ -21,4 +23,4 @@ Utils:
 from lamindb_setup.core import deprecated, doc_args
-from . import types
+from . import fields, types, uids

lamindb/base/fields.py CHANGED Viewed

@@ -1,3 +1,35 @@
+"""Fields.
+.. autosummary::
+   :toctree: .
+   CharField
+   TextField
+   ForeignKey
+   BooleanField
+   DateField
+   DateTimeField
+   BigIntegerField
+   IntegerField
+   OneToOneField
+   FloatField
+   DecimalField
+   BinaryField
+   JSONField
+   EmailField
+   TimeField
+   SlugField
+   URLField
+   UUIDField
+   PositiveIntegerField
+   PositiveSmallIntegerField
+   SmallIntegerField
+   GenericIPAddressField
+   DurationField
+   CharField
+   TextField
+"""
 from django.db import models
@@ -37,6 +69,14 @@ class ForeignKey(models.ForeignKey):
         super().__init__(*args, **kwargs)
+# fix doc string that otherwise errors
+ForeignKey.get_extra_descriptor_filter.__doc__ = (
+    ForeignKey.get_extra_descriptor_filter.__doc__.replace(
+        ".filter(**kwargs)", "`.filter(**kwargs)`"
+    )
+)
 class BooleanField(models.BooleanField):
     """Custom `BooleanField` with default values for `blank` and `default`.
@@ -257,25 +297,3 @@ class GenericIPAddressField(models.GenericIPAddressField):
     def __init__(self, *args, **kwargs):
         kwargs.setdefault("blank", True)
         super().__init__(*args, **kwargs)
-class FileField(models.FileField):
-    """Custom `FileField` with default values for `blank`.
-    Django default values for `FileField` are `blank=False`.
-    """
-    def __init__(self, *args, **kwargs):
-        kwargs.setdefault("blank", True)
-        super().__init__(*args, **kwargs)
-class ImageField(models.ImageField):
-    """Custom `ImageField` with default values for `blank`.
-    Django default values for `ImageField` are `blank=False`.
-    """
-    def __init__(self, *args, **kwargs):
-        kwargs.setdefault("blank", True)
-        super().__init__(*args, **kwargs)

lamindb/base/ids.py CHANGED Viewed

@@ -1,94 +1 @@
-"""Universal IDs.
-Base generators:
-.. autosummary::
-   :toctree: .
-   base26
-   base62
-   base64
-8 base62 characters:
-======= ===========
-n       p_collision
-======= ===========
-100k    2e-05
-1M      2e-03
-======= ===========
-12 base62 characters:
-======= ===========
-n       p_collision
-======= ===========
-100M    2e-06
-1B      2e-04
-======= ===========
-20 base62 characters (62**20=7e+35) roughly matches UUID (2*122=5e+36):
-======= ===========
-n       p_collision
-======= ===========
-3e15    1e-6
-======= ===========
-"""
-import secrets
-import string
-def base64(n_char: int) -> str:
-    """Random Base64 string."""
-    alphabet = string.digits + string.ascii_letters.swapcase() + "_" + "-"
-    id = "".join(secrets.choice(alphabet) for i in range(n_char))
-    return id
-def base62(n_char: int) -> str:
-    """Random Base62 string."""
-    alphabet = string.digits + string.ascii_letters.swapcase()
-    id = "".join(secrets.choice(alphabet) for i in range(n_char))
-    return id
-def base26(n_char: int):
-    """ASCII lowercase."""
-    alphabet = string.ascii_lowercase
-    id = "".join(secrets.choice(alphabet) for i in range(n_char))
-    return id
-def base62_4() -> str:
-    return base62(4)
-def base62_8() -> str:
-    return base62(8)
-def base62_12() -> str:
-    return base62(12)
-def base62_14() -> str:
-    return base62(14)
-def base62_16() -> str:
-    return base62(16)
-def base62_18() -> str:
-    return base62(18)
-def base62_20() -> str:
-    return base62(20)
-def base62_24() -> str:
-    return base62(24)
+from .uids import *  # noqa: F403

lamindb/base/types.py CHANGED Viewed

@@ -49,6 +49,7 @@ Dtype = Literal[
     "bool",  # boolean
     "date",  # date
     "datetime",  # datetime
+    "dict",  # dictionary
     "object",  # this is a pandas input dtype, we're only using it for complicated types, not for strings
 ]
 """Data type.
@@ -68,6 +69,7 @@ float         `"float"`     `float64 | float32 | float16 | float8 | ...`
 string        `"str"`       `object`
 datetime      `"datetime"`  `datetime`
 date          `"date"`      `date`
+dictionary    `"dict"`      `object`
 ============  ============  =================================================
 Categoricals

lamindb/base/uids.py ADDED Viewed

@@ -0,0 +1,117 @@
+"""Universal IDs.
+Base generators:
+.. autosummary::
+   :toctree: .
+   base26
+   base62
+   base64
+`uid` generators:
+.. autosummary::
+   :toctree: .
+   base62_8
+   base62_12
+   base62_16
+   base62_20
+Collision probabilities
+=======================
+8 base62 characters (`62**8=2e+14`):
+======= ===========
+n       p_collision
+======= ===========
+100k    2e-05
+1M      2e-03
+======= ===========
+12 base62 characters (`62**12=3e+21`):
+======= ===========
+n       p_collision
+======= ===========
+100M    2e-06
+1B      2e-04
+======= ===========
+16 base62 characters (`62**16=5e+28`):
+======= ===========
+n       p_collision
+======= ===========
+1e12    7e-05
+1e13    7e-03
+======= ===========
+20 base62 characters (`62**20=7e+35`) roughly matches UUID (`2**122=5e+36`):
+======= ===========
+n       p_collision
+======= ===========
+1e16    7e-05
+1e17    7e-03
+======= ===========
+See `source <https://lamin.ai/laminlabs/lamindata/transform/t2xCdMB9v5wL>`__.
+"""
+import secrets
+import string
+def base64(n_char: int) -> str:
+    """Random Base64 string."""
+    alphabet = string.digits + string.ascii_letters.swapcase() + "_" + "-"
+    uid = "".join(secrets.choice(alphabet) for i in range(n_char))
+    return uid
+def base62(n_char: int) -> str:
+    """Random Base62 string."""
+    alphabet = string.digits + string.ascii_letters.swapcase()
+    uid = "".join(secrets.choice(alphabet) for i in range(n_char))
+    return uid
+def base26(n_char: int):
+    """ASCII lowercase."""
+    alphabet = string.ascii_lowercase
+    uid = "".join(secrets.choice(alphabet) for i in range(n_char))
+    return uid
+def base62_4() -> str:
+    return base62(4)
+def base62_8() -> str:
+    """Random Base62 string of length 8."""
+    return base62(8)
+def base62_12() -> str:
+    """Random Base62 string of length 12."""
+    return base62(12)
+def base62_16() -> str:
+    """Random Base62 string of length 16."""
+    return base62(16)
+def base62_20() -> str:
+    """Random Base62 string of length 20."""
+    return base62(20)
+def base62_24() -> str:
+    """Random Base62 string of length 24."""
+    return base62(24)

lamindb 1.4.0__py3-none-any.whl → 1.5.1__py3-none-any.whl

lamindb 1.4.0__py3-none-any.whl → 1.5.1__py3-none-any.whl