lamindb-1.4.0-py3-none-any.whl → lamindb-1.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. lamindb/__init__.py +52 -36
  2. lamindb/_finish.py +17 -10
  3. lamindb/_tracked.py +1 -1
  4. lamindb/base/__init__.py +3 -1
  5. lamindb/base/fields.py +40 -22
  6. lamindb/base/ids.py +1 -94
  7. lamindb/base/types.py +2 -0
  8. lamindb/base/uids.py +117 -0
  9. lamindb/core/_context.py +203 -102
  10. lamindb/core/_settings.py +38 -25
  11. lamindb/core/datasets/__init__.py +11 -4
  12. lamindb/core/datasets/_core.py +5 -5
  13. lamindb/core/datasets/_small.py +0 -93
  14. lamindb/core/datasets/mini_immuno.py +172 -0
  15. lamindb/core/loaders.py +1 -1
  16. lamindb/core/storage/_backed_access.py +100 -6
  17. lamindb/core/storage/_polars_lazy_df.py +51 -0
  18. lamindb/core/storage/_pyarrow_dataset.py +15 -30
  19. lamindb/core/storage/_tiledbsoma.py +29 -13
  20. lamindb/core/storage/objects.py +6 -0
  21. lamindb/core/subsettings/__init__.py +2 -0
  22. lamindb/core/subsettings/_annotation_settings.py +11 -0
  23. lamindb/curators/__init__.py +7 -3349
  24. lamindb/curators/_legacy.py +2056 -0
  25. lamindb/curators/core.py +1534 -0
  26. lamindb/errors.py +11 -0
  27. lamindb/examples/__init__.py +27 -0
  28. lamindb/examples/schemas/__init__.py +12 -0
  29. lamindb/examples/schemas/_anndata.py +25 -0
  30. lamindb/examples/schemas/_simple.py +19 -0
  31. lamindb/integrations/_vitessce.py +8 -5
  32. lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +24 -0
  33. lamindb/migrations/0092_alter_artifactfeaturevalue_artifact_and_more.py +75 -0
  34. lamindb/migrations/0093_alter_schemacomponent_unique_together.py +16 -0
  35. lamindb/models/__init__.py +4 -1
  36. lamindb/models/_describe.py +21 -4
  37. lamindb/models/_feature_manager.py +382 -287
  38. lamindb/models/_label_manager.py +8 -2
  39. lamindb/models/artifact.py +177 -106
  40. lamindb/models/artifact_set.py +122 -0
  41. lamindb/models/collection.py +73 -52
  42. lamindb/models/core.py +1 -1
  43. lamindb/models/feature.py +51 -17
  44. lamindb/models/has_parents.py +69 -14
  45. lamindb/models/project.py +1 -1
  46. lamindb/models/query_manager.py +221 -22
  47. lamindb/models/query_set.py +247 -172
  48. lamindb/models/record.py +65 -247
  49. lamindb/models/run.py +4 -4
  50. lamindb/models/save.py +8 -2
  51. lamindb/models/schema.py +456 -184
  52. lamindb/models/transform.py +2 -2
  53. lamindb/models/ulabel.py +8 -5
  54. {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/METADATA +6 -6
  55. {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/RECORD +57 -43
  56. {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/LICENSE +0 -0
  57. {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/WHEEL +0 -0
lamindb/__init__.py CHANGED
@@ -1,43 +1,62 @@
1
1
  """A data framework for biology.
2
2
 
3
- Tracking notebooks, scripts & functions.
3
+ Data lineage
4
+ ============
5
+
6
+ Track inputs, outputs & environment of a notebook or script run.
4
7
 
5
8
  .. autosummary::
6
9
  :toctree: .
7
10
 
8
11
  track
9
12
  finish
13
+
14
+ Decorate a function with `@tracked()` to track inputs, outputs & environment of function executions.
15
+
16
+ .. autosummary::
17
+ :toctree: .
18
+
10
19
  tracked
11
20
 
12
- Registries.
21
+ Registries
22
+ ==========
23
+
24
+ Manage artifacts and transforms.
13
25
 
14
26
  .. autosummary::
15
27
  :toctree: .
16
28
 
17
29
  Artifact
30
+ Storage
18
31
  Transform
19
32
  Run
33
+
34
+ Validate and annotate artifacts.
35
+
36
+ .. autosummary::
37
+ :toctree: .
38
+
20
39
  ULabel
21
- User
22
- Storage
23
40
  Feature
24
- Schema
25
41
  Param
42
+ Schema
43
+
44
+ Manage projects.
45
+
46
+ .. autosummary::
47
+ :toctree: .
48
+
49
+ User
26
50
  Collection
27
51
  Project
28
52
  Space
29
53
  Reference
30
54
  Person
31
55
 
32
- Curators & integrations.
33
-
34
- .. autosummary::
35
- :toctree: .
36
-
37
- curators
38
- integrations
56
+ Other
57
+ =====
39
58
 
40
- Key functionality.
59
+ Functions and classes.
41
60
 
42
61
  .. autosummary::
43
62
  :toctree: .
@@ -47,32 +66,35 @@ Key functionality.
47
66
  save
48
67
  UPath
49
68
  settings
69
+ context
50
70
 
51
- Low-level functionality.
71
+ Curators and integrations.
52
72
 
53
73
  .. autosummary::
54
74
  :toctree: .
55
75
 
56
- context
57
- errors
58
- setup
59
- base
60
- models
61
- core
76
+ curators
77
+ integrations
62
78
 
63
- Backward compatibility.
79
+ Low-level functionality.
64
80
 
65
81
  .. autosummary::
66
82
  :toctree: .
67
83
 
68
- FeatureSet
69
- Curator
84
+ examples
85
+ curators
86
+ integrations
87
+ errors
88
+ setup
89
+ base
90
+ core
91
+ models
70
92
 
71
93
  """
72
94
 
73
95
  # ruff: noqa: I001
74
96
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
75
- __version__ = "1.4.0"
97
+ __version__ = "1.5.1"
76
98
 
77
99
  import warnings
78
100
 
@@ -100,7 +122,7 @@ if _check_instance_setup(from_module="lamindb"):
100
122
  from ._view import view
101
123
  from .core._context import context
102
124
  from .core._settings import settings
103
- from .curators import CatManager as Curator
125
+ from .curators._legacy import CatManager as Curator
104
126
  from .models import (
105
127
  Artifact,
106
128
  Collection,
@@ -122,16 +144,10 @@ if _check_instance_setup(from_module="lamindb"):
122
144
  from . import core
123
145
  from . import integrations
124
146
  from . import curators
147
+ from . import examples
125
148
 
126
- track = context.track
127
- finish = context.finish
128
- settings.__doc__ = """Global settings (:class:`~lamindb.core.Settings`)."""
129
- context.__doc__ = """Global run context (:class:`~lamindb.core.Context`).
130
-
131
- Note that you can access:
132
-
133
- - `ln.context.track()` as `ln.track()`
134
- - `ln.context.finish()` as `ln.finish()`
135
-
136
- """
149
+ track = context._track
150
+ finish = context._finish
151
+ settings.__doc__ = """Global live settings (:class:`~lamindb.core.Settings`)."""
152
+ context.__doc__ = """Global run context (:class:`~lamindb.core.Context`)."""
137
153
  from django.db.models import Q
lamindb/_finish.py CHANGED
@@ -160,7 +160,7 @@ def notebook_to_report(notebook_path: Path, output_path: Path) -> None:
160
160
 
161
161
 
162
162
  def notebook_to_script( # type: ignore
163
- transform: Transform, notebook_path: Path, script_path: Path | None = None
163
+ title: str, notebook_path: Path, script_path: Path | None = None
164
164
  ) -> None | str:
165
165
  import jupytext
166
166
 
@@ -169,7 +169,7 @@ def notebook_to_script( # type: ignore
169
169
  # remove global metadata header
170
170
  py_content = re.sub(r"^# ---\n.*?# ---\n\n", "", py_content, flags=re.DOTALL)
171
171
  # replace title
172
- py_content = py_content.replace(f"# # {transform.description}", "#")
172
+ py_content = py_content.replace(f"# # {title}", "#")
173
173
  if script_path is None:
174
174
  return py_content
175
175
  else:
@@ -244,6 +244,7 @@ def save_context_core(
244
244
  ignore_non_consecutive: bool | None = None,
245
245
  from_cli: bool = False,
246
246
  is_retry: bool = False,
247
+ notebook_runner: str | None = None,
247
248
  ) -> str | None:
248
249
  import lamindb as ln
249
250
  from lamindb.models import (
@@ -259,7 +260,9 @@ def save_context_core(
259
260
  source_code_path = filepath
260
261
  report_path: Path | None = None
261
262
  save_source_code_and_report = True
262
- if is_run_from_ipython: # python notebooks in interactive session
263
+ if (
264
+ is_run_from_ipython and notebook_runner != "nbconvert"
265
+ ): # python notebooks in interactive session
263
266
  import nbproject
264
267
 
265
268
  # it might be that the user modifies the title just before ln.finish()
@@ -310,7 +313,7 @@ def save_context_core(
310
313
  source_code_path = ln_setup.settings.cache_dir / filepath.name.replace(
311
314
  ".ipynb", ".py"
312
315
  )
313
- notebook_to_script(transform, filepath, source_code_path)
316
+ notebook_to_script(transform.description, filepath, source_code_path)
314
317
  elif is_r_notebook:
315
318
  if filepath.with_suffix(".nb.html").exists():
316
319
  report_path = filepath.with_suffix(".nb.html")
@@ -337,18 +340,18 @@ def save_context_core(
337
340
  ln.settings.creation.artifact_silence_missing_run_warning = True
338
341
  # save source code
339
342
  if save_source_code_and_report:
340
- hash, _ = hash_file(source_code_path) # ignore hash_type for now
343
+ transform_hash, _ = hash_file(source_code_path) # ignore hash_type for now
341
344
  if transform.hash is not None:
342
345
  # check if the hash of the transform source code matches
343
346
  # (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
344
- if hash != transform.hash:
347
+ if transform_hash != transform.hash:
345
348
  response = input(
346
349
  f"You are about to overwrite existing source code (hash '{transform.hash}') for Transform('{transform.uid}')."
347
350
  f" Proceed? (y/n)"
348
351
  )
349
352
  if response == "y":
350
353
  transform.source_code = source_code_path.read_text()
351
- transform.hash = hash
354
+ transform.hash = transform_hash
352
355
  else:
353
356
  logger.warning("Please re-run `ln.track()` to make a new version")
354
357
  return "rerun-the-notebook"
@@ -356,7 +359,7 @@ def save_context_core(
356
359
  logger.debug("source code is already saved")
357
360
  else:
358
361
  transform.source_code = source_code_path.read_text()
359
- transform.hash = hash
362
+ transform.hash = transform_hash
360
363
 
361
364
  # track run environment
362
365
  if run is not None:
@@ -398,7 +401,8 @@ def save_context_core(
398
401
  # track report and set is_consecutive
399
402
  if save_source_code_and_report:
400
403
  if run is not None:
401
- if report_path is not None:
404
+ # do not save a run report if executing through nbconvert
405
+ if report_path is not None and notebook_runner != "nbconvert":
402
406
  if is_r_notebook:
403
407
  title_text, report_path = clean_r_notebook_html(report_path)
404
408
  if title_text is not None:
@@ -432,6 +436,8 @@ def save_context_core(
432
436
  f"saved transform.latest_run.report: {transform.latest_run.report}"
433
437
  )
434
438
  run._is_consecutive = is_consecutive
439
+ if report_path is not None and notebook_runner == "nbconvert":
440
+ logger.important(f"to save the notebook html, run: lamin save {filepath}")
435
441
 
436
442
  # save both run & transform records if we arrive here
437
443
  if run is not None:
@@ -442,9 +448,10 @@ def save_context_core(
442
448
  # the hash existed and we're actually back to the previous version
443
449
  # hence, this was in fact a run of the previous transform rather than of
444
450
  # the new transform
445
- # this can happen in interactive notebooks if the user makes no change to the notebook
451
+ # this can happen in interactively executed notebooks with a pro-active version bump in case it turns out that the user didn't make a change to the notebook
446
452
  run.transform = transform
447
453
  run.save()
454
+ ln.Transform.get(transform_id_prior_to_save).delete()
448
455
 
449
456
  # finalize
450
457
  if not from_cli and run is not None:
lamindb/_tracked.py CHANGED
@@ -26,7 +26,7 @@ def get_current_tracked_run() -> Run | None:
26
26
 
27
27
 
28
28
  def tracked(uid: str | None = None) -> Callable[[Callable[P, R]], Callable[P, R]]:
29
- """Mark a function as tracked with this decorator.
29
+ """Track function runs.
30
30
 
31
31
  You will be able to see inputs, outputs, and parameters of the function in the data lineage graph.
32
32
 
lamindb/base/__init__.py CHANGED
@@ -7,7 +7,9 @@ Modules:
7
7
  .. autosummary::
8
8
  :toctree: .
9
9
 
10
+ uids
10
11
  types
12
+ fields
11
13
 
12
14
  Utils:
13
15
 
@@ -21,4 +23,4 @@ Utils:
21
23
 
22
24
  from lamindb_setup.core import deprecated, doc_args
23
25
 
24
- from . import types
26
+ from . import fields, types, uids
lamindb/base/fields.py CHANGED
@@ -1,3 +1,35 @@
1
+ """Fields.
2
+
3
+ .. autosummary::
4
+ :toctree: .
5
+
6
+ CharField
7
+ TextField
8
+ ForeignKey
9
+ BooleanField
10
+ DateField
11
+ DateTimeField
12
+ BigIntegerField
13
+ IntegerField
14
+ OneToOneField
15
+ FloatField
16
+ DecimalField
17
+ BinaryField
18
+ JSONField
19
+ EmailField
20
+ TimeField
21
+ SlugField
22
+ URLField
23
+ UUIDField
24
+ PositiveIntegerField
25
+ PositiveSmallIntegerField
26
+ SmallIntegerField
27
+ GenericIPAddressField
28
+ DurationField
29
+ CharField
30
+ TextField
31
+ """
32
+
1
33
  from django.db import models
2
34
 
3
35
 
@@ -37,6 +69,14 @@ class ForeignKey(models.ForeignKey):
37
69
  super().__init__(*args, **kwargs)
38
70
 
39
71
 
72
+ # fix doc string that otherwise errors
73
+ ForeignKey.get_extra_descriptor_filter.__doc__ = (
74
+ ForeignKey.get_extra_descriptor_filter.__doc__.replace(
75
+ ".filter(**kwargs)", "`.filter(**kwargs)`"
76
+ )
77
+ )
78
+
79
+
40
80
  class BooleanField(models.BooleanField):
41
81
  """Custom `BooleanField` with default values for `blank` and `default`.
42
82
 
@@ -257,25 +297,3 @@ class GenericIPAddressField(models.GenericIPAddressField):
257
297
  def __init__(self, *args, **kwargs):
258
298
  kwargs.setdefault("blank", True)
259
299
  super().__init__(*args, **kwargs)
260
-
261
-
262
- class FileField(models.FileField):
263
- """Custom `FileField` with default values for `blank`.
264
-
265
- Django default values for `FileField` are `blank=False`.
266
- """
267
-
268
- def __init__(self, *args, **kwargs):
269
- kwargs.setdefault("blank", True)
270
- super().__init__(*args, **kwargs)
271
-
272
-
273
- class ImageField(models.ImageField):
274
- """Custom `ImageField` with default values for `blank`.
275
-
276
- Django default values for `ImageField` are `blank=False`.
277
- """
278
-
279
- def __init__(self, *args, **kwargs):
280
- kwargs.setdefault("blank", True)
281
- super().__init__(*args, **kwargs)
lamindb/base/ids.py CHANGED
@@ -1,94 +1 @@
1
- """Universal IDs.
2
-
3
- Base generators:
4
-
5
- .. autosummary::
6
- :toctree: .
7
-
8
- base26
9
- base62
10
- base64
11
-
12
- 8 base62 characters:
13
-
14
- ======= ===========
15
- n p_collision
16
- ======= ===========
17
- 100k 2e-05
18
- 1M 2e-03
19
- ======= ===========
20
-
21
- 12 base62 characters:
22
-
23
- ======= ===========
24
- n p_collision
25
- ======= ===========
26
- 100M 2e-06
27
- 1B 2e-04
28
- ======= ===========
29
-
30
- 20 base62 characters (62**20=7e+35) roughly matches UUID (2*122=5e+36):
31
-
32
- ======= ===========
33
- n p_collision
34
- ======= ===========
35
- 3e15 1e-6
36
- ======= ===========
37
-
38
- """
39
-
40
- import secrets
41
- import string
42
-
43
-
44
- def base64(n_char: int) -> str:
45
- """Random Base64 string."""
46
- alphabet = string.digits + string.ascii_letters.swapcase() + "_" + "-"
47
- id = "".join(secrets.choice(alphabet) for i in range(n_char))
48
- return id
49
-
50
-
51
- def base62(n_char: int) -> str:
52
- """Random Base62 string."""
53
- alphabet = string.digits + string.ascii_letters.swapcase()
54
- id = "".join(secrets.choice(alphabet) for i in range(n_char))
55
- return id
56
-
57
-
58
- def base26(n_char: int):
59
- """ASCII lowercase."""
60
- alphabet = string.ascii_lowercase
61
- id = "".join(secrets.choice(alphabet) for i in range(n_char))
62
- return id
63
-
64
-
65
- def base62_4() -> str:
66
- return base62(4)
67
-
68
-
69
- def base62_8() -> str:
70
- return base62(8)
71
-
72
-
73
- def base62_12() -> str:
74
- return base62(12)
75
-
76
-
77
- def base62_14() -> str:
78
- return base62(14)
79
-
80
-
81
- def base62_16() -> str:
82
- return base62(16)
83
-
84
-
85
- def base62_18() -> str:
86
- return base62(18)
87
-
88
-
89
- def base62_20() -> str:
90
- return base62(20)
91
-
92
-
93
- def base62_24() -> str:
94
- return base62(24)
1
+ from .uids import * # noqa: F403
lamindb/base/types.py CHANGED
@@ -49,6 +49,7 @@ Dtype = Literal[
49
49
  "bool", # boolean
50
50
  "date", # date
51
51
  "datetime", # datetime
52
+ "dict", # dictionary
52
53
  "object", # this is a pandas input dtype, we're only using it for complicated types, not for strings
53
54
  ]
54
55
  """Data type.
@@ -68,6 +69,7 @@ float `"float"` `float64 | float32 | float16 | float8 | ...`
68
69
  string `"str"` `object`
69
70
  datetime `"datetime"` `datetime`
70
71
  date `"date"` `date`
72
+ dictionary `"dict"` `object`
71
73
  ============ ============ =================================================
72
74
 
73
75
  Categoricals
lamindb/base/uids.py ADDED
@@ -0,0 +1,117 @@
1
+ """Universal IDs.
2
+
3
+ Base generators:
4
+
5
+ .. autosummary::
6
+ :toctree: .
7
+
8
+ base26
9
+ base62
10
+ base64
11
+
12
+ `uid` generators:
13
+
14
+ .. autosummary::
15
+ :toctree: .
16
+
17
+ base62_8
18
+ base62_12
19
+ base62_16
20
+ base62_20
21
+
22
+
23
+ Collision probabilities
24
+ =======================
25
+
26
+ 8 base62 characters (`62**8=2e+14`):
27
+
28
+ ======= ===========
29
+ n p_collision
30
+ ======= ===========
31
+ 100k 2e-05
32
+ 1M 2e-03
33
+ ======= ===========
34
+
35
+ 12 base62 characters (`62**12=3e+21`):
36
+
37
+ ======= ===========
38
+ n p_collision
39
+ ======= ===========
40
+ 100M 2e-06
41
+ 1B 2e-04
42
+ ======= ===========
43
+
44
+ 16 base62 characters (`62**16=5e+28`):
45
+
46
+ ======= ===========
47
+ n p_collision
48
+ ======= ===========
49
+ 1e12 7e-05
50
+ 1e13 7e-03
51
+ ======= ===========
52
+
53
+ 20 base62 characters (`62**20=7e+35`) roughly matches UUID (`2**122=5e+36`):
54
+
55
+ ======= ===========
56
+ n p_collision
57
+ ======= ===========
58
+ 1e16 7e-05
59
+ 1e17 7e-03
60
+ ======= ===========
61
+
62
+ See `source <https://lamin.ai/laminlabs/lamindata/transform/t2xCdMB9v5wL>`__.
63
+
64
+ """
65
+
66
+ import secrets
67
+ import string
68
+
69
+
70
+ def base64(n_char: int) -> str:
71
+ """Random Base64 string."""
72
+ alphabet = string.digits + string.ascii_letters.swapcase() + "_" + "-"
73
+ uid = "".join(secrets.choice(alphabet) for i in range(n_char))
74
+ return uid
75
+
76
+
77
+ def base62(n_char: int) -> str:
78
+ """Random Base62 string."""
79
+ alphabet = string.digits + string.ascii_letters.swapcase()
80
+ uid = "".join(secrets.choice(alphabet) for i in range(n_char))
81
+ return uid
82
+
83
+
84
+ def base26(n_char: int):
85
+ """ASCII lowercase."""
86
+ alphabet = string.ascii_lowercase
87
+ uid = "".join(secrets.choice(alphabet) for i in range(n_char))
88
+ return uid
89
+
90
+
91
+ def base62_4() -> str:
92
+ return base62(4)
93
+
94
+
95
+ def base62_8() -> str:
96
+ """Random Base62 string of length 8."""
97
+ return base62(8)
98
+
99
+
100
+ def base62_12() -> str:
101
+ """Random Base62 string of length 12."""
102
+ return base62(12)
103
+
104
+
105
+ def base62_16() -> str:
106
+ """Random Base62 string of length 16."""
107
+ return base62(16)
108
+
109
+
110
+ def base62_20() -> str:
111
+ """Random Base62 string of length 20."""
112
+ return base62(20)
113
+
114
+
115
+ def base62_24() -> str:
116
+ """Random Base62 string of length 24."""
117
+ return base62(24)