lamindb 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. lamindb/__init__.py +14 -5
  2. lamindb/_artifact.py +174 -57
  3. lamindb/_can_curate.py +27 -8
  4. lamindb/_collection.py +85 -51
  5. lamindb/_feature.py +177 -41
  6. lamindb/_finish.py +222 -81
  7. lamindb/_from_values.py +83 -98
  8. lamindb/_parents.py +4 -4
  9. lamindb/_query_set.py +59 -17
  10. lamindb/_record.py +171 -53
  11. lamindb/_run.py +4 -4
  12. lamindb/_save.py +33 -10
  13. lamindb/_schema.py +135 -38
  14. lamindb/_storage.py +1 -1
  15. lamindb/_tracked.py +106 -0
  16. lamindb/_transform.py +21 -8
  17. lamindb/_ulabel.py +5 -14
  18. lamindb/base/validation.py +2 -6
  19. lamindb/core/__init__.py +13 -14
  20. lamindb/core/_context.py +39 -36
  21. lamindb/core/_data.py +29 -25
  22. lamindb/core/_describe.py +1 -1
  23. lamindb/core/_django.py +1 -1
  24. lamindb/core/_feature_manager.py +54 -44
  25. lamindb/core/_label_manager.py +4 -4
  26. lamindb/core/_mapped_collection.py +20 -7
  27. lamindb/core/datasets/__init__.py +6 -1
  28. lamindb/core/datasets/_core.py +12 -11
  29. lamindb/core/datasets/_small.py +66 -20
  30. lamindb/core/exceptions.py +1 -90
  31. lamindb/core/loaders.py +7 -13
  32. lamindb/core/relations.py +6 -4
  33. lamindb/core/storage/_anndata_accessor.py +41 -0
  34. lamindb/core/storage/_backed_access.py +2 -2
  35. lamindb/core/storage/_pyarrow_dataset.py +25 -15
  36. lamindb/core/storage/_tiledbsoma.py +56 -12
  37. lamindb/core/storage/paths.py +41 -22
  38. lamindb/core/subsettings/_creation_settings.py +4 -16
  39. lamindb/curators/__init__.py +2168 -833
  40. lamindb/curators/_cellxgene_schemas/__init__.py +26 -0
  41. lamindb/curators/_cellxgene_schemas/schema_versions.yml +104 -0
  42. lamindb/errors.py +96 -0
  43. lamindb/integrations/_vitessce.py +3 -3
  44. lamindb/migrations/0069_squashed.py +76 -75
  45. lamindb/migrations/0075_lamindbv1_part5.py +4 -5
  46. lamindb/migrations/0082_alter_feature_dtype.py +21 -0
  47. lamindb/migrations/0083_alter_feature_is_type_alter_flextable_is_type_and_more.py +94 -0
  48. lamindb/migrations/0084_alter_schemafeature_feature_and_more.py +35 -0
  49. lamindb/migrations/0085_alter_feature_is_type_alter_flextable_is_type_and_more.py +63 -0
  50. lamindb/migrations/0086_various.py +95 -0
  51. lamindb/migrations/0087_rename__schemas_m2m_artifact_feature_sets_and_more.py +41 -0
  52. lamindb/migrations/0088_schema_components.py +273 -0
  53. lamindb/migrations/0088_squashed.py +4372 -0
  54. lamindb/models.py +423 -156
  55. {lamindb-1.0.4.dist-info → lamindb-1.1.0.dist-info}/METADATA +10 -7
  56. lamindb-1.1.0.dist-info/RECORD +95 -0
  57. lamindb/curators/_spatial.py +0 -528
  58. lamindb/migrations/0052_squashed.py +0 -1261
  59. lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +0 -57
  60. lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +0 -35
  61. lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +0 -61
  62. lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +0 -22
  63. lamindb/migrations/0057_link_models_latest_report_and_others.py +0 -356
  64. lamindb/migrations/0058_artifact__actions_collection__actions.py +0 -22
  65. lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -31
  66. lamindb/migrations/0060_alter_artifact__actions.py +0 -22
  67. lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +0 -45
  68. lamindb/migrations/0062_add_is_latest_field.py +0 -32
  69. lamindb/migrations/0063_populate_latest_field.py +0 -45
  70. lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +0 -33
  71. lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +0 -22
  72. lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +0 -352
  73. lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +0 -20
  74. lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +0 -20
  75. lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -1294
  76. lamindb-1.0.4.dist-info/RECORD +0 -102
  77. {lamindb-1.0.4.dist-info → lamindb-1.1.0.dist-info}/LICENSE +0 -0
  78. {lamindb-1.0.4.dist-info → lamindb-1.1.0.dist-info}/WHEEL +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lamindb
3
- Version: 1.0.4
3
+ Version: 1.1.0
4
4
  Summary: A data framework for biology.
5
5
  Author-email: Lamin Labs <open-source@lamin.ai>
6
6
  Requires-Python: >=3.10,<3.13
@@ -9,9 +9,11 @@ Classifier: Programming Language :: Python :: 3.10
9
9
  Classifier: Programming Language :: Python :: 3.11
10
10
  Classifier: Programming Language :: Python :: 3.12
11
11
  Requires-Dist: lamin_utils==0.13.10
12
- Requires-Dist: lamin_cli==1.0.4
13
- Requires-Dist: lamindb_setup[aws]==1.0.3
12
+ Requires-Dist: lamin_cli==1.1.0
13
+ Requires-Dist: lamindb_setup[aws]==1.1.0
14
+ Requires-Dist: pyyaml
14
15
  Requires-Dist: pyarrow
16
+ Requires-Dist: pandera
15
17
  Requires-Dist: typing_extensions!=4.6.0
16
18
  Requires-Dist: python-dateutil
17
19
  Requires-Dist: scipy<1.15.0
@@ -20,9 +22,10 @@ Requires-Dist: anndata>=0.8.0,<=0.11.3
20
22
  Requires-Dist: fsspec
21
23
  Requires-Dist: graphviz
22
24
  Requires-Dist: psycopg2-binary
23
- Requires-Dist: bionty==1.0.0 ; extra == "bionty"
25
+ Requires-Dist: bionty==1.1.0 ; extra == "bionty"
24
26
  Requires-Dist: cellregistry ; extra == "cellregistry"
25
- Requires-Dist: clinicore==1.0.0 ; extra == "clinicore"
27
+ Requires-Dist: clinicore==1.1.0 ; extra == "clinicore"
28
+ Requires-Dist: tomlkit ; extra == "dev"
26
29
  Requires-Dist: line_profiler ; extra == "dev"
27
30
  Requires-Dist: pre-commit ; extra == "dev"
28
31
  Requires-Dist: nox ; extra == "dev"
@@ -34,14 +37,14 @@ Requires-Dist: mudata ; extra == "dev"
34
37
  Requires-Dist: nbproject_test>=0.6.0 ; extra == "dev"
35
38
  Requires-Dist: faker-biology ; extra == "dev"
36
39
  Requires-Dist: django-schema-graph ; extra == "erdiagram"
37
- Requires-Dist: readfcs>=1.1.9 ; extra == "fcs"
40
+ Requires-Dist: readfcs>=2.0.1 ; extra == "fcs"
38
41
  Requires-Dist: lamindb_setup[gcp] ; extra == "gcp"
39
42
  Requires-Dist: nbproject==0.10.5 ; extra == "jupyter"
40
43
  Requires-Dist: jupytext ; extra == "jupyter"
41
44
  Requires-Dist: nbconvert>=7.2.1 ; extra == "jupyter"
42
45
  Requires-Dist: mistune!=3.1.0 ; extra == "jupyter"
43
46
  Requires-Dist: omop ; extra == "omop"
44
- Requires-Dist: wetlab==1.0.1 ; extra == "wetlab"
47
+ Requires-Dist: wetlab==1.1.0 ; extra == "wetlab"
45
48
  Requires-Dist: zarr>=2.16.0,<3.0.0a0 ; extra == "zarr"
46
49
  Project-URL: Home, https://github.com/laminlabs/lamindb
47
50
  Provides-Extra: bionty
@@ -0,0 +1,95 @@
1
+ lamindb/__init__.py,sha256=tfnXH1mSnAnNlt_u6GoNwf66Dd-AMhCGelvFgMoQm50,2391
2
+ lamindb/_artifact.py,sha256=n1bLUBzmHOSwDMmRdI_81Nnm59_YVL0UupRFRdwvR7E,50856
3
+ lamindb/_can_curate.py,sha256=2vIvyJVqdFJs9q2a7j6kU8_TgY60PIobHX-ZHRvGTC0,20965
4
+ lamindb/_collection.py,sha256=c9I0XU_OtJkWmwPRBim_FuOiAk-J7-UOU91M5Nx62IM,15885
5
+ lamindb/_feature.py,sha256=A-3pjZavUTqr1VUsfF-7zSkOUVbWWsS6yyE_0-WHesI,11532
6
+ lamindb/_finish.py,sha256=_9yPSDE1xtuHtW08IO72go2n0Hw7GFkpio3SpqklHHM,19115
7
+ lamindb/_from_values.py,sha256=2Le2xrVMsZFCROWSh7hrhwq9HkRof8ddJ-fCu7pbeig,13596
8
+ lamindb/_is_versioned.py,sha256=6_LBAKD_fng6BReqitJUIxTUaQok3AeIpNnE_D8kHnQ,1293
9
+ lamindb/_parents.py,sha256=SMl9YbeLyg4KiNL3Oz_9MOK_lpv7OlUpo2AcC9xAPAc,17254
10
+ lamindb/_query_manager.py,sha256=znUAYeNuUk13YVXq51CllpGr33F15_PW_dQYcok0gO8,3636
11
+ lamindb/_query_set.py,sha256=9UdArhet0UweMlU1w3vqspp2ovJQOz8blBQgMmfWrAE,25414
12
+ lamindb/_record.py,sha256=BiREwu7wbtbIpcNWzLOLRElmMXVk-QJjm4UJXRgC8fk,38761
13
+ lamindb/_run.py,sha256=Ef8PGag2AnzpOQoOrh-L6DzMygxBcwwJvX3JGDEqv24,2062
14
+ lamindb/_save.py,sha256=t7cWmksCSC-cwx-YdmPBvSiaBgO80d_H3NlR6oNnXcM,12783
15
+ lamindb/_schema.py,sha256=py0bUgRjNeRxAGRp8-0zySTKbH6lPpkzrJG5XYflbHI,12506
16
+ lamindb/_storage.py,sha256=8HA_0aIc_utOWV20wdH6-vOOhz6dgE5tK6wJJDgt64Q,431
17
+ lamindb/_tracked.py,sha256=D3996sfch0tKXEk_1aWNG4LHCwWMBahHQeoasBLd2fg,3626
18
+ lamindb/_transform.py,sha256=KBXjXRiL7uvGqlS9EvjCLP9td3kGgilWpdOzZ59JeTE,6394
19
+ lamindb/_ulabel.py,sha256=TTDBRB3GVJa6x78HI9gbEYWqIhabObIFH3Mihy4x4j0,1786
20
+ lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
21
+ lamindb/_view.py,sha256=c4eN5hcBlg3TVnljKefbyWAq0eBncjMp2xQcb5OaGWg,4982
22
+ lamindb/errors.py,sha256=hT79cjnDucvtBP41uJqWrSMZjMdved5Nm1GnZwuq38w,1738
23
+ lamindb/models.py,sha256=nkBdj2QmM-XRaQV9izXU6GM3G-AOYUMCGSR_2J3EdMc,159252
24
+ lamindb/base/__init__.py,sha256=J0UpYObi9hJBFyBpAXp4wB3DaJx48R2SaUeB4wjiFvc,267
25
+ lamindb/base/fields.py,sha256=RdwYHQmB7B-jopD_K2QNL5vjhOelu7DWGgqQItXr3pg,8024
26
+ lamindb/base/ids.py,sha256=WzHWiHZtlRUKqxz_p-76ks_JSW669ztvriE7Z3A0yHg,1736
27
+ lamindb/base/types.py,sha256=JfZk0xmhLsWusU0s4SNjhRnQ52mn-cSiG5Gf4SsACBs,1227
28
+ lamindb/base/users.py,sha256=g4ZLQb6eey66FO9eEumbfDpJi_FZZsiLVe2Frz9JuLI,978
29
+ lamindb/base/validation.py,sha256=RESPeSCPzeELaKto8SEb2WB5sWPv76VkOUIPxUsxOGY,2250
30
+ lamindb/core/__init__.py,sha256=4eizMbkZAd0f7UhTyzepsLOyXNQmvtATHwmkfPKAUuc,1663
31
+ lamindb/core/_context.py,sha256=qht_oNj9s4ZWJp74o77PEYAOaw0h5BpbFJuNTTqP9ik,28901
32
+ lamindb/core/_data.py,sha256=PbE1ryKwmJh30EooceZBhYQr82dGxvWIEiaLnp5aLFE,19138
33
+ lamindb/core/_describe.py,sha256=dnpxpvwJpXIWtrnQ1l_9xDjSxw-zb73-oD6z_rumXE4,4888
34
+ lamindb/core/_django.py,sha256=1g1kWWF_3WCTdENGo1k5eoEulfvHJz8a0zI7oJcKBxU,7629
35
+ lamindb/core/_feature_manager.py,sha256=IVo3IXQw2ayx3cXxNhdNGvVHcf7UPbRy0MsBW0S8AG8,48551
36
+ lamindb/core/_label_manager.py,sha256=hDRAJLS-PDP_hHySfuJG-13463jN1sZwE5j4pRGVW_c,11908
37
+ lamindb/core/_mapped_collection.py,sha256=a8Ks18eIg699K5rkaPPYS_efyPS63WfYDGyJyHYq8gU,25523
38
+ lamindb/core/_settings.py,sha256=haLHE1dhog_Sz6gnecTW8E548njWH5nVt8mTFPEs6ZM,5733
39
+ lamindb/core/_sync_git.py,sha256=xu1o6zlBD_pTRpBPVXiHKOI2tmtGaUfXvuuII2AAfM4,5875
40
+ lamindb/core/_track_environment.py,sha256=nVEO98P7ZrUWyixMI3AdoD7vcUszIVciZwgPOsQUsNU,814
41
+ lamindb/core/exceptions.py,sha256=FMEoSvT3FvtLkxQAt2oDXPeaPem8V5x5UBbTsPFYU5w,53
42
+ lamindb/core/fields.py,sha256=zM6G7CiE6mU_5heLWwIhE3d5LqAjPCEwgs0eechFm0c,175
43
+ lamindb/core/loaders.py,sha256=yQA3d07N3t4B53k5JTlihp0fbFXBU4UC415aQFHPEos,4571
44
+ lamindb/core/relations.py,sha256=mZWt2cImMoF3HElyMOG99IP_2NUglOhwktwqey6uR8k,3546
45
+ lamindb/core/types.py,sha256=DS4uXaCswAW9tqz4MZ_sYc1_QZ6il--Z4x8HeHNQgRw,162
46
+ lamindb/core/versioning.py,sha256=tUZthgDwbz7UBx1wx_MsdsXXsltE1E9clapmRkmvKlU,4953
47
+ lamindb/core/datasets/__init__.py,sha256=PPraqD4nMr-DvI1_6aDQde2p9MkgJY-ukV0Qb34xnyg,1722
48
+ lamindb/core/datasets/_core.py,sha256=9-By-vT7KUbdD8jFQK7KaRcvODj0hiwQMeM2d9mgmss,19661
49
+ lamindb/core/datasets/_fake.py,sha256=BZF9R_1iF0HDnvtZNqL2FtsjSMuqDIfuFxnw_LJYIh4,953
50
+ lamindb/core/datasets/_small.py,sha256=uqVY3Qg0Mf7uHGGpIaYhWzo8M7mBtK3DZDOZ-ROVUeM,5002
51
+ lamindb/core/storage/__init__.py,sha256=JOIMu_7unbyhndtH1j0Q-9AvY8knSuc1IJO9sQnyBAQ,498
52
+ lamindb/core/storage/_anndata_accessor.py,sha256=umQFD0r8tolX2z9sWZYt88rb1nv-yIIHHO_IuhTYLzU,25858
53
+ lamindb/core/storage/_anndata_sizes.py,sha256=aXO3OB--tF5MChenSsigW6Q-RuE8YJJOUTVukkLrv9A,1029
54
+ lamindb/core/storage/_backed_access.py,sha256=FGGNmu-2LaMo0UmPe6rxtxkaeGcGSFYgR7dgzri6URY,3589
55
+ lamindb/core/storage/_pyarrow_dataset.py,sha256=0hFjuajLMnasRQCwSXhyire-s3aoHCqZFdYHR5tz_Do,1455
56
+ lamindb/core/storage/_tiledbsoma.py,sha256=t3wwfbisl33kVkDHO3aoGxb7TJybbRQ_h-QuFFU5YZE,10202
57
+ lamindb/core/storage/_valid_suffixes.py,sha256=vUSeQ4s01rdhD_vSd6wKmFBsgMJAKkBMnL_T9Y1znMg,501
58
+ lamindb/core/storage/_zarr.py,sha256=sVd9jVt2q91maeL6GAqMhT6sqtD04vQRfxY3mvIUQlc,3854
59
+ lamindb/core/storage/objects.py,sha256=5vM2T_upuzrXt2b7fQeQ2FUO710-FRbubxTzKzV2ECU,1812
60
+ lamindb/core/storage/paths.py,sha256=IcHSC7tcK1UO75wAMsqWjwxVaW-GE6sN0oRq8IWfmrI,7015
61
+ lamindb/core/subsettings/__init__.py,sha256=j6G9WAJLK-x9FzPSFw-HJUmOseZKGTbK-oLTKI_X_zs,126
62
+ lamindb/core/subsettings/_creation_settings.py,sha256=NGHWKqCFSzVNBxAr2VnmdYguiFdW29XUK7T9wRsVshg,906
63
+ lamindb/curators/__init__.py,sha256=Ei8T7137JggXc-zCwcbvI_e66OQc4O1fkqI4vToiQcs,142919
64
+ lamindb/curators/_cellxgene_schemas/__init__.py,sha256=z-GL_JtFiZhEHUHxQuJBN6SgBCKAMHw9hheDbCje-zk,819
65
+ lamindb/curators/_cellxgene_schemas/schema_versions.yml,sha256=nipsuquq-H9n0KNOxctYV6EVshh55FB5AjujLbxsabI,1942
66
+ lamindb/integrations/__init__.py,sha256=RWGMYYIzr8zvmNPyVB4m-p4gMDhxdRbjES2Ed23OItw,215
67
+ lamindb/integrations/_vitessce.py,sha256=gt3o1sCCcdrKHgM8sJJGFPZggoQEAYfa7ntmA5ThPXo,3992
68
+ lamindb/migrations/0069_squashed.py,sha256=gMWv65ErtjJZyWWo1b4uFHXWa6MSuBcmqz4ElZ6GPf4,62639
69
+ lamindb/migrations/0070_lamindbv1_migrate_data.py,sha256=tyq_xi6U8TXi9C2Raf6v_UTtfyfqQOUIFJzYj4oCgAE,2429
70
+ lamindb/migrations/0071_lamindbv1_migrate_schema.py,sha256=r3PPpq4RK7rhrLWjhVACd5i-tSUTBF0X6Luc5v-g0Lg,25125
71
+ lamindb/migrations/0072_remove_user__branch_code_remove_user_aux_and_more.py,sha256=Nek9Mkuop3LgjAuW3moY-dyPXroCFq8UyvCAAWEquCM,4443
72
+ lamindb/migrations/0073_merge_ourprojects.py,sha256=f0uZ63X0iEylKDlYWD6CAYMge5RcwPSH6yGsoA1KgPQ,35032
73
+ lamindb/migrations/0074_lamindbv1_part4.py,sha256=NqYjEAmm2bNuK42ufLaJZDBjCjAJNv-N9pEXK7iCfyA,11557
74
+ lamindb/migrations/0075_lamindbv1_part5.py,sha256=-3pqXz7e-NbWGTqMhOStcx8zU6HjS-8dlqdyxqexWao,8960
75
+ lamindb/migrations/0076_lamindbv1_part6.py,sha256=G_Wgog-OgquE0-h_CykjiDWUyPdYlCwA8gXjeuBY2OM,21349
76
+ lamindb/migrations/0077_lamindbv1_part6b.py,sha256=v7k8OZX9o5ppSJU_yhHlIXGTobTm30bo1dAIi8tUkEI,8211
77
+ lamindb/migrations/0078_lamindbv1_part6c.py,sha256=RWRXBwyyQ_rFTN5kwstBziV6tqHJcGYI2vsFmuYCCz0,17084
78
+ lamindb/migrations/0079_alter_rundata_value_json_and_more.py,sha256=yQmbs8yWrFLOVQJqAfzLNMZOqTSnXyG-mQgpO7ls1u8,995
79
+ lamindb/migrations/0080_polish_lamindbv1.py,sha256=VfCwJtHlBsMPIyFQ2oh24oWkiRXjDvXRpKe5fBZ63aM,17660
80
+ lamindb/migrations/0081_revert_textfield_collection.py,sha256=uHuJ0W4Ips7BrnQnQBGPMn2eFQz29a1QAdHzN7XlDxo,490
81
+ lamindb/migrations/0082_alter_feature_dtype.py,sha256=qAmZL2g0x43Jk4nbwE-c5z_29Q6vzvrL8DYJE16ZTVo,508
82
+ lamindb/migrations/0083_alter_feature_is_type_alter_flextable_is_type_and_more.py,sha256=ANFJmrd7rtpDvk0OP0EwAhkB-K4GJobdOaEWmjrQqhM,2725
83
+ lamindb/migrations/0084_alter_schemafeature_feature_and_more.py,sha256=ATL1Tyfvtvs0ZUH3JwWfBhcRsOGepCoMEjp9yZmO7z0,1012
84
+ lamindb/migrations/0085_alter_feature_is_type_alter_flextable_is_type_and_more.py,sha256=09KGDirJWH1wkmYBREaF_UrkDGS9qwGxOQuleKydDbI,1941
85
+ lamindb/migrations/0086_various.py,sha256=6-_vkiBEiv0ZVp-JCMVdea-AaKRU61DHyMWprU_VUrg,2824
86
+ lamindb/migrations/0087_rename__schemas_m2m_artifact_feature_sets_and_more.py,sha256=zRlBS89VhmECwGA_y9LghKibxJjtEAmHsEavKdj1ces,1132
87
+ lamindb/migrations/0088_schema_components.py,sha256=9EeoEXX2mf8RtGv_eqOH9zpI2Dg7jhOT9Kxe-9vCiTQ,9230
88
+ lamindb/migrations/0088_squashed.py,sha256=HbbVHaDJX4QQaErOUaSrnayWNrUVf5LWIyirS50YgcE,157984
89
+ lamindb/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
90
+ lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
91
+ lamindb/setup/core/__init__.py,sha256=SevlVrc2AZWL3uALbE5sopxBnIZPWZ1IB0NBDudiAL8,167
92
+ lamindb-1.1.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
93
+ lamindb-1.1.0.dist-info/WHEEL,sha256=CpUCUxeHQbRN5UGRQHYRJorO5Af-Qy_fHMctcQ8DSGI,82
94
+ lamindb-1.1.0.dist-info/METADATA,sha256=0J09sRvpHzRbXbUhmIjUEIE3n-EwwSQd-woFAzzjvfc,2696
95
+ lamindb-1.1.0.dist-info/RECORD,,
@@ -1,528 +0,0 @@
1
- import random
2
- from collections.abc import Iterable, MutableMapping
3
- from typing import Any
4
-
5
- import lamindb_setup as ln_setup
6
- import pandas as pd
7
- from lamin_utils import colors, logger
8
- from spatialdata import SpatialData
9
-
10
- from lamindb.base.types import FieldAttr
11
- from lamindb.core._data import add_labels
12
- from lamindb.core._feature_manager import parse_staged__schemas_m2m_from_anndata
13
- from lamindb.core._settings import settings
14
- from lamindb.core.exceptions import ValidationError
15
- from lamindb.curators import (
16
- AnnDataCurator,
17
- CurateLookup,
18
- DataFrameCurator,
19
- _maybe_curation_keys_not_present,
20
- _ref_is_name,
21
- check_registry_organism,
22
- get_current_filter_kwargs,
23
- )
24
- from lamindb.models import Artifact, Collection, Feature, Record, Run, Schema
25
-
26
-
27
- class SpatialDataCurator:
28
- """Curation flow for a ``Spatialdata`` object.
29
-
30
- See also :class:`~lamindb.Curator`.
31
-
32
- Note that if genes or other measurements are removed from the SpatialData object,
33
- the object should be recreated.
34
-
35
- In the following docstring, an accessor refers to either a ``.table`` key or the ``sample_metadata_key``.
36
-
37
- Args:
38
- sdata: The SpatialData object to curate.
39
- var_index: A dictionary mapping table keys to the ``.var`` indices.
40
- categoricals: A nested dictionary mapping an accessor to dictionaries that map columns to a registry field.
41
- using_key: A reference LaminDB instance.
42
- organism: The organism name.
43
- sources: A dictionary mapping an accessor to dictionaries that map columns to Source records.
44
- exclude: A dictionary mapping an accessor to dictionaries of column names to values to exclude from validation.
45
- When specific :class:`~bionty.Source` instances are pinned and may lack default values (e.g., "unknown" or "na"),
46
- using the exclude parameter ensures they are not validated.
47
- verbosity: The verbosity level of the logger.
48
- sample_metadata_key: The key in ``.attrs`` that stores the sample level metadata.
49
-
50
- Examples:
51
- >>> from lnschema_spatial import SpatialDataCurator
52
- >>> import bionty as bt
53
- >>> curator = SpatialDataCurator(
54
- ... sdata,
55
- ... var_index={
56
- ... "table_1": bt.Gene.ensembl_gene_id,
57
- ... },
58
- ... categoricals={
59
- ... "table1":
60
- ... {"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name},
61
- ... "sample":
62
- ... {"experimental_factor": bt.ExperimentalFactor.name},
63
- ... },
64
- ... organism="human",
65
- ... )
66
- """
67
-
68
- def __init__(
69
- self,
70
- sdata: SpatialData,
71
- var_index: dict[str, FieldAttr],
72
- categoricals: dict[str, dict[str, FieldAttr]] | None = None,
73
- using_key: str | None = None,
74
- verbosity: str = "hint",
75
- organism: str | None = None,
76
- sources: dict[str, dict[str, Record]] | None = None,
77
- exclude: dict[str, dict] | None = None,
78
- *,
79
- sample_metadata_key: str = "sample",
80
- ) -> None:
81
- if sources is None:
82
- sources = {}
83
- self._sources = sources
84
- if exclude is None:
85
- exclude = {}
86
- self._exclude = exclude
87
- self._sdata = sdata
88
- self._sample_metadata_key = sample_metadata_key
89
- self._kwargs = {"organism": organism} if organism else {}
90
- self._var_fields = var_index
91
- self._verify_accessor_exists(self._var_fields.keys())
92
- self._categoricals = categoricals
93
- self._table_keys = set(self._var_fields.keys()) | set(
94
- self._categoricals.keys() - {self._sample_metadata_key}
95
- )
96
- self._using_key = using_key
97
- self._verbosity = verbosity
98
- self._sample_df_curator = None
99
- self._sample_metadata = self._sdata.get_attrs(
100
- key=self._sample_metadata_key, return_as="df", flatten=True
101
- )
102
- self._validated = False
103
-
104
- # Check validity of keys in categoricals
105
- nonval_keys = []
106
- for accessor, accessor_categoricals in self._categoricals.items():
107
- if accessor == self._sample_metadata_key:
108
- for key in accessor_categoricals.keys():
109
- if key not in self._sample_metadata.columns:
110
- nonval_keys.append(key)
111
- else:
112
- for key in accessor_categoricals.keys():
113
- if key not in self._sdata[accessor].obs.columns:
114
- nonval_keys.append(key)
115
-
116
- _maybe_curation_keys_not_present(nonval_keys, "categoricals")
117
-
118
- # check validity of keys in sources and exclude
119
- for name, dct in (("sources", self._sources), ("exclude", self._exclude)):
120
- nonval_keys = []
121
- for accessor, accessor_sources in dct.items():
122
- columns = (
123
- self._sample_metadata.columns
124
- if accessor == self._sample_metadata_key
125
- else self._sdata[accessor].obs.columns
126
- )
127
- for key in accessor_sources:
128
- if key not in columns:
129
- nonval_keys.append(key)
130
- _maybe_curation_keys_not_present(nonval_keys, name)
131
-
132
- # Set up sample level metadata and table Curator objects
133
- if self._sample_metadata_key in self._categoricals.keys():
134
- self._sample_df_curator = DataFrameCurator(
135
- df=self._sample_metadata,
136
- columns=Feature.name,
137
- categoricals=self._categoricals.get(self._sample_metadata_key, {}),
138
- using_key=using_key,
139
- verbosity=verbosity,
140
- sources=self._sources.get(self._sample_metadata_key),
141
- exclude=self._exclude.get(self._sample_metadata_key),
142
- check_valid_keys=False,
143
- **self._kwargs,
144
- )
145
- self._table_adata_curators = {
146
- table: AnnDataCurator(
147
- data=sdata[table],
148
- var_index=var_index.get(table),
149
- categoricals=self._categoricals.get(table),
150
- using_key=using_key,
151
- verbosity=verbosity,
152
- sources=self._sources.get(table),
153
- exclude=self._exclude.get(table),
154
- **self._kwargs,
155
- )
156
- for table in self._table_keys
157
- }
158
-
159
- self._non_validated = None
160
-
161
- @property
162
- def var_index(self) -> FieldAttr:
163
- """Return the registry fields to validate variables indices against."""
164
- return self._var_fields
165
-
166
- @property
167
- def categoricals(self) -> dict[str, dict[str, FieldAttr]]:
168
- """Return the categorical keys and fields to validate against."""
169
- return self._categoricals
170
-
171
- @property
172
- def non_validated(self) -> dict[str, dict[str, list[str]]]:
173
- """Return the non-validated features and labels."""
174
- if self._non_validated is None:
175
- raise ValidationError("Please run validate() first!")
176
- return self._non_validated
177
-
178
- def _verify_accessor_exists(self, accessors: Iterable[str]) -> None:
179
- """Verify that the accessors exist (either a valid table or in attrs)."""
180
- for acc in accessors:
181
- is_present = False
182
- try:
183
- self._sdata.get_attrs(key=acc)
184
- is_present = True
185
- except KeyError:
186
- if acc in self._sdata.tables.keys():
187
- is_present = True
188
- if not is_present:
189
- raise ValidationError(f"Accessor '{acc}' does not exist!")
190
-
191
- def lookup(
192
- self, using_key: str | None = None, public: bool = False
193
- ) -> CurateLookup:
194
- """Look up categories.
195
-
196
- Args:
197
- using_key: The instance where the lookup is performed.
198
- public: Whether the lookup is performed on the public reference.
199
- """
200
- cat_values_dict = list(self.categoricals.values())[0]
201
- return CurateLookup(
202
- categoricals=cat_values_dict,
203
- slots={"accessors": cat_values_dict.keys()},
204
- using_key=using_key or self._using_key,
205
- public=public,
206
- )
207
-
208
- def _update_registry_all(self) -> None:
209
- """Saves labels of all features for sample and table metadata."""
210
- if self._sample_df_curator is not None:
211
- self._sample_df_curator._update_registry_all(
212
- validated_only=True, **self._kwargs
213
- )
214
- for _, adata_curator in self._table_adata_curators.items():
215
- adata_curator._update_registry_all(validated_only=True, **self._kwargs)
216
-
217
- def add_new_from_var_index(
218
- self, table: str, organism: str | None = None, **kwargs
219
- ) -> None:
220
- """Save new values from ``.var.index`` of table.
221
-
222
- Args:
223
- table: The table key.
224
- organism: The organism name.
225
- **kwargs: Additional keyword arguments to pass to create new records.
226
- """
227
- if self._non_validated is None:
228
- raise ValidationError("Run .validate() first.")
229
- self._kwargs.update({"organism": organism} if organism else {})
230
- self._table_adata_curators[table].add_new_from_var_index(
231
- **self._kwargs, **kwargs
232
- )
233
- self._non_validated[table].pop("var_index")
234
-
235
- if len(self.non_validated[table].values()) == 0:
236
- self.non_validated.pop(table)
237
-
238
- def add_new_from(
239
- self,
240
- key: str,
241
- accessor: str | None = None,
242
- organism: str | None = None,
243
- **kwargs,
244
- ) -> None:
245
- """Save new values of categorical from sample level metadata or table.
246
-
247
- Args:
248
- key: The key referencing the slot in the DataFrame.
249
- accessor: The accessor key such as 'sample' or 'table x'.
250
- organism: The organism name.
251
- **kwargs: Additional keyword arguments to pass to create new records.
252
- """
253
- if self._non_validated is None:
254
- raise ValidationError("Run .validate() first.")
255
-
256
- if len(kwargs) > 0 and key == "all":
257
- raise ValueError("Cannot pass additional arguments to 'all' key!")
258
-
259
- self._kwargs.update({"organism": organism} if organism else {})
260
- if accessor in self._table_adata_curators:
261
- adata_curator = self._table_adata_curators[accessor]
262
- adata_curator.add_new_from(key=key, **self._kwargs, **kwargs)
263
- if accessor == self._sample_metadata_key:
264
- self._sample_df_curator.add_new_from(key=key, **self._kwargs, **kwargs)
265
-
266
- if len(self.non_validated[accessor].values()) == 0:
267
- self.non_validated.pop(accessor)
268
-
269
- def standardize(self, key: str, accessor: str | None = None) -> None:
270
- """Replace synonyms with canonical values.
271
-
272
- Modifies the dataset inplace.
273
-
274
- Args:
275
- key: The key referencing the slot in the table or sample metadata.
276
- accessor: The accessor key such as 'sample_key' or 'table_key'.
277
- """
278
- if len(self.non_validated) == 0:
279
- logger.warning("values are already standardized")
280
- return
281
-
282
- if accessor == self._sample_metadata_key:
283
- if key not in self._sample_metadata.columns:
284
- raise ValueError(f"key '{key}' not present in '{accessor}'!")
285
- else:
286
- if key not in self._sdata.tables[accessor].obs.columns:
287
- raise ValueError(f"key '{key}' not present in '{accessor}'!")
288
-
289
- if accessor in self._table_adata_curators.keys():
290
- adata_curator = self._table_adata_curators[accessor]
291
- adata_curator.standardize(key)
292
- if accessor == self._sample_metadata_key:
293
- self._sample_df_curator.standardize(key)
294
-
295
- if len(self.non_validated[accessor].values()) == 0:
296
- self.non_validated.pop(accessor)
297
-
298
- def validate(self, organism: str | None = None) -> bool:
299
- """Validate variables and categorical observations.
300
-
301
- This method also registers the validated records in the current instance:
302
- - from public sources
303
- - from the using_key instance
304
-
305
- Args:
306
- organism: The organism name.
307
-
308
- Returns:
309
- Whether the SpatialData object is validated.
310
- """
311
- from lamindb.core._settings import settings
312
-
313
- self._kwargs.update({"organism": organism} if organism else {})
314
- if self._using_key is not None and self._using_key != "default":
315
- logger.important(
316
- f"validating using registries of instance {colors.italic(self._using_key)}"
317
- )
318
-
319
- # add all validated records to the current instance
320
- verbosity = settings.verbosity
321
- try:
322
- settings.verbosity = "error"
323
- self._update_registry_all()
324
- finally:
325
- settings.verbosity = verbosity
326
-
327
- self._non_validated = {} # type: ignore
328
-
329
- sample_validated = True
330
- if self._sample_df_curator:
331
- logger.info(f"validating categoricals of '{self._sample_metadata_key}' ...")
332
- sample_validated &= self._sample_df_curator.validate(**self._kwargs)
333
- if len(self._sample_df_curator.non_validated) > 0:
334
- self._non_validated["sample"] = self._sample_df_curator.non_validated # type: ignore
335
- logger.print("")
336
-
337
- mods_validated = True
338
- for table, adata_curator in self._table_adata_curators.items():
339
- logger.info(f"validating categoricals of table '{table}' ...")
340
- mods_validated &= adata_curator.validate(**self._kwargs)
341
- if len(adata_curator.non_validated) > 0:
342
- self._non_validated[table] = adata_curator.non_validated # type: ignore
343
- logger.print("")
344
-
345
- self._validated = sample_validated & mods_validated
346
- return self._validated
347
-
348
- def save_artifact(
349
- self,
350
- description: str | None = None,
351
- key: str | None = None,
352
- revises: Artifact | None = None,
353
- run: Run | None = None,
354
- ) -> Artifact:
355
- """Save the validated ``SpatialData`` object and metadata.
356
-
357
- Args:
358
- description: A description of the ``SpatialData`` object.
359
- key: A path-like key to reference artifact in default storage, e.g., `"myfolder/myfile.zarr"`.
360
- Artifacts with the same key form a revision family.
361
- revises: Previous version of the artifact. Triggers a revision.
362
- run: The Run that creates the artifact.
363
-
364
- Returns:
365
- A saved Artifact record.
366
- """
367
- if not self._validated:
368
- self.validate()
369
- if not self._validated:
370
- raise ValidationError("Dataset does not validate. Please curate.")
371
-
372
- verbosity = settings.verbosity
373
- try:
374
- settings.verbosity = "warning"
375
-
376
- # Write the SpatialData object to a random path in tmp directory
377
- # The Artifact constructor will move it to the cache
378
- write_path = f"{settings.cache_dir}/{random.randint(10**7, 10**8 - 1)}.zarr"
379
- self._sdata.write(write_path)
380
-
381
- # Create the Artifact and associate Artifact metadata
382
- self._artifact = Artifact(
383
- write_path,
384
- description=description,
385
- key=key,
386
- revises=revises,
387
- run=run,
388
- )
389
- # According to Tim it is not easy to calculate the number of observations.
390
- # We would have to write custom code to iterate over labels (which might not even exist at that point)
391
- self._artifact.otype = "spatialdata"
392
- self._artifact.save()
393
-
394
- # Link schemas
395
- feature_kwargs = check_registry_organism(
396
- (list(self._var_fields.values())[0].field.model),
397
- self._kwargs.get("organism"),
398
- )
399
-
400
- def _add_set_from_spatialdata(
401
- host: Artifact | Collection | Run,
402
- var_fields: dict[str, FieldAttr],
403
- obs_fields: dict[str, FieldAttr] = None,
404
- mute: bool = False,
405
- organism: str | Record | None = None,
406
- ):
407
- """Add Schemas from SpatialData."""
408
- if obs_fields is None:
409
- obs_fields = {}
410
- assert host.otype == "spatialdata" # noqa: S101
411
-
412
- _schemas_m2m = {}
413
-
414
- # sample features
415
- sample_features = Feature.from_values(self._sample_metadata.columns)
416
- if len(sample_features) > 0:
417
- _schemas_m2m[self._sample_metadata_key] = Schema(
418
- features=sample_features
419
- )
420
-
421
- # table features
422
- for table, field in var_fields.items():
423
- table_fs = parse_staged__schemas_m2m_from_anndata(
424
- self._sdata[table],
425
- var_field=field,
426
- obs_field=obs_fields.get(table, Feature.name),
427
- mute=mute,
428
- organism=organism,
429
- )
430
- for k, v in table_fs.items():
431
- _schemas_m2m[f"['{table}'].{k}"] = v
432
-
433
- def _unify_staged__schemas_m2m_by_hash(
434
- _schemas_m2m: MutableMapping[str, Schema],
435
- ):
436
- unique_values: dict[str, Any] = {}
437
-
438
- for key, value in _schemas_m2m.items():
439
- value_hash = (
440
- value.hash
441
- ) # Assuming each value has a .hash attribute
442
- if value_hash in unique_values:
443
- _schemas_m2m[key] = unique_values[value_hash]
444
- else:
445
- unique_values[value_hash] = value
446
-
447
- return _schemas_m2m
448
-
449
- # link feature sets
450
- host._staged__schemas_m2m = _unify_staged__schemas_m2m_by_hash(
451
- _schemas_m2m
452
- )
453
- host.save()
454
-
455
- _add_set_from_spatialdata(
456
- self._artifact, var_fields=self._var_fields, **feature_kwargs
457
- )
458
-
459
- # Link labels
460
- def _add_labels_from_spatialdata(
461
- data,
462
- artifact: Artifact,
463
- fields: dict[str, FieldAttr],
464
- feature_ref_is_name: bool | None = None,
465
- ):
466
- """Add Labels from SpatialData."""
467
- features = Feature.lookup().dict()
468
- for key, field in fields.items():
469
- feature = features.get(key)
470
- registry = field.field.model
471
- filter_kwargs = check_registry_organism(
472
- registry, self._kwargs.get("organism")
473
- )
474
- filter_kwargs_current = get_current_filter_kwargs(
475
- registry, filter_kwargs
476
- )
477
- df = data if isinstance(data, pd.DataFrame) else data.obs
478
- labels = registry.from_values(
479
- df[key],
480
- field=field,
481
- **filter_kwargs_current,
482
- )
483
- if len(labels) == 0:
484
- continue
485
-
486
- label_ref_is_name = None
487
- if hasattr(registry, "_name_field"):
488
- label_ref_is_name = field.field.name == registry._name_field
489
- add_labels(
490
- artifact,
491
- records=labels,
492
- feature=feature,
493
- feature_ref_is_name=feature_ref_is_name,
494
- label_ref_is_name=label_ref_is_name,
495
- from_curator=True,
496
- )
497
-
498
- for accessor, accessor_fields in self._categoricals.items():
499
- column_field = self._var_fields.get(accessor)
500
- if accessor == self._sample_metadata_key:
501
- _add_labels_from_spatialdata(
502
- self._sample_metadata,
503
- self._artifact,
504
- accessor_fields,
505
- feature_ref_is_name=(
506
- None if column_field is None else _ref_is_name(column_field)
507
- ),
508
- )
509
- else:
510
- _add_labels_from_spatialdata(
511
- self._sdata.tables[accessor],
512
- self._artifact,
513
- accessor_fields,
514
- feature_ref_is_name=(
515
- None if column_field is None else _ref_is_name(column_field)
516
- ),
517
- )
518
-
519
- finally:
520
- settings.verbosity = verbosity
521
-
522
- slug = ln_setup.settings.instance.slug
523
- if ln_setup.settings.instance.is_remote: # pragma: no cover
524
- logger.important(
525
- f"go to https://lamin.ai/{slug}/artifact/{self._artifact.uid}"
526
- )
527
-
528
- return self._artifact