lamindb 0.69.1__py3-none-any.whl → 0.69.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -163,28 +163,65 @@ def read_dataframe(elem: Union[h5py.Dataset, h5py.Group]):
163
163
 
164
164
  @registry.register("h5py")
165
165
  def safer_read_partial(elem, indices):
166
- if get_spec(elem).encoding_type == "":
167
- if isinstance(elem, h5py.Dataset):
166
+ is_dataset = isinstance(elem, h5py.Dataset)
167
+ indices_inverse: Optional[list] = None
168
+ encoding_type = get_spec(elem).encoding_type
169
+ # h5py selection for datasets requires sorted indices
170
+ if is_dataset or encoding_type == "dataframe":
171
+ indices_increasing = []
172
+ indices_inverse = []
173
+ for indices_dim in indices:
174
+ if isinstance(indices_dim, np.ndarray) and not np.all(
175
+ np.diff(indices_dim) > 0
176
+ ):
177
+ idx_unique, idx_inverse = np.unique(indices_dim, return_inverse=True)
178
+ indices_increasing.append(idx_unique)
179
+ indices_inverse.append(idx_inverse)
180
+ else:
181
+ indices_increasing.append(indices_dim)
182
+ indices_inverse.append(None)
183
+ indices = tuple(indices_increasing)
184
+ if all(idx is None for idx in indices_inverse):
185
+ indices_inverse = None
186
+ result = None
187
+ if encoding_type == "":
188
+ if is_dataset:
168
189
  dims = len(elem.shape)
169
190
  if dims == 2:
170
- return elem[indices]
191
+ result = elem[indices]
171
192
  elif dims == 1:
172
193
  if indices[0] == slice(None):
173
- return elem[indices[1]]
194
+ result = elem[indices[1]]
174
195
  elif indices[1] == slice(None):
175
- return elem[indices[0]]
196
+ result = elem[indices[0]]
176
197
  elif isinstance(elem, h5py.Group):
177
198
  try:
178
199
  ds = CSRDataset(elem)
179
- return _subset_sparse(ds, indices)
200
+ result = _subset_sparse(ds, indices)
180
201
  except Exception:
181
202
  pass
182
- raise ValueError(
183
- "Can not get a subset of the element of type"
184
- f" {type(elem).__name__} with an empty spec."
185
- )
203
+ if result is None:
204
+ raise ValueError(
205
+ "Can not get a subset of the element of type"
206
+ f" {type(elem).__name__} with an empty spec."
207
+ )
208
+ else:
209
+ result = read_elem_partial(elem, indices=indices)
210
+ if indices_inverse is None:
211
+ return result
186
212
  else:
187
- return read_elem_partial(elem, indices=indices)
213
+ if indices_inverse[0] is None:
214
+ if len(result.shape) == 2:
215
+ return result[:, indices_inverse[1]]
216
+ else:
217
+ return result[indices_inverse[1]]
218
+ elif indices_inverse[1] is None:
219
+ if isinstance(result, pd.DataFrame):
220
+ return result.iloc[indices_inverse[0]]
221
+ else:
222
+ return result[indices_inverse[0]]
223
+ else:
224
+ return result[tuple(indices_inverse)]
188
225
 
189
226
 
190
227
  @registry.register("h5py")
@@ -105,10 +105,7 @@ def read_adata_h5ad(filepath, **kwargs) -> ad.AnnData:
105
105
 
106
106
 
107
107
  def store_artifact(localpath: UPathStr, storagepath: UPath) -> None:
108
- """Store directory or file to configured storage location.
109
-
110
- Returns size in bytes.
111
- """
108
+ """Store directory or file to configured storage location."""
112
109
  localpath = Path(localpath)
113
110
  if not isinstance(storagepath, LocalPathClasses):
114
111
  # this uploads files and directories
@@ -200,9 +197,7 @@ def load_to_memory(filepath: UPathStr, stream: bool = False, **kwargs):
200
197
  """
201
198
  filepath = create_path(filepath)
202
199
 
203
- if filepath.suffix in (".zarr", ".zrad"):
204
- stream = True
205
- elif filepath.suffix != ".h5ad":
200
+ if filepath.suffix not in {".h5ad", ".zarr", ".zrad"}:
206
201
  stream = False
207
202
 
208
203
  if not stream:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lamindb
3
- Version: 0.69.1
3
+ Version: 0.69.3
4
4
  Summary: A data framework for biology.
5
5
  Author-email: Lamin Labs <open-source@lamin.ai>
6
6
  Requires-Python: >=3.8
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
9
9
  Classifier: Programming Language :: Python :: 3.9
10
10
  Classifier: Programming Language :: Python :: 3.10
11
11
  Classifier: Programming Language :: Python :: 3.11
12
- Requires-Dist: lnschema_core==0.64.0
13
- Requires-Dist: lamindb_setup==0.67.1
14
- Requires-Dist: lamin_utils==0.13.0
15
- Requires-Dist: lamin_cli==0.10.1
12
+ Requires-Dist: lnschema_core==0.64.1
13
+ Requires-Dist: lamindb_setup==0.68.1
14
+ Requires-Dist: lamin_utils==0.13.1
15
+ Requires-Dist: lamin_cli==0.10.2
16
16
  Requires-Dist: rapidfuzz
17
17
  Requires-Dist: pyarrow
18
18
  Requires-Dist: typing_extensions!=4.6.0
@@ -26,7 +26,7 @@ Requires-Dist: urllib3<2 ; extra == "aws"
26
26
  Requires-Dist: aiobotocore[boto3]>=2.5.4,<3.0.0 ; extra == "aws"
27
27
  Requires-Dist: s3fs==2023.12.2 ; extra == "aws"
28
28
  Requires-Dist: fsspec[s3]==2023.12.2 ; extra == "aws"
29
- Requires-Dist: bionty==0.42.2 ; extra == "bionty"
29
+ Requires-Dist: bionty==0.42.4 ; extra == "bionty"
30
30
  Requires-Dist: pandas<2 ; extra == "dev"
31
31
  Requires-Dist: pre-commit ; extra == "dev"
32
32
  Requires-Dist: nox ; extra == "dev"
@@ -64,7 +64,7 @@ Provides-Extra: zarr
64
64
  - Track data lineage across notebooks & pipelines.
65
65
  - Integrate registries for experimental metadata & in-house ontologies.
66
66
  - Validate, standardize & annotate.
67
- - Collaborate across a mesh of distributed LaminDB instances.
67
+ - Collaborate across distributed LaminDB instances.
68
68
 
69
69
  ## Documentation
70
70
 
@@ -1,30 +1,31 @@
1
- lamindb/__init__.py,sha256=BgkzsNWC9WfhQrW1AYUGL0bg_tedMBBfB3CPh_0rxuk,2115
2
- lamindb/_artifact.py,sha256=g4p1E-y0U7PKQMOP3Ij35UcTs46G7MPZtr5QbV9sQWo,36130
3
- lamindb/_collection.py,sha256=esjFYaVPIz7E5J-dk3FppAOZsimszd4m7MYGxErmrk4,18097
4
- lamindb/_feature.py,sha256=B_vjiH7wOW0mugZOmIHYKkAe20E1Ca9PnMppAW9KJU4,6677
1
+ lamindb/__init__.py,sha256=Xy8yw0Woc26c9DxrgHP5c_UvVJpocigGwfCj8Ho_VZo,2108
2
+ lamindb/_annotate.py,sha256=k9Is43plKcQaMvGnzVw6OeWvZDWcSxw3FQ7nsQVDHzw,27744
3
+ lamindb/_artifact.py,sha256=RV36tcHMZ6wH6u65jOAQ_H4rfmFiIzZmAr8IY7kFhm0,35817
4
+ lamindb/_can_validate.py,sha256=w7lrUGTWldpvwaRiXBRrjfU_ZRidA7CooOu_r5MbocY,14569
5
+ lamindb/_collection.py,sha256=03CQ0u8eCY_dx31pIT5ZMZsmxbbj6J5dJ9zUqJLrDGY,18427
6
+ lamindb/_feature.py,sha256=ahRv87q1tcRLQ0UM5FA3KtcMQvIjW__fZq1yAdRAV7s,6728
5
7
  lamindb/_feature_set.py,sha256=G_Ss6mKh4D0Eji-xSfLRbKVFXwgUE82YOqIUmkV0CAA,8767
6
8
  lamindb/_filter.py,sha256=_PjyQWQBR3ohDAvJbR3hMvZ-2p2GvzFxLfKGC-gPnHI,1320
7
- lamindb/_finish.py,sha256=2YdHcHXZeu5414jIbk9id-u5_m9W73hfqzLH-AwYK1Y,1269
9
+ lamindb/_finish.py,sha256=it-fSpSmMW9ybdsylBV5Lbugh6iXRGWgIiSLwPaow_8,8590
8
10
  lamindb/_from_values.py,sha256=Ei11ml77Q1xubVekt2C4-mbox2-qnC7kP18B-LhCdSc,11886
9
11
  lamindb/_is_versioned.py,sha256=DXp5t-1DwErpqqMc9eb08kpQPCHOC2fNzaozMoBunR4,1337
10
12
  lamindb/_parents.py,sha256=pTDsW8HjQ_txFbPKrBU0WjjtCNH6sx2LASUuGWpJuYE,14742
11
13
  lamindb/_query_manager.py,sha256=lyYMEsstUQlns2H01oZXN5Ly0X6ug2VOPebyu9fHn4s,4008
12
- lamindb/_query_set.py,sha256=JXw43IoSW0QVmarOmRqZGBNn4aFUYetwWEAJcoG_g6Q,11235
14
+ lamindb/_query_set.py,sha256=DafHKwufvWQaWWSZsuxq24wpxae5Vfw7wD_3KCr7kLc,11318
13
15
  lamindb/_registry.py,sha256=vEsjn33BV2vxlanE3fyvDiy7AJoq7RKqEn_Sspo4_Dc,19232
14
- lamindb/_run.py,sha256=O5TcVPZqgdUvk4nBmI_HXzvWOG_Zr3B_HMDaw228w-4,1719
15
- lamindb/_save.py,sha256=2V3u0F2FFJZBlaBtViBYLIPDIVlwY-uXlhPDfA-wCMo,11102
16
+ lamindb/_run.py,sha256=CvH6cAFUb83o38iOdpBsktF3JGAwmuZrDZ4p4wvUr0g,1853
17
+ lamindb/_save.py,sha256=uIzHfNulzn7rpSKyAvUHT1OuN294OWFGC04gLmwrScY,11452
16
18
  lamindb/_storage.py,sha256=VW8xq3VRv58-ciholvOdlcgvp_OIlLxx5GxLt-e2Irs,614
17
19
  lamindb/_transform.py,sha256=oZq-7MgyCs4m6Bj901HwDlbvF0JuvTpe3RxN0Zb8PgE,3515
18
20
  lamindb/_ulabel.py,sha256=euXsDPD7wC99oopLXVkT-vq7f3E6-zP4Z4akI-yh0aM,1913
19
21
  lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
20
- lamindb/_validate.py,sha256=w7lrUGTWldpvwaRiXBRrjfU_ZRidA7CooOu_r5MbocY,14569
21
22
  lamindb/_view.py,sha256=yFMu4vnt0YqvN1q11boAkwigxCH1gdliDUSbzh3IuDw,2175
22
- lamindb/core/__init__.py,sha256=RYNsg2foVZRawpCW2J5J82vHZt6ub_Tze8wiDMxXCH8,988
23
- lamindb/core/_data.py,sha256=D89NN22Py832wT53U5neFNpvft_U4_WQS4XcFNQ0fVM,17214
24
- lamindb/core/_feature_manager.py,sha256=a1HXGmMgSRWEsE7nmlMMKYZzDo8oFAmieOM6HGa-yyU,13901
23
+ lamindb/core/__init__.py,sha256=Mw4sI-xgnMXNsu84oYFQBZOF8mxxxhp6-e3BjTQqjlA,1131
24
+ lamindb/core/_data.py,sha256=Q8w1I8pXXOaLVIxfjWBkLV6GGnzaQxCXamu9tplFgsA,17287
25
+ lamindb/core/_feature_manager.py,sha256=II0nuxtjOdEtU_9a7eB18_Clw9d1n5k1JOqk_vHisRw,13940
25
26
  lamindb/core/_label_manager.py,sha256=zrWDSd2AkR6fKsGDxLSWqHC9fz9BcGlavPZEh92Wzjg,9063
26
27
  lamindb/core/_mapped_collection.py,sha256=e4P3AoykIMjD4_88BWbISWvKyWWTklwHl-_WLa72ZG4,16841
27
- lamindb/core/_run_context.py,sha256=bwMHgO01oA4DxWTFqj4VDNBPS7Ti89Heuh1vHPv7HjU,17063
28
+ lamindb/core/_run_context.py,sha256=EK0lFJWx32NY2FdqFR1YozR9zioC-BjA394nPu-KwLQ,17510
28
29
  lamindb/core/_settings.py,sha256=kHL5e20dWKSbf7mJOAddvS7SQBrr1D0ZTeG_5sj5RpY,5735
29
30
  lamindb/core/_sync_git.py,sha256=Bn_ofx2ynaw6etmskgEUNW8n7LDJs-7r2aB41BgCvdA,3928
30
31
  lamindb/core/_track_environment.py,sha256=QjHWbyl2u8J4hbJG8Q_ToFaZIgS-H15Ej6syJgk-dvY,662
@@ -39,19 +40,13 @@ lamindb/core/datasets/_core.py,sha256=Y1UP_gPN2w6-QijaqmeKV57luYXYb5d2G-bmuSobS1
39
40
  lamindb/core/datasets/_fake.py,sha256=S8mNho-oSh1M9x9oOSsUBLLHmBAegsOLlFk6LnF81EA,942
40
41
  lamindb/core/storage/__init__.py,sha256=9alBNtyH59VnoWJS-IdjLwFKlK-kgeCGl6jXk0_wGeQ,369
41
42
  lamindb/core/storage/_anndata_sizes.py,sha256=0XVzA6AQeVGPaGPrhGusKyxFgFjeo3qSN29hxb8D5E8,993
42
- lamindb/core/storage/_backed_access.py,sha256=MD58DbKMen-62I7eloaCSUFYpU2zTERGbJuPWus2qsE,22871
43
+ lamindb/core/storage/_backed_access.py,sha256=DUJIDjkGkemjmKLD05blndP_rO5DpUD0EZdowos46HQ,24361
43
44
  lamindb/core/storage/_zarr.py,sha256=bMQSCsTOCtQy4Yo3KwCVpbUkKdWRApN9FM1rM-d2_G0,2839
44
- lamindb/core/storage/file.py,sha256=JmDnux5aNL2L30Tkyb4-uRgavlFfEJrQjt9X8957lYY,7321
45
+ lamindb/core/storage/file.py,sha256=WTeC4ENn_O6HEoinmTviB89W81UrJT3bSGtnpqPpIyE,7242
45
46
  lamindb/core/storage/object.py,sha256=MPUb2M8Fleq2j9x1Ryqr3BETmvsDKyf11Ifvbxd3NpA,1097
46
47
  lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
47
48
  lamindb/setup/core/__init__.py,sha256=LqIIvJNcONxkqjbnP6CUaP4d45Lbd6TSMAcXFp4C7_8,231
48
- lamindb/validation/__init__.py,sha256=AuonqVEhyYDXAoRqXnM9JweTUnYfAoExza8A5mQuM7Q,347
49
- lamindb/validation/_anndata_validator.py,sha256=1tc0GjmRTjTw2EEo30EHD212hp25HJ6FaldMURynx0k,4070
50
- lamindb/validation/_lookup.py,sha256=hC81FomT86tzYXM6_czoxgu9P_JVn7TfLaRyMtBUuU8,1425
51
- lamindb/validation/_register.py,sha256=RCvY7cNrqnJ0oTGGExePHEqdEe2EZvYaCL2ZGw6ajek,8261
52
- lamindb/validation/_validate.py,sha256=esStmPdp_B6YDKgSkcCBuAJxYo--QRNTd9f6FfOGoBA,4145
53
- lamindb/validation/_validator.py,sha256=vfYCo0ORC1hNajFYrSqMdf9lLC9B7ZNUUrRMU7RXyGo,6876
54
- lamindb-0.69.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
55
- lamindb-0.69.1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
56
- lamindb-0.69.1.dist-info/METADATA,sha256=5aQTa6m3YG6x--MSlse9D3xph_lc1PPS47jibL-RFaE,2866
57
- lamindb-0.69.1.dist-info/RECORD,,
49
+ lamindb-0.69.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
50
+ lamindb-0.69.3.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
51
+ lamindb-0.69.3.dist-info/METADATA,sha256=X4upu_QrdDsy861PJvPiQ_nw1f6N89NrVqN1Zxn6i14,2856
52
+ lamindb-0.69.3.dist-info/RECORD,,
@@ -1,19 +0,0 @@
1
- """Validators built on LaminDB.
2
-
3
- Import the package::
4
-
5
- from lamindb.validation import Validator, AnnDataValidator
6
-
7
- This is the complete API reference:
8
-
9
- .. autosummary::
10
- :toctree: .
11
-
12
- Validator
13
- AnnDataValidator
14
- Lookup
15
- """
16
-
17
- from ._anndata_validator import AnnDataValidator
18
- from ._lookup import Lookup
19
- from ._validator import Validator
@@ -1,130 +0,0 @@
1
- from typing import Dict, Optional
2
-
3
- import anndata as ad
4
- from lnschema_core.types import FieldAttr
5
- from pandas.core.api import DataFrame as DataFrame
6
-
7
- import lamindb as ln
8
-
9
- from ._lookup import Lookup
10
- from ._register import register_artifact, register_labels
11
- from ._validate import validate_anndata
12
- from ._validator import Validator
13
-
14
-
15
- class AnnDataValidator(Validator):
16
- """Lamin AnnData validator.
17
-
18
- Args:
19
- adata: The AnnData object to validate.
20
- var_field: The registry field to validate variables index against.
21
- obs_fields: A dictionary mapping obs_column to registry_field.
22
- For example:
23
- {"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}
24
- using: The reference instance containing registries to validate against.
25
- """
26
-
27
- def __init__(
28
- self,
29
- adata: ad.AnnData,
30
- var_field: FieldAttr,
31
- obs_fields: Dict[str, FieldAttr],
32
- using: str = "default",
33
- verbosity: str = "hint",
34
- **kwargs,
35
- ) -> None:
36
- self._adata = adata
37
- super().__init__(
38
- df=self._adata.obs,
39
- fields=obs_fields,
40
- using=using,
41
- verbosity=verbosity,
42
- **kwargs,
43
- )
44
- self._obs_fields = obs_fields
45
- self._var_field = var_field
46
- self._fields = {"variables": var_field, **obs_fields}
47
-
48
- @property
49
- def var_field(self) -> FieldAttr:
50
- """Return the registry field to validate variables index against."""
51
- return self._var_field
52
-
53
- @property
54
- def obs_fields(self) -> Dict:
55
- """Return the obs fields to validate against."""
56
- return self._obs_fields
57
-
58
- def lookup(self, using: Optional[str] = None) -> Lookup:
59
- """Lookup features and labels."""
60
- fields = {
61
- **{"feature": ln.Feature.name, "variables": self.var_field},
62
- **self.obs_fields,
63
- }
64
- return Lookup(fields=fields, using=using or self._using)
65
-
66
- def _register_variables(self, validated_only: bool = True, **kwargs):
67
- """Register variable records."""
68
- self._add_kwargs(**kwargs)
69
- register_labels(
70
- values=self._adata.var_names,
71
- field=self.var_field,
72
- feature_name="variables",
73
- using=self._using,
74
- validated_only=validated_only,
75
- kwargs=self._kwargs,
76
- )
77
-
78
- def register_labels(self, feature: str, validated_only: bool = True, **kwargs):
79
- """Register labels for the given feature.
80
-
81
- Args:
82
- feature: The feature to register labels for.
83
- if "variables", register variables.
84
- validated_only: If True, only register validated labels.
85
- **kwargs: Additional metadata needed.
86
- """
87
- if feature == "variables":
88
- self._register_variables(validated_only=validated_only, **kwargs)
89
- else:
90
- super().register_labels(feature, validated_only, **kwargs)
91
-
92
- def validate(self, **kwargs) -> bool:
93
- """Validate variables and categorical observations."""
94
- self._add_kwargs(**kwargs)
95
- self._validated = validate_anndata(
96
- self._adata,
97
- var_field=self.var_field,
98
- obs_fields=self.obs_fields,
99
- **self._kwargs,
100
- )
101
-
102
- return self._validated
103
-
104
- def register_artifact(
105
- self,
106
- description: str,
107
- **kwargs,
108
- ) -> ln.Artifact:
109
- """Register the validated AnnData and metadata.
110
-
111
- Args:
112
- description: description of the AnnData object
113
- **kwargs: object level metadata
114
-
115
- Returns:
116
- a registered artifact record
117
- """
118
- self._add_kwargs(**kwargs)
119
- if not self._validated:
120
- raise ValueError("please run `validate()` first!")
121
-
122
- self._artifact = register_artifact(
123
- self._adata,
124
- description=description,
125
- var_field=self.var_field,
126
- fields=self.obs_fields,
127
- **self._kwargs,
128
- )
129
-
130
- return self._artifact
@@ -1,38 +0,0 @@
1
- from typing import Dict, Optional
2
-
3
- from lamin_utils import colors, logger
4
- from lnschema_core.types import FieldAttr
5
-
6
- import lamindb as ln
7
-
8
- from ._validate import _registry_using
9
-
10
-
11
- class Lookup:
12
- """Lookup features and labels from the reference instance."""
13
-
14
- def __init__(
15
- self, fields: Dict[str, FieldAttr], using: Optional[str] = None
16
- ) -> None:
17
- self._fields = fields
18
- self._using = using
19
- self._using_name = using or ln.setup.settings.instance.slug
20
- logger.print(f"Lookup objects from the {colors.green(self._using_name)}")
21
-
22
- def __getitem__(self, name):
23
- if name in self._fields:
24
- registry = self._fields[name].field.model
25
- if self._using == "public":
26
- return registry.public().lookup()
27
- else:
28
- return _registry_using(registry, self._using).lookup()
29
- raise AttributeError(
30
- f"'{self.__class__.__name__}' object has no attribute '{name}'"
31
- )
32
-
33
- def __repr__(self) -> str:
34
- if len(self._fields) > 0:
35
- fields = "\n ".join([str([key]) for key in self._fields.keys()])
36
- return f"Lookup objects from the {colors.green(self._using_name)}:\n {colors.green(fields)}\n\nExample:\n → categories = validator.lookup().cell_type\n → categories.alveolar_type_1_fibroblast_cell"
37
- else:
38
- return colors.warning("No fields are found!")
@@ -1,214 +0,0 @@
1
- from typing import Dict, List, Optional, Union
2
-
3
- import anndata as ad
4
- import pandas as pd
5
- from lamin_utils import colors, logger
6
- from lnschema_core.types import FieldAttr
7
-
8
- import lamindb as ln
9
-
10
- from ._validate import _registry_using, check_if_registry_needs_organism
11
-
12
-
13
- def register_artifact(
14
- data: Union[pd.DataFrame, ad.AnnData],
15
- description: str,
16
- fields: Dict[str, FieldAttr],
17
- var_field: Optional[FieldAttr] = None,
18
- **kwargs,
19
- ):
20
- """Registers all metadata with an Artifact.
21
-
22
- Args:
23
- data: The DataFrame or AnnData object to register.
24
- description: A description of the artifact.
25
- fields: A dictionary mapping obs_column to registry_field.
26
- var_field: The registry field to validate variables index against.
27
- kwargs: Additional keyword arguments to pass to the registry model.
28
- """
29
- if isinstance(data, ad.AnnData):
30
- artifact = ln.Artifact.from_anndata(data, description=description)
31
- artifact.n_observations = data.n_obs
32
- elif isinstance(data, pd.DataFrame):
33
- artifact = ln.Artifact.from_df(data, description=description)
34
- else:
35
- raise ValueError("data must be a DataFrame or AnnData object")
36
- artifact.save()
37
-
38
- organism = kwargs.pop("organism", None)
39
-
40
- if isinstance(data, ad.AnnData):
41
- artifact.features.add_from_anndata(var_field=var_field, organism=organism)
42
- else:
43
- artifact.features.add_from_df()
44
-
45
- # link validated obs metadata
46
- features = ln.Feature.lookup().dict()
47
- for feature_name, field in fields.items():
48
- feature = features.get(feature_name)
49
- registry = field.field.model
50
- filter_kwargs = kwargs.copy()
51
- if check_if_registry_needs_organism(registry, organism):
52
- filter_kwargs["organism"] = organism
53
- df = data.obs if isinstance(data, ad.AnnData) else data
54
- labels = registry.from_values(df[feature_name], field=field, **filter_kwargs)
55
- artifact.labels.add(labels, feature)
56
-
57
- logger.print("\n\n🎉 registered artifact in LaminDB!\n")
58
- if ln.setup.settings.instance.is_remote:
59
- logger.print(
60
- f"🔗 https://lamin.ai/{ln.setup.settings.instance.slug}/artifact/{artifact.uid}"
61
- )
62
-
63
- return artifact
64
-
65
-
66
- def register_labels(
67
- values: List[str],
68
- field: FieldAttr,
69
- feature_name: str,
70
- using: Optional[str] = None,
71
- validated_only: bool = True,
72
- kwargs: Dict = None,
73
- ):
74
- """Register features or labels records in the default instance from the using instance.
75
-
76
- Args:
77
- values: A list of values to be registered as labels.
78
- field: The FieldAttr object representing the field for which labels are being registered.
79
- feature_name: The name of the feature to register.
80
- using: The name of the instance from which to transfer labels (if applicable).
81
- validated_only: If True, only register validated labels.
82
- kwargs: Additional keyword arguments to pass to the registry model.
83
- """
84
- if kwargs is None:
85
- kwargs = {}
86
- registry = field.field.model
87
-
88
- check_if_registry_needs_organism(registry, kwargs.get("organism"))
89
- verbosity = ln.settings.verbosity
90
- try:
91
- ln.settings.verbosity = "error"
92
- # for labels that are registered in the using instance, transfer them to the current instance
93
- # first inspect the current instance
94
- inspect_result_current = registry.inspect(
95
- values, field=field, mute=True, **kwargs
96
- )
97
- if len(inspect_result_current.non_validated) == 0:
98
- # everything is validated in the current instance, no need to register
99
- ln.settings.verbosity = verbosity
100
- return
101
-
102
- labels_registered: Dict = {"from public": [], "without reference": []}
103
-
104
- # register labels from the using instance
105
- (
106
- labels_registered[f"from {using}"],
107
- non_validated_labels,
108
- ) = register_labels_from_using_instance(
109
- inspect_result_current.non_validated,
110
- field=field,
111
- using=using,
112
- kwargs=kwargs,
113
- )
114
-
115
- # for labels that are not registered in the using instance, register them in the current instance
116
- from_values_records = (
117
- registry.from_values(non_validated_labels, field=field, **kwargs)
118
- if len(non_validated_labels) > 0
119
- else []
120
- )
121
- ln.save(from_values_records)
122
- labels_registered["from public"] = [
123
- getattr(r, field.field.name) for r in from_values_records
124
- ]
125
- labels_registered["without reference"] = [
126
- i for i in non_validated_labels if i not in labels_registered["from public"]
127
- ]
128
- if not validated_only:
129
- non_validated_records = []
130
- for value in labels_registered["without reference"]:
131
- kwargs[field.field.name] = value
132
- if registry.__name__ == "Feature":
133
- kwargs["type"] = "category"
134
- # register non-validated labels
135
- non_validated_records.append(registry(**kwargs))
136
- ln.save(non_validated_records)
137
-
138
- # for ulabels, also register a parent label: is_{feature_name}
139
- if registry == ln.ULabel and field.field.name == "name":
140
- register_ulabels_with_parent(values, field)
141
- finally:
142
- ln.settings.verbosity = verbosity
143
- log_registered_labels(
144
- labels_registered, feature_name=feature_name, validated_only=validated_only
145
- )
146
-
147
-
148
- def log_registered_labels(
149
- labels_registered: Dict, feature_name: str, validated_only: bool = True
150
- ):
151
- """Log the registered labels."""
152
- for key, labels in labels_registered.items():
153
- if len(labels) > 0:
154
- if key == "without reference" and validated_only:
155
- msg = (
156
- f"{len(labels)} non-validated labels are not registered: {labels}!\n"
157
- " → to lookup categories, use `.lookup().{feature_name}`\n"
158
- " → to register, set `validated_only=False`"
159
- )
160
- logger.warning(colors.yellow(msg))
161
- continue
162
- logger.success(
163
- f"registered {len(labels)} records {colors.green(key)}: {labels}"
164
- )
165
-
166
-
167
- def register_ulabels_with_parent(values: List[str], field: FieldAttr):
168
- """Register a parent label for the given labels."""
169
- registry = field.field.model
170
- assert registry == ln.ULabel
171
- all_records = registry.from_values(values, field=field)
172
- is_feature = registry.filter(name=f"is_{field.field.name}").one_or_none()
173
- if is_feature is None:
174
- is_feature = registry(name=f"is_{field.field.name}")
175
- is_feature.save()
176
- # link all labels to the parent label
177
- is_feature.children.add(*all_records)
178
-
179
-
180
- def register_labels_from_using_instance(
181
- values: List[str],
182
- field: FieldAttr,
183
- using: Optional[str] = None,
184
- kwargs: Dict = None,
185
- ):
186
- """Register features or labels records from the using instance.
187
-
188
- Args:
189
- values: A list of values to be registered as labels.
190
- field: The FieldAttr object representing the field for which labels are being registered.
191
- using: The name of the instance from which to transfer labels (if applicable).
192
- kwargs: Additional keyword arguments to pass to the registry model.
193
- """
194
- if kwargs is None:
195
- kwargs = {}
196
- labels_registered = []
197
- not_registered = values
198
- if using is not None and using != "default":
199
- registry = field.field.model
200
- registry_using = _registry_using(registry, using)
201
- # then inspect the using instance
202
- inspect_result_using = registry_using.inspect(
203
- values, field=field, mute=True, **kwargs
204
- )
205
- # register the labels that are validated in the using instance
206
- # TODO: filter kwargs
207
- labels_using = registry_using.filter(
208
- **{f"{field.field.name}__in": inspect_result_using.validated}
209
- ).all()
210
- for label_using in labels_using:
211
- label_using.save()
212
- labels_registered.append(getattr(label_using, field.field.name))
213
- not_registered = inspect_result_using.non_validated
214
- return labels_registered, not_registered