lamindb 1.10.1__py3-none-any.whl → 1.11a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +89 -49
- lamindb/_finish.py +14 -12
- lamindb/_tracked.py +2 -4
- lamindb/_view.py +1 -1
- lamindb/base/__init__.py +2 -1
- lamindb/base/dtypes.py +76 -0
- lamindb/core/_settings.py +45 -2
- lamindb/core/storage/_anndata_accessor.py +118 -26
- lamindb/core/storage/_backed_access.py +10 -7
- lamindb/core/storage/_spatialdata_accessor.py +15 -4
- lamindb/core/storage/_zarr.py +3 -0
- lamindb/curators/_legacy.py +16 -3
- lamindb/curators/core.py +439 -191
- lamindb/examples/cellxgene/__init__.py +8 -3
- lamindb/examples/cellxgene/_cellxgene.py +127 -13
- lamindb/examples/cellxgene/{cxg_schema_versions.csv → cellxgene_schema_versions.csv} +11 -0
- lamindb/examples/croissant/__init__.py +12 -2
- lamindb/examples/datasets/__init__.py +2 -2
- lamindb/examples/datasets/_core.py +1 -1
- lamindb/examples/datasets/_small.py +66 -22
- lamindb/examples/datasets/mini_immuno.py +1 -0
- lamindb/migrations/0118_alter_recordproject_value_projectrecord.py +99 -0
- lamindb/migrations/0119_rename_records_project_linked_in_records.py +26 -0
- lamindb/migrations/{0117_squashed.py → 0119_squashed.py} +92 -5
- lamindb/migrations/0120_add_record_fk_constraint.py +64 -0
- lamindb/migrations/0121_recorduser.py +53 -0
- lamindb/models/__init__.py +3 -1
- lamindb/models/_describe.py +2 -2
- lamindb/models/_feature_manager.py +53 -53
- lamindb/models/_from_values.py +2 -2
- lamindb/models/_is_versioned.py +4 -4
- lamindb/models/_label_manager.py +4 -4
- lamindb/models/artifact.py +336 -136
- lamindb/models/artifact_set.py +36 -1
- lamindb/models/can_curate.py +1 -2
- lamindb/models/collection.py +3 -34
- lamindb/models/feature.py +111 -7
- lamindb/models/has_parents.py +11 -11
- lamindb/models/project.py +42 -2
- lamindb/models/query_manager.py +16 -7
- lamindb/models/query_set.py +59 -34
- lamindb/models/record.py +25 -4
- lamindb/models/run.py +8 -6
- lamindb/models/schema.py +54 -26
- lamindb/models/sqlrecord.py +123 -25
- lamindb/models/storage.py +59 -14
- lamindb/models/transform.py +17 -17
- lamindb/models/ulabel.py +6 -1
- {lamindb-1.10.1.dist-info → lamindb-1.11a1.dist-info}/METADATA +3 -3
- {lamindb-1.10.1.dist-info → lamindb-1.11a1.dist-info}/RECORD +52 -47
- {lamindb-1.10.1.dist-info → lamindb-1.11a1.dist-info}/LICENSE +0 -0
- {lamindb-1.10.1.dist-info → lamindb-1.11a1.dist-info}/WHEEL +0 -0
@@ -3,9 +3,14 @@
|
|
3
3
|
.. autosummary::
|
4
4
|
:toctree: .
|
5
5
|
|
6
|
-
|
7
|
-
|
6
|
+
save_cellxgene_defaults
|
7
|
+
create_cellxgene_schema
|
8
8
|
|
9
9
|
"""
|
10
10
|
|
11
|
-
from ._cellxgene import
|
11
|
+
from ._cellxgene import (
|
12
|
+
create_cellxgene_schema,
|
13
|
+
get_cxg_schema,
|
14
|
+
save_cellxgene_defaults,
|
15
|
+
save_cxg_defaults,
|
16
|
+
)
|
@@ -3,7 +3,9 @@ from __future__ import annotations
|
|
3
3
|
from typing import TYPE_CHECKING, Collection, Literal, NamedTuple
|
4
4
|
|
5
5
|
import pandas as pd
|
6
|
+
from lamindb_setup.core import deprecated
|
6
7
|
from lamindb_setup.core.upath import UPath
|
8
|
+
from packaging import version
|
7
9
|
|
8
10
|
from lamindb.models._from_values import _format_values
|
9
11
|
|
@@ -11,11 +13,25 @@ if TYPE_CHECKING:
|
|
11
13
|
from lamindb.base.types import FieldAttr
|
12
14
|
from lamindb.models import Schema, SQLRecord
|
13
15
|
|
14
|
-
CELLxGENESchemaVersions = Literal["4.0.0", "5.0.0", "5.1.0", "5.2.0", "5.3.0"]
|
16
|
+
CELLxGENESchemaVersions = Literal["4.0.0", "5.0.0", "5.1.0", "5.2.0", "5.3.0", "6.0.0"]
|
17
|
+
CELLxGENEOrganisms = Literal[
|
18
|
+
"human",
|
19
|
+
"mouse",
|
20
|
+
"zebra danio",
|
21
|
+
"rhesus macaquedomestic pig",
|
22
|
+
"chimpanzee",
|
23
|
+
"white-tufted-ear marmoset",
|
24
|
+
"sars-2",
|
25
|
+
]
|
15
26
|
FieldType = Literal["ontology_id", "name"]
|
16
27
|
|
17
28
|
|
29
|
+
@deprecated(new_name="save_cellxgene_defaults")
|
18
30
|
def save_cxg_defaults() -> None:
|
31
|
+
return save_cxg_defaults()
|
32
|
+
|
33
|
+
|
34
|
+
def save_cellxgene_defaults() -> None:
|
19
35
|
"""Save default values of the CELLxGENE schema to the instance.
|
20
36
|
|
21
37
|
Adds CELLxGENE specific (control) values that are not available in the ontologies:
|
@@ -25,7 +41,6 @@ def save_cxg_defaults() -> None:
|
|
25
41
|
- "unknown" entries for DevelopmentalStage, Phenotype, and CellType
|
26
42
|
- "tissue", "organoid", and "cell culture" ULabels (tissue_type)
|
27
43
|
- "cell", "nucleus", "na" ULabels (suspension_type)
|
28
|
-
|
29
44
|
"""
|
30
45
|
import bionty as bt
|
31
46
|
|
@@ -47,12 +62,13 @@ def save_cxg_defaults() -> None:
|
|
47
62
|
# na, unknown
|
48
63
|
for model, name in zip(
|
49
64
|
[
|
65
|
+
bt.Ethnicity,
|
50
66
|
bt.Ethnicity,
|
51
67
|
bt.DevelopmentalStage,
|
52
68
|
bt.Phenotype,
|
53
69
|
bt.CellType,
|
54
70
|
],
|
55
|
-
["na", "unknown", "unknown", "unknown"],
|
71
|
+
["na", "unknown", "unknown", "unknown", "unknown"],
|
56
72
|
):
|
57
73
|
model(ontology_id=name, name=name, description="From CellxGene schema.").save()
|
58
74
|
|
@@ -76,8 +92,24 @@ def save_cxg_defaults() -> None:
|
|
76
92
|
name=name, type=suspension_type, description="From CellxGene schema."
|
77
93
|
).save()
|
78
94
|
|
95
|
+
# organisms
|
96
|
+
taxonomy_ids = [
|
97
|
+
"NCBITaxon:9606", # Homo sapiens (Human)
|
98
|
+
"NCBITaxon:10090", # Mus musculus (House mouse)
|
99
|
+
"NCBITaxon:9544", # Macaca mulatta (Rhesus monkey)
|
100
|
+
"NCBITaxon:9825", # Sus scrofa domesticus (Domestic pig)
|
101
|
+
"NCBITaxon:9598", # Pan troglodytes (Chimpanzee)
|
102
|
+
"NCBITaxon:9483", # Callithrix jacchus (White-tufted-ear marmoset)
|
103
|
+
"NCBITaxon:7955", # Danio rerio (Zebrafish)
|
104
|
+
]
|
105
|
+
for ontology_id in taxonomy_ids:
|
106
|
+
bt.Organism.from_source(
|
107
|
+
ontology_id=ontology_id,
|
108
|
+
source=bt.Source.get(name="ncbitaxon", currently_used=True),
|
109
|
+
).save()
|
110
|
+
|
79
111
|
|
80
|
-
def
|
112
|
+
def _create_cellxgene_sources(
|
81
113
|
categoricals: dict[str, FieldAttr], schema_version: str, organism: str
|
82
114
|
) -> dict[str, SQLRecord]:
|
83
115
|
"""Create a source dictionary of CELLxGENE categoricals to Source."""
|
@@ -105,7 +137,7 @@ def _create_cxg_sources(
|
|
105
137
|
)
|
106
138
|
return source
|
107
139
|
|
108
|
-
sources_df = pd.read_csv(UPath(__file__).parent / "
|
140
|
+
sources_df = pd.read_csv(UPath(__file__).parent / "cellxgene_schema_versions.csv")
|
109
141
|
sources_df = sources_df[sources_df.schema_version == schema_version]
|
110
142
|
if sources_df.empty:
|
111
143
|
raise ValueError(
|
@@ -126,11 +158,28 @@ def _create_cxg_sources(
|
|
126
158
|
return key_to_source
|
127
159
|
|
128
160
|
|
161
|
+
@deprecated(new_name="create_cellxgene_schema")
|
129
162
|
def get_cxg_schema(
|
130
163
|
schema_version: CELLxGENESchemaVersions,
|
131
164
|
*,
|
132
165
|
field_types: FieldType | Collection[FieldType] = "ontology_id",
|
133
|
-
organism:
|
166
|
+
organism: CELLxGENEOrganisms = "human",
|
167
|
+
spatial_library_id: str | None = None,
|
168
|
+
) -> Schema:
|
169
|
+
return create_cellxgene_schema(
|
170
|
+
schema_version,
|
171
|
+
field_types=field_types,
|
172
|
+
organism=organism,
|
173
|
+
spatial_library_id=spatial_library_id,
|
174
|
+
)
|
175
|
+
|
176
|
+
|
177
|
+
def create_cellxgene_schema(
|
178
|
+
schema_version: CELLxGENESchemaVersions,
|
179
|
+
*,
|
180
|
+
field_types: FieldType | Collection[FieldType] = "ontology_id",
|
181
|
+
organism: CELLxGENEOrganisms = "human",
|
182
|
+
spatial_library_id: str | None = None,
|
134
183
|
) -> Schema:
|
135
184
|
"""Generates a :class:`~lamindb.Schema` for a specific CELLxGENE schema version.
|
136
185
|
|
@@ -138,6 +187,8 @@ def get_cxg_schema(
|
|
138
187
|
schema_version: The CELLxGENE Schema version.
|
139
188
|
field_types: One or several of 'ontology_id', 'name'.
|
140
189
|
organism: The organism of the Schema.
|
190
|
+
library_id: Identifier for the spatial library.
|
191
|
+
Specifying this value enables curation against spatial requirements.
|
141
192
|
"""
|
142
193
|
import bionty as bt
|
143
194
|
|
@@ -168,7 +219,7 @@ def get_cxg_schema(
|
|
168
219
|
"tissue": CategorySpec(bt.Tissue.name, None),
|
169
220
|
"tissue_ontology_term_id": CategorySpec(bt.Tissue.ontology_id, None),
|
170
221
|
"tissue_type": CategorySpec(ULabel.name, "tissue"),
|
171
|
-
"organism": CategorySpec(bt.Organism.
|
222
|
+
"organism": CategorySpec(bt.Organism.scientific_name, None),
|
172
223
|
"organism_ontology_term_id": CategorySpec(bt.Organism.ontology_id, None),
|
173
224
|
"donor_id": CategorySpec(str, "unknown"),
|
174
225
|
}
|
@@ -195,7 +246,17 @@ def get_cxg_schema(
|
|
195
246
|
f"Invalid field_types: {field_types}. Must contain 'ontology_id', 'name', or both."
|
196
247
|
)
|
197
248
|
|
198
|
-
|
249
|
+
is_version_6_or_later = version.parse(schema_version) >= version.parse("6.0.0")
|
250
|
+
|
251
|
+
organism_fields = {"organism", "organism_ontology_term_id"}
|
252
|
+
if is_version_6_or_later:
|
253
|
+
obs_categoricals = {
|
254
|
+
k: v for k, v in categoricals.items() if k not in organism_fields
|
255
|
+
}
|
256
|
+
else:
|
257
|
+
obs_categoricals = categoricals
|
258
|
+
|
259
|
+
sources = _create_cellxgene_sources(
|
199
260
|
categoricals=categoricals,
|
200
261
|
schema_version=schema_version,
|
201
262
|
organism=organism,
|
@@ -217,30 +278,83 @@ def get_cxg_schema(
|
|
217
278
|
obs_features = [
|
218
279
|
Feature(
|
219
280
|
name=field,
|
220
|
-
dtype=
|
281
|
+
dtype=obs_categoricals[field],
|
221
282
|
cat_filters={"source": source},
|
222
283
|
default_value=categoricals_to_spec[field].default,
|
223
284
|
).save()
|
224
285
|
for field, source in sources.items()
|
225
|
-
if field != "var_index"
|
286
|
+
if field != "var_index" and field in obs_categoricals
|
226
287
|
]
|
227
288
|
for name in ["is_primary_data", "suspension_type", "tissue_type"]:
|
228
289
|
obs_features.append(Feature(name=name, dtype=ULabel.name).save())
|
229
290
|
|
230
291
|
obs_schema = Schema(
|
231
|
-
name=f"obs of CELLxGENE version {schema_version}",
|
292
|
+
name=f"obs of CELLxGENE version {schema_version} for {organism} of {field_types}",
|
232
293
|
features=obs_features,
|
233
294
|
otype="DataFrame",
|
234
295
|
minimal_set=True,
|
235
296
|
coerce_dtype=True,
|
236
297
|
).save()
|
237
298
|
|
299
|
+
slots = {"var": var_schema, "obs": obs_schema}
|
300
|
+
|
301
|
+
if is_version_6_or_later:
|
302
|
+
uns_categoricals = {
|
303
|
+
k: v for k, v in categoricals.items() if k in organism_fields
|
304
|
+
}
|
305
|
+
|
306
|
+
uns_features = [
|
307
|
+
Feature(
|
308
|
+
name=field,
|
309
|
+
dtype=uns_categoricals[field],
|
310
|
+
cat_filters={"source": sources[field]},
|
311
|
+
default_value=categoricals_to_spec[field].default,
|
312
|
+
).save()
|
313
|
+
for field in uns_categoricals
|
314
|
+
]
|
315
|
+
|
316
|
+
uns_schema = Schema(
|
317
|
+
name=f"uns of CELLxGENE version {schema_version}",
|
318
|
+
features=uns_features,
|
319
|
+
otype="DataFrame",
|
320
|
+
minimal_set=True,
|
321
|
+
coerce_dtype=True,
|
322
|
+
).save()
|
323
|
+
|
324
|
+
slots["uns"] = uns_schema
|
325
|
+
|
326
|
+
# Add spatial validation if library_id is provided
|
327
|
+
if spatial_library_id:
|
328
|
+
scalefactors_schema = Schema(
|
329
|
+
name=f"scalefactors of spatial {spatial_library_id}",
|
330
|
+
features=[
|
331
|
+
Feature(name="spot_diameter_fullres", dtype=float).save(),
|
332
|
+
Feature(name="tissue_hires_scalef", dtype=float).save(),
|
333
|
+
],
|
334
|
+
).save()
|
335
|
+
|
336
|
+
spatial_schema = Schema(
|
337
|
+
name="CELLxGENE spatial metadata",
|
338
|
+
features=[
|
339
|
+
Feature(
|
340
|
+
name="is_single",
|
341
|
+
dtype=bool,
|
342
|
+
description="True if dataset represents single spatial unit (tissue section for Visium, array for Slide-seqV2)",
|
343
|
+
).save()
|
344
|
+
],
|
345
|
+
).save()
|
346
|
+
|
347
|
+
slots["uns:spatial"] = spatial_schema
|
348
|
+
slots[f"uns:spatial:{spatial_library_id}:scalefactors"] = (
|
349
|
+
scalefactors_schema
|
350
|
+
)
|
351
|
+
|
238
352
|
full_cxg_schema = Schema(
|
239
|
-
name=f"AnnData of CELLxGENE version {schema_version}",
|
353
|
+
name=f"AnnData of CELLxGENE version {schema_version} for {organism} of {', '.join(field_types) if isinstance(field_types, list) else field_types}",
|
240
354
|
otype="AnnData",
|
241
355
|
minimal_set=True,
|
242
356
|
coerce_dtype=True,
|
243
|
-
slots=
|
357
|
+
slots=slots,
|
244
358
|
).save()
|
245
359
|
|
246
360
|
return full_cxg_schema
|
@@ -52,3 +52,14 @@ schema_version,entity,organism,source,version
|
|
52
52
|
5.3.0,Tissue,all,uberon,2025-01-15
|
53
53
|
5.3.0,Gene,human,ensembl,release-110
|
54
54
|
5.3.0,Gene,mouse,ensembl,release-110
|
55
|
+
6.0.0,CellType,all,cl,2025-04-10
|
56
|
+
6.0.0,ExperimentalFactor,all,efo,3.78.0
|
57
|
+
6.0.0,Ethnicity,human,hancestro,3.0
|
58
|
+
6.0.0,DevelopmentalStage,human,hsapdv,2025-01-23
|
59
|
+
6.0.0,DevelopmentalStage,mouse,mmusdv,2025-01-23
|
60
|
+
6.0.0,Disease,all,mondo,2025-05-06
|
61
|
+
6.0.0,Organism,all,ncbitaxon,2025-03-13
|
62
|
+
6.0.0,Phenotype,all,pato,2025-05-14
|
63
|
+
6.0.0,Tissue,all,uberon,2025-05-28
|
64
|
+
6.0.0,Gene,human,ensembl,release-110
|
65
|
+
6.0.0,Gene,mouse,ensembl,release-110
|
@@ -1,6 +1,10 @@
|
|
1
|
-
"""
|
1
|
+
"""Examples for MLCommons Croissant files, which are used to store metadata about datasets.
|
2
|
+
|
3
|
+
.. autosummary::
|
4
|
+
:toctree: .
|
5
|
+
|
6
|
+
mini_immuno
|
2
7
|
|
3
|
-
Examples for MLCommons Croissant files, which are used to store metadata about datasets.
|
4
8
|
"""
|
5
9
|
|
6
10
|
import json
|
@@ -12,6 +16,12 @@ def mini_immuno(n_files: int = 1) -> list[Path]:
|
|
12
16
|
|
13
17
|
Args:
|
14
18
|
n_files: Number of files inside the croissant file. Default is 1.
|
19
|
+
|
20
|
+
Example
|
21
|
+
|
22
|
+
::
|
23
|
+
|
24
|
+
croissant_path, dataset1_path = ln.examples.croissant.mini_immuno()
|
15
25
|
"""
|
16
26
|
from ..datasets import file_mini_csv
|
17
27
|
from ..datasets.mini_immuno import get_dataset1
|
@@ -41,7 +41,7 @@ Dictionary, Dataframe, AnnData, MuData, SpatialData.
|
|
41
41
|
.. autosummary::
|
42
42
|
:toctree: .
|
43
43
|
|
44
|
-
|
44
|
+
dict_cellxgene_uns
|
45
45
|
df_iris
|
46
46
|
df_iris_in_meter
|
47
47
|
df_iris_in_meter_study1
|
@@ -78,7 +78,7 @@ from ._core import (
|
|
78
78
|
df_iris_in_meter,
|
79
79
|
df_iris_in_meter_study1,
|
80
80
|
df_iris_in_meter_study2,
|
81
|
-
|
81
|
+
dict_cellxgene_uns,
|
82
82
|
dir_iris_images,
|
83
83
|
dir_scrnaseq_cellranger,
|
84
84
|
file_bam,
|
@@ -418,7 +418,7 @@ def mudata_papalexi21_subset() -> MuData: # pragma: no cover
|
|
418
418
|
return mdata
|
419
419
|
|
420
420
|
|
421
|
-
def
|
421
|
+
def dict_cellxgene_uns() -> dict[str, Any]:
|
422
422
|
"""An example CELLxGENE AnnData `.uns` dictionary."""
|
423
423
|
uns = {
|
424
424
|
"organism_ontology_term_id": "NCBITaxon:9606",
|
@@ -9,32 +9,36 @@ import pandas as pd
|
|
9
9
|
|
10
10
|
def small_dataset3_cellxgene(
|
11
11
|
otype: Literal["DataFrame", "AnnData"] = "AnnData",
|
12
|
+
*,
|
12
13
|
with_obs_defaults: bool = False,
|
14
|
+
with_var_typo: bool = False,
|
13
15
|
with_obs_typo: bool = False,
|
16
|
+
with_uns_organism: bool = False,
|
17
|
+
with_uns_spatial: bool = False,
|
14
18
|
) -> tuple[pd.DataFrame, dict[str, Any]] | ad.AnnData:
|
15
|
-
|
16
|
-
|
17
|
-
var_ids = ["invalid_ensembl_id", "ENSG00000000419", "ENSG00000139618"]
|
18
|
-
|
19
|
+
var_id = "invalid_ensembl_id" if with_var_typo else "ENSG00000000457"
|
20
|
+
var_ids = [var_id, "ENSG00000000419", "ENSG00000139618"]
|
19
21
|
lung_id = "UBERON:0002048XXX" if with_obs_typo else "UBERON:0002048"
|
22
|
+
|
23
|
+
obs_data = {
|
24
|
+
"disease_ontology_term_id": [
|
25
|
+
"MONDO:0004975",
|
26
|
+
"MONDO:0004980",
|
27
|
+
"MONDO:0004980",
|
28
|
+
],
|
29
|
+
"development_stage_ontology_term_id": ["unknown", "unknown", "unknown"],
|
30
|
+
"sex_ontology_term_id": ["PATO:0000383", "PATO:0000384", "unknown"],
|
31
|
+
"tissue_ontology_term_id": [lung_id, lung_id, "UBERON:0000948"],
|
32
|
+
"cell_type": ["T cell", "B cell", "B cell"],
|
33
|
+
"self_reported_ethnicity": ["South Asian", "South Asian", "South Asian"],
|
34
|
+
"donor_id": ["-1", "1", "2"],
|
35
|
+
"is_primary_data": [False, False, False],
|
36
|
+
"suspension_type": ["cell", "cell", "cell"],
|
37
|
+
"tissue_type": ["tissue", "tissue", "tissue"],
|
38
|
+
}
|
39
|
+
|
20
40
|
obs_df = pd.DataFrame(
|
21
|
-
|
22
|
-
"disease_ontology_term_id": [
|
23
|
-
"MONDO:0004975",
|
24
|
-
"MONDO:0004980",
|
25
|
-
"MONDO:0004980",
|
26
|
-
],
|
27
|
-
"development_stage_ontology_term_id": ["unknown", "unknown", "unknown"],
|
28
|
-
"organism": ["human", "human", "human"],
|
29
|
-
"sex_ontology_term_id": ["PATO:0000383", "PATO:0000384", "unknown"],
|
30
|
-
"tissue_ontology_term_id": [lung_id, lung_id, "UBERON:0000948"],
|
31
|
-
"cell_type": ["T cell", "B cell", "B cell"],
|
32
|
-
"self_reported_ethnicity": ["South Asian", "South Asian", "South Asian"],
|
33
|
-
"donor_id": ["-1", "1", "2"],
|
34
|
-
"is_primary_data": [False, False, False],
|
35
|
-
"suspension_type": ["cell", "cell", "cell"],
|
36
|
-
"tissue_type": ["tissue", "tissue", "tissue"],
|
37
|
-
},
|
41
|
+
obs_data,
|
38
42
|
index=["barcode1", "barcode2", "barcode3"],
|
39
43
|
)
|
40
44
|
|
@@ -65,8 +69,38 @@ def small_dataset3_cellxgene(
|
|
65
69
|
# CELLxGENE requires the `.raw` slot to be set - https://github.com/chanzuckerberg/single-cell-curation/issues/1304
|
66
70
|
adata.raw = adata.copy()
|
67
71
|
adata.raw.var.drop(columns="feature_is_filtered", inplace=True)
|
72
|
+
|
68
73
|
if with_obs_defaults:
|
74
|
+
adata.obs["cell_type_ontology_term_id"] = [
|
75
|
+
"CL:0000084",
|
76
|
+
"CL:0000236",
|
77
|
+
"CL:0000236",
|
78
|
+
]
|
79
|
+
adata.obs["self_reported_ethnicity_ontology_term_id"] = "na"
|
80
|
+
adata.obs["assay_ontology_term_id"] = "EFO:1001982"
|
69
81
|
adata.obs["assay"] = "single-cell RNA sequencing"
|
82
|
+
if with_uns_organism:
|
83
|
+
adata.uns["organism_ontology_term_id"] = "NCBITaxon:9606"
|
84
|
+
adata.uns["organism"] = "Homo sapiens"
|
85
|
+
else:
|
86
|
+
adata.obs["organism_ontology_term_id"] = "NCBITaxon:9606"
|
87
|
+
obs_data["organism"] = ["Homo sapiens", "Homo sapiens", "Homo sapiens"]
|
88
|
+
if with_uns_spatial:
|
89
|
+
adata.uns["spatial"] = {
|
90
|
+
"is_single": True,
|
91
|
+
"library_123": {
|
92
|
+
"scalefactors": {
|
93
|
+
"spot_diameter_fullres": 165.0,
|
94
|
+
"tissue_hires_scalef": 0.5,
|
95
|
+
},
|
96
|
+
"images": {
|
97
|
+
"hires": np.random.default_rng().integers(
|
98
|
+
0, 255, (2000, 2000, 3), dtype=np.uint8
|
99
|
+
)
|
100
|
+
},
|
101
|
+
},
|
102
|
+
}
|
103
|
+
|
70
104
|
return adata
|
71
105
|
|
72
106
|
|
@@ -92,6 +126,16 @@ def anndata_with_obs() -> ad.AnnData:
|
|
92
126
|
df.index = "obs" + df.index.astype(str)
|
93
127
|
|
94
128
|
adata = ad.AnnData(X=np.zeros(shape=(40, 100), dtype=np.float32), obs=df)
|
95
|
-
|
129
|
+
bionty_genes = bionty_base.Gene()
|
130
|
+
# backwards compatible
|
131
|
+
adata.var.index = (
|
132
|
+
(
|
133
|
+
bionty_genes.to_dataframe()
|
134
|
+
if hasattr(bionty_genes, "to_dataframe")
|
135
|
+
else bionty_genes.df()
|
136
|
+
)
|
137
|
+
.head(100)["ensembl_gene_id"]
|
138
|
+
.values
|
139
|
+
)
|
96
140
|
|
97
141
|
return adata
|
@@ -78,6 +78,7 @@ def get_dataset1(
|
|
78
78
|
with_outdated_gene: bool = False,
|
79
79
|
with_wrong_subtype: bool = False,
|
80
80
|
with_index_type_mismatch: bool = False,
|
81
|
+
with_nested_uns: bool = False,
|
81
82
|
) -> pd.DataFrame | ad.AnnData:
|
82
83
|
"""A small tabular dataset measuring expression & metadata."""
|
83
84
|
# define the data in the dataset
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# Generated by Django 5.2 on 2025-08-07 15:28
|
2
|
+
|
3
|
+
import django.db.models.deletion
|
4
|
+
import django.db.models.functions.datetime
|
5
|
+
from django.db import migrations, models
|
6
|
+
|
7
|
+
import lamindb.base.fields
|
8
|
+
import lamindb.base.users
|
9
|
+
import lamindb.models.run
|
10
|
+
import lamindb.models.sqlrecord
|
11
|
+
|
12
|
+
|
13
|
+
class Migration(migrations.Migration):
|
14
|
+
dependencies = [
|
15
|
+
("lamindb", "0117_fix_artifact_storage_hash_unique_constraints"),
|
16
|
+
]
|
17
|
+
|
18
|
+
operations = [
|
19
|
+
migrations.AlterField(
|
20
|
+
model_name="recordproject",
|
21
|
+
name="value",
|
22
|
+
field=lamindb.base.fields.ForeignKey(
|
23
|
+
blank=True,
|
24
|
+
on_delete=django.db.models.deletion.PROTECT,
|
25
|
+
related_name="links_in_record",
|
26
|
+
to="lamindb.project",
|
27
|
+
),
|
28
|
+
),
|
29
|
+
migrations.CreateModel(
|
30
|
+
name="ProjectRecord",
|
31
|
+
fields=[
|
32
|
+
(
|
33
|
+
"created_at",
|
34
|
+
lamindb.base.fields.DateTimeField(
|
35
|
+
blank=True,
|
36
|
+
db_default=django.db.models.functions.datetime.Now(),
|
37
|
+
db_index=True,
|
38
|
+
editable=False,
|
39
|
+
),
|
40
|
+
),
|
41
|
+
("id", models.BigAutoField(primary_key=True, serialize=False)),
|
42
|
+
(
|
43
|
+
"created_by",
|
44
|
+
lamindb.base.fields.ForeignKey(
|
45
|
+
blank=True,
|
46
|
+
default=lamindb.base.users.current_user_id,
|
47
|
+
editable=False,
|
48
|
+
on_delete=django.db.models.deletion.PROTECT,
|
49
|
+
related_name="+",
|
50
|
+
to="lamindb.user",
|
51
|
+
),
|
52
|
+
),
|
53
|
+
(
|
54
|
+
"feature",
|
55
|
+
lamindb.base.fields.ForeignKey(
|
56
|
+
blank=True,
|
57
|
+
default=None,
|
58
|
+
null=True,
|
59
|
+
on_delete=django.db.models.deletion.PROTECT,
|
60
|
+
related_name="links_projectrecord",
|
61
|
+
to="lamindb.feature",
|
62
|
+
),
|
63
|
+
),
|
64
|
+
(
|
65
|
+
"project",
|
66
|
+
lamindb.base.fields.ForeignKey(
|
67
|
+
blank=True,
|
68
|
+
on_delete=django.db.models.deletion.PROTECT,
|
69
|
+
related_name="links_record",
|
70
|
+
to="lamindb.project",
|
71
|
+
),
|
72
|
+
),
|
73
|
+
(
|
74
|
+
"record",
|
75
|
+
lamindb.base.fields.ForeignKey(
|
76
|
+
blank=True,
|
77
|
+
on_delete=django.db.models.deletion.CASCADE,
|
78
|
+
related_name="links_project",
|
79
|
+
to="lamindb.record",
|
80
|
+
),
|
81
|
+
),
|
82
|
+
(
|
83
|
+
"run",
|
84
|
+
lamindb.base.fields.ForeignKey(
|
85
|
+
blank=True,
|
86
|
+
default=lamindb.models.run.current_run,
|
87
|
+
null=True,
|
88
|
+
on_delete=django.db.models.deletion.PROTECT,
|
89
|
+
related_name="+",
|
90
|
+
to="lamindb.run",
|
91
|
+
),
|
92
|
+
),
|
93
|
+
],
|
94
|
+
options={
|
95
|
+
"unique_together": {("record", "project", "feature")},
|
96
|
+
},
|
97
|
+
bases=(lamindb.models.sqlrecord.IsLink, models.Model),
|
98
|
+
),
|
99
|
+
]
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Generated by Django 5.2 on 2025-08-09 13:31
|
2
|
+
|
3
|
+
from django.db import migrations, models
|
4
|
+
|
5
|
+
|
6
|
+
class Migration(migrations.Migration):
|
7
|
+
dependencies = [
|
8
|
+
("lamindb", "0118_alter_recordproject_value_projectrecord"),
|
9
|
+
]
|
10
|
+
|
11
|
+
operations = [
|
12
|
+
migrations.RenameField(
|
13
|
+
model_name="project",
|
14
|
+
old_name="records",
|
15
|
+
new_name="linked_in_records",
|
16
|
+
),
|
17
|
+
migrations.AddField(
|
18
|
+
model_name="project",
|
19
|
+
name="records",
|
20
|
+
field=models.ManyToManyField(
|
21
|
+
related_name="projects",
|
22
|
+
through="lamindb.ProjectRecord",
|
23
|
+
to="lamindb.record",
|
24
|
+
),
|
25
|
+
),
|
26
|
+
]
|