scmcp-shared 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scmcp_shared/__init__.py +3 -0
- scmcp_shared/logging_config.py +31 -0
- scmcp_shared/schema/__init__.py +1 -0
- scmcp_shared/schema/io.py +120 -0
- scmcp_shared/schema/pl.py +948 -0
- scmcp_shared/schema/pp.py +707 -0
- scmcp_shared/schema/tl.py +902 -0
- scmcp_shared/schema/util.py +131 -0
- scmcp_shared/server/__init__.py +1 -0
- scmcp_shared/server/io.py +80 -0
- scmcp_shared/util.py +186 -0
- scmcp_shared-0.1.0.dist-info/METADATA +44 -0
- scmcp_shared-0.1.0.dist-info/RECORD +15 -0
- scmcp_shared-0.1.0.dist-info/WHEEL +4 -0
- scmcp_shared-0.1.0.dist-info/licenses/LICENSE +28 -0
@@ -0,0 +1,902 @@
|
|
1
|
+
from pydantic import BaseModel, Field, field_validator, ValidationInfo
|
2
|
+
from typing import Optional, Union, List, Dict, Any, Tuple, Literal, Mapping
|
3
|
+
|
4
|
+
|
5
|
+
|
6
|
+
class TSNEModel(BaseModel):
    """Input schema for the t-SNE dimensionality reduction tool.

    Numeric bounds are declared on the fields (``ge`` / ``gt``). The
    validators re-check strict positivity only for fields that require it
    and normalise ``metric`` to lower case.
    """

    n_pcs: Optional[int] = Field(
        default=None,
        description="Number of PCs to use. If None, automatically determined.",
        ge=0
    )
    use_rep: Optional[str] = Field(
        default=None,
        description="Key for .obsm to use as representation."
    )
    perplexity: Optional[Union[float, int]] = Field(
        default=30,
        description="Related to number of nearest neighbors, typically between 5-50.",
        gt=0
    )
    early_exaggeration: Optional[Union[float, int]] = Field(
        default=12,
        description="Controls space between natural clusters in embedded space.",
        gt=0
    )
    learning_rate: Optional[Union[float, int]] = Field(
        default=1000,
        description="Learning rate for optimization, typically between 100-1000.",
        gt=0
    )
    use_fast_tsne: Optional[bool] = Field(
        default=False,
        description="Whether to use Multicore-tSNE implementation."
    )
    n_jobs: Optional[int] = Field(
        default=None,
        description="Number of jobs for parallel computation.",
        gt=0
    )
    metric: Optional[str] = Field(
        default='euclidean',
        description="Distance metric to use."
    )

    # ``n_pcs`` is intentionally NOT listed here: its field declares ``ge=0``,
    # so 0 is a legal value and must not be rejected by a "positive" check.
    @field_validator('perplexity', 'early_exaggeration',
                     'learning_rate', 'n_jobs')
    @classmethod
    def validate_positive_numbers(cls, v: Optional[Union[int, float]]) -> Optional[Union[int, float]]:
        """Validate that a supplied value is strictly positive.

        Args:
            v: The field value, or ``None`` when explicitly unset.

        Returns:
            ``v`` unchanged.

        Raises:
            ValueError: If ``v`` is not ``None`` and ``v <= 0``.
        """
        if v is not None and v <= 0:
            raise ValueError("must be a positive number")
        return v

    @field_validator('metric')
    @classmethod
    def validate_metric(cls, v: Optional[str]) -> Optional[str]:
        """Validate the distance metric and normalise it to lower case.

        Args:
            v: Metric name (case-insensitive), or ``None``.

        Returns:
            The lower-cased metric name, or ``None`` if ``None`` was given.

        Raises:
            ValueError: If the metric is not one of the supported names.
        """
        # The field is Optional, so an explicit None must not reach .lower().
        if v is None:
            return v
        valid_metrics = ['euclidean', 'cosine', 'manhattan', 'l1', 'l2']
        normalized = v.lower()
        if normalized not in valid_metrics:
            raise ValueError(f"metric must be one of {valid_metrics}")
        return normalized
|
61
|
+
|
62
|
+
|
63
|
+
class UMAPModel(BaseModel):
    """Input schema for the UMAP dimensionality reduction tool.

    Positivity of the numeric parameters is enforced both by the ``gt=0``
    field constraints and by ``validate_positive_numbers``. ``method`` is
    normalised to lower case.
    """

    min_dist: Optional[float] = Field(
        default=0.5,
        description="Minimum distance between embedded points.",
        gt=0
    )

    spread: Optional[float] = Field(
        default=1.0,
        description="Scale of embedded points.",
        gt=0
    )

    n_components: Optional[int] = Field(
        default=2,
        description="Number of dimensions of the embedding.",
        gt=0
    )

    maxiter: Optional[int] = Field(
        default=None,
        description="Number of iterations (epochs) of the optimization.",
        gt=0
    )

    alpha: Optional[float] = Field(
        default=1.0,
        description="Initial learning rate for the embedding optimization.",
        gt=0
    )

    gamma: Optional[float] = Field(
        default=1.0,
        description="Weighting applied to negative samples.",
        gt=0
    )
    negative_sample_rate: Optional[int] = Field(
        default=5,
        description="Number of negative samples per positive sample.",
        gt=0
    )
    init_pos: Optional[str] = Field(
        default='spectral',
        description="How to initialize the low dimensional embedding.",
    )
    random_state: Optional[int] = Field(
        default=0,
        description="Random seed for reproducibility."
    )
    a: Optional[float] = Field(
        default=None,
        description="Parameter controlling the embedding.",
        gt=0
    )
    b: Optional[float] = Field(
        default=None,
        description="Parameter controlling the embedding.",
        gt=0
    )
    method: Optional[str] = Field(
        default='umap',
        description="Implementation to use ('umap' or 'rapids')."
    )
    neighbors_key: Optional[str] = Field(
        default=None,
        description="Key for neighbors settings in .uns."
    )

    @field_validator('min_dist', 'spread', 'n_components', 'maxiter',
                     'alpha', 'gamma', 'negative_sample_rate', 'a', 'b')
    @classmethod
    def validate_positive_numbers(cls, v: Optional[Union[int, float]]) -> Optional[Union[int, float]]:
        """Validate that a supplied value is strictly positive.

        Args:
            v: The field value, or ``None`` when explicitly unset.

        Returns:
            ``v`` unchanged.

        Raises:
            ValueError: If ``v`` is not ``None`` and ``v <= 0``.
        """
        if v is not None and v <= 0:
            raise ValueError("must be a positive number")
        return v

    @field_validator('method')
    @classmethod
    def validate_method(cls, v: Optional[str]) -> Optional[str]:
        """Validate the implementation name and normalise it to lower case.

        Args:
            v: Implementation name (case-insensitive), or ``None``.

        Returns:
            The lower-cased name, or ``None`` if ``None`` was given.

        Raises:
            ValueError: If the name is neither ``'umap'`` nor ``'rapids'``.
        """
        # The field is Optional, so an explicit None must not reach .lower().
        if v is None:
            return v
        normalized = v.lower()
        if normalized not in ['umap', 'rapids']:
            raise ValueError("method must be either 'umap' or 'rapids'")
        return normalized
|
147
|
+
|
148
|
+
|
149
|
+
class DrawGraphModel(BaseModel):
    """Input schema for the force-directed graph drawing tool.

    ``layout`` is normalised to lower case. ``root`` accepts any
    non-negative integer (``ge=0``), while ``n_jobs`` must be strictly
    positive.
    """

    layout: str = Field(
        default='fa',
        description="Graph layout algorithm ('fa', 'fr', 'kk', etc.)",
    )
    init_pos: Optional[Union[str, bool]] = Field(
        default=None,
        description="Initial position for nodes ('paga'/True, False, or .obsm key)",
    )
    root: Optional[int] = Field(
        default=None,
        description="Root node for tree layouts",
        ge=0
    )
    random_state: int = Field(
        default=0,
        description="Random seed for reproducibility"
    )
    n_jobs: Optional[int] = Field(
        default=None,
        description="Number of jobs for parallel computation",
        gt=0
    )
    key_added_ext: Optional[str] = Field(
        default=None,
        description="Suffix for storing results in .obsm"
    )
    neighbors_key: Optional[str] = Field(
        default=None,
        description="Key for neighbors settings in .uns"
    )
    obsp: Optional[str] = Field(
        default=None,
        description="Key for adjacency matrix in .obsp"
    )

    @field_validator('layout')
    @classmethod
    def validate_layout(cls, v: str) -> str:
        """Validate the layout name and normalise it to lower case.

        Args:
            v: Layout name (case-insensitive).

        Returns:
            The lower-cased layout name.

        Raises:
            ValueError: If the layout is not one of the supported names.
        """
        valid_layouts = ['fa', 'fr', 'grid_fr', 'kk', 'lgl', 'drl', 'rt']
        normalized = v.lower()
        if normalized not in valid_layouts:
            raise ValueError(f"layout must be one of {valid_layouts}")
        return normalized

    # ``root`` is intentionally NOT listed here: its field declares ``ge=0``,
    # so 0 is a legal value; a strict-positivity check would make it
    # impossible to select node 0. Its lower bound is enforced by the Field.
    @field_validator('n_jobs')
    @classmethod
    def validate_positive_integers(cls, v: Optional[int]) -> Optional[int]:
        """Validate that a supplied value is strictly positive.

        Args:
            v: The field value, or ``None`` when explicitly unset.

        Returns:
            ``v`` unchanged.

        Raises:
            ValueError: If ``v`` is not ``None`` and ``v <= 0``.
        """
        if v is not None and v <= 0:
            raise ValueError("must be a positive integer")
        return v
|
201
|
+
|
202
|
+
|
203
|
+
class DiffMapModel(BaseModel):
    """Parameters accepted by the Diffusion Maps dimensionality reduction tool."""

    # Dimensionality of the computed representation (strictly positive).
    n_comps: int = Field(
        default=15,
        description="The number of dimensions of the representation.",
        gt=0,
    )
    # Optional override of where the neighbors settings are looked up.
    neighbors_key: Optional[str] = Field(
        default=None,
        description=(
            "If not specified, diffmap looks .uns['neighbors'] for neighbors settings "
            "and .obsp['connectivities'], .obsp['distances'] for connectivities and "
            "distances respectively. If specified, diffmap looks .uns[neighbors_key] for "
            "neighbors settings and uses the corresponding connectivities and distances."
        ),
    )
    random_state: int = Field(default=0, description="Random seed for reproducibility.")

    @field_validator("n_comps")
    def validate_positive_integers(cls, v: int) -> int:
        """Return ``v`` unchanged; raise ``ValueError`` when it is zero or negative."""
        non_positive = v <= 0
        if non_positive:
            raise ValueError("n_comps must be a positive integer")
        return v
|
231
|
+
|
232
|
+
|
233
|
+
class EmbeddingDensityModel(BaseModel):
    """Parameters accepted by the embedding density calculation tool."""

    basis: str = Field(
        default="umap",
        description="The embedding over which the density will be calculated. This embedded representation should be found in `adata.obsm['X_[basis]']`.",
    )
    groupby: Optional[str] = Field(
        default=None,
        description="Key for categorical observation/cell annotation for which densities are calculated per category.",
    )
    key_added: Optional[str] = Field(
        default=None,
        description="Name of the `.obs` covariate that will be added with the density estimates.",
    )
    components: Optional[Union[str, List[str]]] = Field(
        default=None,
        description="The embedding dimensions over which the density should be calculated. This is limited to two components.",
    )

    @field_validator("components")
    def validate_components(cls, v: Optional[Union[str, List[str]]]) -> Optional[Union[str, List[str]]]:
        """Reject list values with more than two entries.

        Only list inputs are length-checked; ``None`` and plain strings are
        returned as given.
        """
        # isinstance() is already False for None, so no separate None test is needed.
        too_many = isinstance(v, list) and len(v) > 2
        if too_many:
            raise ValueError("components is limited to two dimensions")
        return v
|
259
|
+
|
260
|
+
|
261
|
+
class LeidenModel(BaseModel):
    """Input schema for the Leiden clustering algorithm.

    ``obsp`` and ``neighbors_key`` are mutually exclusive; supplying both
    is rejected by ``validate_graph_source``.
    """

    resolution: Optional[float] = Field(
        default=1.0,
        description="A parameter value controlling the coarseness of the clustering. Higher values lead to more clusters."
    )

    key_added: Optional[str] = Field(
        default='leiden',
        description="`adata.obs` key under which to add the cluster labels."
    )

    directed: Optional[bool] = Field(
        default=None,
        description="Whether to treat the graph as directed or undirected."
    )

    use_weights: Optional[bool] = Field(
        default=True,
        description="If `True`, edge weights from the graph are used in the computation (placing more emphasis on stronger edges)."
    )

    n_iterations: Optional[int] = Field(
        default=-1,
        description="How many iterations of the Leiden clustering algorithm to perform. -1 runs until optimal clustering."
    )

    neighbors_key: Optional[str] = Field(
        default=None,
        description="Use neighbors connectivities as adjacency. If specified, leiden looks .obsp[.uns[neighbors_key]['connectivities_key']] for connectivities."
    )

    obsp: Optional[str] = Field(
        default=None,
        description="Use .obsp[obsp] as adjacency. You can't specify both `obsp` and `neighbors_key` at the same time."
    )

    flavor: Optional[Literal['leidenalg', 'igraph']] = Field(
        default='igraph',
        description="Which package's implementation to use."
    )

    clustering_args: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Any further arguments to pass to the clustering algorithm."
    )

    @field_validator('resolution')
    @classmethod
    def validate_resolution(cls, v: Optional[float]) -> Optional[float]:
        """Validate that a supplied resolution is strictly positive.

        Args:
            v: The resolution, or ``None`` (the field is Optional).

        Returns:
            ``v`` unchanged.

        Raises:
            ValueError: If ``v`` is not ``None`` and ``v <= 0``.
        """
        # Guard against None first: ``None <= 0`` would raise TypeError.
        if v is not None and v <= 0:
            raise ValueError("resolution must be a positive number")
        return v

    @field_validator('obsp', 'neighbors_key')
    @classmethod
    def validate_graph_source(cls, v: Optional[str], info: ValidationInfo) -> Optional[str]:
        """Validate that ``obsp`` and ``neighbors_key`` are not both specified.

        Args:
            v: The value of the field currently being validated.
            info: Validation context; ``info.data`` holds the fields that
                were validated before the current one.

        Returns:
            ``v`` unchanged.

        Raises:
            ValueError: If this field and its counterpart are both set.
        """
        # ``info.data`` never contains the field being validated, so compare
        # the current value against the *other* field only. Whichever of the
        # two is declared later will see its counterpart and perform the check.
        other = 'neighbors_key' if info.field_name == 'obsp' else 'obsp'
        if v is not None and info.data.get(other) is not None:
            raise ValueError("Cannot specify both obsp and neighbors_key")
        return v

    @field_validator('flavor')
    @classmethod
    def validate_flavor(cls, v: str) -> str:
        """Validate that the flavor is one of the supported implementations.

        Note that an explicit ``None`` is rejected here as well, because it
        is not a member of the accepted list.

        Raises:
            ValueError: If ``v`` is not ``'leidenalg'`` or ``'igraph'``.
        """
        if v not in ['leidenalg', 'igraph']:
            raise ValueError("flavor must be either 'leidenalg' or 'igraph'")
        return v
|
331
|
+
|
332
|
+
|
333
|
+
class LouvainModel(BaseModel):
    """Input schema for the Louvain clustering algorithm.

    ``obsp`` and ``neighbors_key`` are mutually exclusive; supplying both
    is rejected by ``validate_graph_source``.
    """

    resolution: Optional[float] = Field(
        default=None,
        description="For the default flavor ('vtraag') or for 'RAPIDS', you can provide a resolution (higher resolution means finding more and smaller clusters), which defaults to 1.0."
    )

    random_state: int = Field(
        default=0,
        description="Change the initialization of the optimization."
    )

    key_added: str = Field(
        default='louvain',
        description="Key under which to add the cluster labels."
    )

    flavor: Literal['vtraag', 'igraph', 'rapids'] = Field(
        default='vtraag',
        description="Package for computing the clustering: 'vtraag' (default, more powerful), 'igraph' (built-in method), or 'rapids' (GPU accelerated)."
    )

    directed: bool = Field(
        default=True,
        description="Interpret the adjacency matrix as directed graph."
    )

    use_weights: bool = Field(
        default=False,
        description="Use weights from knn graph."
    )

    partition_kwargs: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Key word arguments to pass to partitioning, if 'vtraag' method is being used."
    )

    neighbors_key: Optional[str] = Field(
        default=None,
        description="Use neighbors connectivities as adjacency. If specified, louvain looks .obsp[.uns[neighbors_key]['connectivities_key']] for connectivities."
    )

    obsp: Optional[str] = Field(
        default=None,
        description="Use .obsp[obsp] as adjacency. You can't specify both `obsp` and `neighbors_key` at the same time."
    )

    @field_validator('resolution')
    @classmethod
    def validate_resolution(cls, v: Optional[float]) -> Optional[float]:
        """Validate that a supplied resolution is strictly positive.

        Args:
            v: The resolution, or ``None`` when not provided.

        Returns:
            ``v`` unchanged.

        Raises:
            ValueError: If ``v`` is not ``None`` and ``v <= 0``.
        """
        if v is not None and v <= 0:
            raise ValueError("resolution must be a positive number")
        return v

    @field_validator('obsp', 'neighbors_key')
    @classmethod
    def validate_graph_source(cls, v: Optional[str], info: ValidationInfo) -> Optional[str]:
        """Validate that ``obsp`` and ``neighbors_key`` are not both specified.

        Args:
            v: The value of the field currently being validated.
            info: Validation context; ``info.data`` holds the fields that
                were validated before the current one.

        Returns:
            ``v`` unchanged.

        Raises:
            ValueError: If this field and its counterpart are both set.
        """
        # ``info.data`` never contains the field being validated, so compare
        # the current value against the *other* field only. Whichever of the
        # two is declared later will see its counterpart and perform the check.
        other = 'neighbors_key' if info.field_name == 'obsp' else 'obsp'
        if v is not None and info.data.get(other) is not None:
            raise ValueError("Cannot specify both obsp and neighbors_key")
        return v

    @field_validator('flavor')
    @classmethod
    def validate_flavor(cls, v: str) -> str:
        """Validate that the flavor is one of the supported packages.

        Raises:
            ValueError: If ``v`` is not ``'vtraag'``, ``'igraph'`` or ``'rapids'``.
        """
        if v not in ['vtraag', 'igraph', 'rapids']:
            raise ValueError("flavor must be one of 'vtraag', 'igraph', or 'rapids'")
        return v
|
403
|
+
|
404
|
+
|
405
|
+
class DendrogramModel(BaseModel):
    """Parameters accepted by the hierarchical clustering dendrogram tool."""

    # The only required field of this schema.
    groupby: str = Field(
        ...,
        description="The categorical observation annotation to use for grouping.",
    )
    n_pcs: Optional[int] = Field(
        default=None,
        description="Use this many PCs. If n_pcs==0 use .X if use_rep is None.",
        ge=0,
    )
    use_rep: Optional[str] = Field(
        default=None,
        description="Use the indicated representation. 'X' or any key for .obsm is valid.",
    )
    var_names: Optional[List[str]] = Field(
        default=None,
        description="List of var_names to use for computing the hierarchical clustering. If provided, use_rep and n_pcs are ignored.",
    )
    use_raw: Optional[bool] = Field(
        default=None,
        description="Only when var_names is not None. Use raw attribute of adata if present.",
    )
    cor_method: str = Field(
        default="pearson",
        description="Correlation method to use: 'pearson', 'kendall', or 'spearman'.",
    )
    linkage_method: str = Field(
        default="complete",
        description="Linkage method to use for hierarchical clustering.",
    )
    optimal_ordering: bool = Field(
        default=False,
        description="Reorders the linkage matrix so that the distance between successive leaves is minimal.",
    )
    key_added: Optional[str] = Field(
        default=None,
        description="By default, the dendrogram information is added to .uns[f'dendrogram_{groupby}'].",
    )

    @field_validator("cor_method")
    def validate_cor_method(cls, v: str) -> str:
        """Lower-case the correlation method and ensure it is a known one."""
        valid_methods = ["pearson", "kendall", "spearman"]
        lowered = v.lower()
        if lowered in valid_methods:
            return lowered
        raise ValueError(f"cor_method must be one of {valid_methods}")

    @field_validator("linkage_method")
    def validate_linkage_method(cls, v: str) -> str:
        """Lower-case the linkage method and ensure it is a known one."""
        valid_methods = [
            "single", "complete", "average", "weighted",
            "centroid", "median", "ward",
        ]
        lowered = v.lower()
        if lowered in valid_methods:
            return lowered
        raise ValueError(f"linkage_method must be one of {valid_methods}")

    @field_validator("n_pcs")
    def validate_n_pcs(cls, v: Optional[int]) -> Optional[int]:
        """Return ``v`` unchanged; raise ``ValueError`` when it is negative."""
        if v is None:
            return v
        if v < 0:
            raise ValueError("n_pcs must be a non-negative integer")
        return v
|
468
|
+
|
469
|
+
|
470
|
+
class DPTModel(BaseModel):
    """Parameters accepted by the Diffusion Pseudotime (DPT) tool."""

    n_dcs: int = Field(
        default=10,
        description="The number of diffusion components to use.",
        gt=0,
    )
    n_branchings: int = Field(
        default=0,
        description="Number of branchings to detect.",
        ge=0,
    )
    # Constrained to the half-open interval (0, 1].
    min_group_size: float = Field(
        default=0.01,
        description="During recursive splitting of branches, do not consider groups that contain less than min_group_size data points. If a float, refers to a fraction of the total number of data points.",
        gt=0,
        le=1.0,
    )
    allow_kendall_tau_shift: bool = Field(
        default=True,
        description="If a very small branch is detected upon splitting, shift away from maximum correlation in Kendall tau criterion to stabilize the splitting.",
    )
    neighbors_key: Optional[str] = Field(
        default=None,
        description="If specified, dpt looks .uns[neighbors_key] for neighbors settings and uses the corresponding connectivities and distances.",
    )

    @field_validator("n_dcs")
    def validate_n_dcs(cls, v: int) -> int:
        """Return ``v`` unchanged; raise ``ValueError`` when it is zero or negative."""
        non_positive = v <= 0
        if non_positive:
            raise ValueError("n_dcs must be a positive integer")
        return v

    @field_validator("n_branchings")
    def validate_n_branchings(cls, v: int) -> int:
        """Return ``v`` unchanged; raise ``ValueError`` when it is negative."""
        negative = v < 0
        if negative:
            raise ValueError("n_branchings must be a non-negative integer")
        return v

    @field_validator("min_group_size")
    def validate_min_group_size(cls, v: float) -> float:
        """Return ``v`` unchanged; raise ``ValueError`` when ``v <= 0`` or ``v > 1``."""
        out_of_range = v <= 0 or v > 1
        if out_of_range:
            raise ValueError("min_group_size must be between 0 and 1")
        return v
|
518
|
+
|
519
|
+
class PAGAModel(BaseModel):
    """Parameters accepted by the Partition-based Graph Abstraction (PAGA) tool."""

    groups: Optional[str] = Field(
        default=None,
        description="Key for categorical in adata.obs. You can pass your predefined groups by choosing any categorical annotation of observations. Default: The first present key of 'leiden' or 'louvain'.",
    )
    use_rna_velocity: bool = Field(
        default=False,
        description="Use RNA velocity to orient edges in the abstracted graph and estimate transitions. Requires that adata.uns contains a directed single-cell graph with key ['velocity_graph'].",
    )
    model: Literal["v1.2", "v1.0"] = Field(
        default="v1.2",
        description="The PAGA connectivity model.",
    )
    neighbors_key: Optional[str] = Field(
        default=None,
        description="If specified, paga looks .uns[neighbors_key] for neighbors settings and uses the corresponding connectivities and distances.",
    )

    @field_validator("model")
    def validate_model(cls, v: str) -> str:
        """Return ``v`` when it names a supported PAGA model version, else raise."""
        supported = ["v1.2", "v1.0"]
        if v in supported:
            return v
        raise ValueError("model must be either 'v1.2' or 'v1.0'")
|
545
|
+
|
546
|
+
|
547
|
+
class IngestModel(BaseModel):
    """Parameters for the ingest tool, which maps labels and embeddings from reference data to new data."""

    obs: Optional[Union[str, List[str]]] = Field(
        default=None,
        description="Labels' keys in adata_ref.obs which need to be mapped to adata.obs (inferred for observation of adata).",
    )

    embedding_method: Union[str, List[str]] = Field(
        default=["umap", "pca"],
        description="Embeddings in adata_ref which need to be mapped to adata. The only supported values are 'umap' and 'pca'.",
    )

    labeling_method: str = Field(
        default="knn",
        description="The method to map labels in adata_ref.obs to adata.obs. The only supported value is 'knn'.",
    )

    neighbors_key: Optional[str] = Field(
        default=None,
        description="If specified, ingest looks adata_ref.uns[neighbors_key] for neighbors settings and uses the corresponding distances.",
    )

    @field_validator("embedding_method")
    def validate_embedding_method(cls, v: Union[str, List[str]]) -> Union[str, List[str]]:
        """Lower-case the embedding method(s) and ensure each one is supported.

        A single string yields a lower-cased string; a list yields a new
        list of lower-cased entries. Any other value is returned untouched.
        """
        valid_methods = ["umap", "pca"]

        if isinstance(v, str):
            lowered = v.lower()
            if lowered not in valid_methods:
                raise ValueError(f"embedding_method must be one of {valid_methods}")
            return lowered

        if isinstance(v, list):
            # Stops at the first unsupported entry, like an explicit loop would.
            if any(entry.lower() not in valid_methods for entry in v):
                raise ValueError(f"embedding_method must contain only values from {valid_methods}")
            return [entry.lower() for entry in v]

        return v

    @field_validator("labeling_method")
    def validate_labeling_method(cls, v: str) -> str:
        """Lower-case the labeling method and ensure it is ``'knn'``."""
        lowered = v.lower()
        if lowered == "knn":
            return lowered
        raise ValueError("labeling_method must be 'knn'")
|
594
|
+
|
595
|
+
|
596
|
+
class RankGenesGroupsModel(BaseModel):
    """Parameters accepted by the rank_genes_groups tool."""

    # The only required field of this schema.
    groupby: str = Field(
        ...,
        description="The key of the observations grouping to consider.",
    )
    mask_var: Optional[Union[str, List[bool]]] = Field(
        default=None,
        description="Select subset of genes to use in statistical tests.",
    )
    use_raw: Optional[bool] = Field(
        default=None,
        description="Use raw attribute of adata if present.",
    )
    groups: Union[Literal["all"], List[str]] = Field(
        default="all",
        description="Subset of groups to which comparison shall be restricted, or 'all' for all groups.",
    )
    reference: str = Field(
        default="rest",
        description="If 'rest', compare each group to the union of the rest of the group. If a group identifier, compare with respect to this group.",
    )
    n_genes: Optional[int] = Field(
        default=None,
        description="The number of genes that appear in the returned tables. Defaults to all genes.",
        gt=0,
    )
    rankby_abs: bool = Field(
        default=False,
        description="Rank genes by the absolute value of the score, not by the score.",
    )
    pts: bool = Field(
        default=False,
        description="Compute the fraction of cells expressing the genes.",
    )
    key_added: Optional[str] = Field(
        default=None,
        description="The key in adata.uns information is saved to.",
    )
    method: Optional[str] = Field(
        default=None,
        description="Method for differential expression analysis. Default is 't-test'.",
    )
    corr_method: str = Field(
        default="benjamini-hochberg",
        description="p-value correction method. Used only for 't-test', 't-test_overestim_var', and 'wilcoxon'.",
    )
    tie_correct: bool = Field(
        default=False,
        description="Use tie correction for 'wilcoxon' scores. Used only for 'wilcoxon'.",
    )
    layer: Optional[str] = Field(
        default=None,
        description="Key from adata.layers whose value will be used to perform tests on.",
    )

    @field_validator("method")
    def validate_method(cls, v: Optional[str]) -> Optional[str]:
        """Return ``v`` when it is ``None`` or a supported test name, else raise."""
        if v is None:
            return v
        valid_methods = ["t-test", "t-test_overestim_var", "wilcoxon", "logreg"]
        if v in valid_methods:
            return v
        raise ValueError(f"method must be one of {valid_methods}")

    @field_validator("corr_method")
    def validate_corr_method(cls, v: str) -> str:
        """Return ``v`` when it is a supported p-value correction, else raise."""
        valid_methods = ["benjamini-hochberg", "bonferroni"]
        if v in valid_methods:
            return v
        raise ValueError(f"corr_method must be one of {valid_methods}")

    @field_validator("n_genes")
    def validate_n_genes(cls, v: Optional[int]) -> Optional[int]:
        """Return ``v`` unchanged; raise ``ValueError`` when it is zero or negative."""
        if v is None:
            return v
        if v <= 0:
            raise ValueError("n_genes must be a positive integer")
        return v
|
676
|
+
|
677
|
+
|
678
|
+
class FilterRankGenesGroupsModel(BaseModel):
    """Parameters accepted when filtering ranked genes groups."""

    key: Optional[str] = Field(
        default=None,
        description="Key from adata.uns where rank_genes_groups output is stored.",
    )

    groupby: Optional[str] = Field(
        default=None,
        description="The key of the observations grouping to consider.",
    )

    use_raw: Optional[bool] = Field(
        default=None,
        description="Use raw attribute of adata if present.",
    )

    key_added: str = Field(
        default="rank_genes_groups_filtered",
        description="The key in adata.uns information is saved to.",
    )

    # Both fraction fields are constrained to the closed interval [0, 1].
    min_in_group_fraction: float = Field(
        default=0.25,
        description="Minimum fraction of cells expressing the gene within the group.",
        ge=0.0,
        le=1.0,
    )

    min_fold_change: Union[int, float] = Field(
        default=1,
        description="Minimum fold change for a gene to be considered significant.",
        gt=0,
    )

    max_out_group_fraction: float = Field(
        default=0.5,
        description="Maximum fraction of cells expressing the gene outside the group.",
        ge=0.0,
        le=1.0,
    )

    compare_abs: bool = Field(
        default=False,
        description="If True, compare absolute values of log fold change with min_fold_change.",
    )

    @field_validator("min_in_group_fraction", "max_out_group_fraction")
    def validate_fractions(cls, v: float) -> float:
        """Return ``v`` unchanged; raise ``ValueError`` when ``v < 0`` or ``v > 1``."""
        out_of_range = v < 0 or v > 1
        if out_of_range:
            raise ValueError("Fraction values must be between 0 and 1")
        return v

    @field_validator("min_fold_change")
    def validate_fold_change(cls, v: Union[int, float]) -> Union[int, float]:
        """Return ``v`` unchanged; raise ``ValueError`` when it is zero or negative."""
        non_positive = v <= 0
        if non_positive:
            raise ValueError("min_fold_change must be a positive number")
        return v
|
739
|
+
|
740
|
+
|
741
|
+
class MarkerGeneOverlapModel(BaseModel):
    """Parameters accepted by the marker gene overlap tool."""

    key: str = Field(
        default="rank_genes_groups",
        description="The key in adata.uns where the rank_genes_groups output is stored.",
    )

    # Declared before ``normalize`` so that the normalize validator can read it.
    method: str = Field(
        default="overlap_count",
        description="Method to calculate marker gene overlap: 'overlap_count', 'overlap_coef', or 'jaccard'.",
    )

    normalize: Optional[Literal["reference", "data"]] = Field(
        default=None,
        description="Normalization option for the marker gene overlap output. Only applicable when method is 'overlap_count'.",
    )

    top_n_markers: Optional[int] = Field(
        default=None,
        description="The number of top data-derived marker genes to use. By default the top 100 marker genes are used.",
        gt=0,
    )

    adj_pval_threshold: Optional[float] = Field(
        default=None,
        description="A significance threshold on the adjusted p-values to select marker genes.",
        gt=0,
        le=1.0,
    )

    key_added: str = Field(
        default="marker_gene_overlap",
        description="Name of the .uns field that will contain the marker overlap scores.",
    )

    @field_validator("method")
    def validate_method(cls, v: str) -> str:
        """Return ``v`` when it names a supported overlap method, else raise."""
        valid_methods = ["overlap_count", "overlap_coef", "jaccard"]
        if v in valid_methods:
            return v
        raise ValueError(f"method must be one of {valid_methods}")

    @field_validator("normalize")
    def validate_normalize(cls, v: Optional[str], info: ValidationInfo) -> Optional[str]:
        """Ensure ``normalize`` is a known option and only combined with 'overlap_count'.

        ``None`` is returned as-is without any further checks.
        """
        if v is None:
            return v

        if v not in ["reference", "data"]:
            raise ValueError("normalize must be either 'reference' or 'data'")

        already_validated = info.data
        # The method check is skipped when 'method' is absent from the data.
        if "method" in already_validated and already_validated["method"] != "overlap_count":
            raise ValueError("normalize can only be used when method is 'overlap_count'")
        return v

    @field_validator("top_n_markers")
    def validate_top_n_markers(cls, v: Optional[int]) -> Optional[int]:
        """Return ``v`` unchanged; raise ``ValueError`` when it is zero or negative."""
        if v is None:
            return v
        if v <= 0:
            raise ValueError("top_n_markers must be a positive integer")
        return v

    @field_validator("adj_pval_threshold")
    def validate_adj_pval_threshold(cls, v: Optional[float]) -> Optional[float]:
        """Return ``v`` unchanged; raise ``ValueError`` when ``v <= 0`` or ``v > 1``."""
        if v is None:
            return v
        out_of_range = v <= 0 or v > 1
        if out_of_range:
            raise ValueError("adj_pval_threshold must be between 0 and 1")
        return v
|
810
|
+
|
811
|
+
|
812
|
+
class ScoreGenesModel(BaseModel):
    """Input schema for the score_genes tool that calculates gene scores based on average expression."""

    ctrl_size: int = Field(
        default=50,
        description="Number of reference genes to be sampled from each bin.",
        gt=0
    )

    gene_pool: Optional[List[str]] = Field(
        default=None,
        description="Genes for sampling the reference set. Default is all genes."
    )

    n_bins: int = Field(
        default=25,
        description="Number of expression level bins for sampling.",
        gt=0
    )

    score_name: str = Field(
        default='score',
        description="Name of the field to be added in .obs."
    )

    random_state: int = Field(
        default=0,
        description="The random seed for sampling."
    )

    use_raw: Optional[bool] = Field(
        default=None,
        description="Whether to use raw attribute of adata. Defaults to True if .raw is present."
    )

    @field_validator('ctrl_size', 'n_bins')
    @classmethod
    def validate_positive_integers(cls, v: int) -> int:
        """Check that ``ctrl_size`` / ``n_bins`` is strictly positive.

        Mirrors the ``gt=0`` constraint declared on both fields.

        Args:
            v: Value supplied for the field being validated.

        Returns:
            ``v`` unchanged when it is positive.

        Raises:
            ValueError: If ``v`` is zero or negative.
        """
        # The explicit @classmethod follows pydantic's recommendation for
        # validators whose first parameter is ``cls``.
        if v <= 0:
            raise ValueError("must be a positive integer")
        return v
|
853
|
+
|
854
|
+
|
855
|
+
class ScoreGenesCellCycleModel(BaseModel):
    """Input schema for the score_genes_cell_cycle tool that scores cell cycle genes."""

    s_genes: List[str] = Field(
        ...,  # Required field
        description="List of genes associated with S phase."
    )
    g2m_genes: List[str] = Field(
        ...,  # Required field
        description="List of genes associated with G2M phase."
    )
    gene_pool: Optional[List[str]] = Field(
        default=None,
        description="Genes for sampling the reference set. Default is all genes."
    )
    n_bins: int = Field(
        default=25,
        description="Number of expression level bins for sampling.",
        gt=0
    )
    # NOTE(review): scanpy's score_genes_cell_cycle appears to choose the
    # score names itself ('S_score' / 'G2M_score'); confirm at the call site
    # that a non-None score_name is not forwarded to it, as that may conflict.
    score_name: Optional[str] = Field(
        default=None,
        description="Name of the field to be added in .obs. If None, the scores are added as 'S_score' and 'G2M_score'."
    )
    random_state: int = Field(
        default=0,
        description="The random seed for sampling."
    )
    use_raw: Optional[bool] = Field(
        default=None,
        description="Whether to use raw attribute of adata. Defaults to True if .raw is present."
    )

    @field_validator('s_genes', 'g2m_genes')
    @classmethod
    def validate_gene_lists(cls, v: List[str]) -> List[str]:
        """Reject an empty S-phase or G2M-phase gene list.

        Args:
            v: Gene list supplied for ``s_genes`` or ``g2m_genes``.

        Returns:
            ``v`` unchanged when it contains at least one gene.

        Raises:
            ValueError: If ``v`` is empty.
        """
        if not v:
            raise ValueError("Gene list cannot be empty")
        return v

    @field_validator('n_bins')
    @classmethod
    def validate_positive_integers(cls, v: int) -> int:
        """Check that ``n_bins`` is strictly positive.

        Mirrors the ``gt=0`` constraint declared on the field.

        Args:
            v: Value supplied for ``n_bins``.

        Returns:
            ``v`` unchanged when it is positive.

        Raises:
            ValueError: If ``v`` is zero or negative.
        """
        if v <= 0:
            raise ValueError("n_bins must be a positive integer")
        return v
|
901
|
+
|
902
|
+
|