scmcp-shared 0.4.0__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scmcp_shared/__init__.py +1 -3
- scmcp_shared/agent.py +38 -21
- scmcp_shared/backend.py +44 -0
- scmcp_shared/cli.py +75 -46
- scmcp_shared/kb.py +139 -0
- scmcp_shared/logging_config.py +6 -8
- scmcp_shared/mcp_base.py +184 -0
- scmcp_shared/schema/io.py +101 -59
- scmcp_shared/schema/pl.py +386 -490
- scmcp_shared/schema/pp.py +514 -265
- scmcp_shared/schema/preset/__init__.py +15 -0
- scmcp_shared/schema/preset/io.py +103 -0
- scmcp_shared/schema/preset/pl.py +843 -0
- scmcp_shared/schema/preset/pp.py +616 -0
- scmcp_shared/schema/preset/tl.py +917 -0
- scmcp_shared/schema/preset/util.py +123 -0
- scmcp_shared/schema/tl.py +355 -407
- scmcp_shared/schema/util.py +57 -72
- scmcp_shared/server/__init__.py +5 -10
- scmcp_shared/server/auto.py +15 -11
- scmcp_shared/server/code.py +3 -0
- scmcp_shared/server/preset/__init__.py +14 -0
- scmcp_shared/server/{io.py → preset/io.py} +26 -22
- scmcp_shared/server/{pl.py → preset/pl.py} +162 -78
- scmcp_shared/server/{pp.py → preset/pp.py} +123 -65
- scmcp_shared/server/{tl.py → preset/tl.py} +142 -79
- scmcp_shared/server/{util.py → preset/util.py} +123 -66
- scmcp_shared/server/rag.py +13 -0
- scmcp_shared/util.py +109 -38
- {scmcp_shared-0.4.0.dist-info → scmcp_shared-0.6.0.dist-info}/METADATA +6 -2
- scmcp_shared-0.6.0.dist-info/RECORD +35 -0
- scmcp_shared/server/base.py +0 -148
- scmcp_shared-0.4.0.dist-info/RECORD +0 -24
- {scmcp_shared-0.4.0.dist-info → scmcp_shared-0.6.0.dist-info}/WHEEL +0 -0
- {scmcp_shared-0.4.0.dist-info → scmcp_shared-0.6.0.dist-info}/licenses/LICENSE +0 -0
scmcp_shared/schema/io.py
CHANGED
@@ -1,113 +1,155 @@
|
|
1
|
-
from pydantic import
|
2
|
-
Field,
|
3
|
-
field_validator,
|
4
|
-
model_validator, BaseModel
|
5
|
-
)
|
1
|
+
from pydantic import Field, field_validator, model_validator, BaseModel
|
6
2
|
from typing import Optional, Literal
|
7
3
|
|
8
4
|
|
9
|
-
|
10
|
-
class ReadParams(BaseModel):
|
5
|
+
class ReadParam(BaseModel):
|
11
6
|
"""Input schema for the read tool."""
|
7
|
+
|
8
|
+
adata: str = Field(
|
9
|
+
..., description="The AnnData object variable name will be stored in."
|
10
|
+
)
|
12
11
|
filename: str = Field(
|
13
12
|
...,
|
14
|
-
description="Path to the file to read."
|
13
|
+
description="Path to the file to read. If the filename has no file extension, it is interpreted as a key for generating a filename via sc.settings.writedir / (filename + sc.settings.file_format_data).",
|
15
14
|
)
|
16
15
|
|
17
|
-
backed: Literal[
|
16
|
+
backed: Optional[Literal["r", "r+"]] = Field(
|
18
17
|
default=None,
|
19
|
-
description="If 'r', load AnnData in
|
18
|
+
description="If 'r', load AnnData in backed mode instead of fully loading it into memory (memory mode). If you want to modify backed attributes of the AnnData object, you need to choose 'r+'.",
|
20
19
|
)
|
21
|
-
|
20
|
+
|
21
|
+
sheet: Optional[str] = Field(
|
22
|
+
default=None, description="Name of sheet/table in hdf5 or Excel file."
|
23
|
+
)
|
24
|
+
|
25
|
+
ext: Optional[str] = Field(
|
22
26
|
default=None,
|
23
|
-
description="
|
27
|
+
description="Extension that indicates the file type. If None, uses extension of filename.",
|
24
28
|
)
|
25
|
-
|
29
|
+
|
30
|
+
delimiter: Optional[str] = Field(
|
26
31
|
default=None,
|
27
|
-
description="Delimiter that separates data within text file. If None, will split at arbitrary number of white spaces, which is different from enforcing splitting at any single white space."
|
32
|
+
description="Delimiter that separates data within text file. If None, will split at arbitrary number of white spaces, which is different from enforcing splitting at any single white space ' '.",
|
28
33
|
)
|
34
|
+
|
29
35
|
first_column_names: bool = Field(
|
30
36
|
default=False,
|
31
|
-
description="Assume the first column stores row names. This is only necessary if these are not strings: strings in the first column are automatically assumed to be row names."
|
32
|
-
)
|
33
|
-
transpose: bool = Field(
|
34
|
-
default=False,
|
35
|
-
description="If True, the data will be transposed."
|
37
|
+
description="Assume the first column stores row names. This is only necessary if these are not strings: strings in the first column are automatically assumed to be row names.",
|
36
38
|
)
|
37
|
-
|
39
|
+
|
40
|
+
backup_url: Optional[str] = Field(
|
38
41
|
default=None,
|
39
|
-
description="Retrieve the file from an URL if not present on disk."
|
42
|
+
description="Retrieve the file from an URL if not present on disk.",
|
40
43
|
)
|
44
|
+
|
41
45
|
cache: bool = Field(
|
42
46
|
default=False,
|
43
|
-
description="If False, read from source, if True, read from fast 'h5ad' cache."
|
47
|
+
description="If False, read from source, if True, read from fast 'h5ad' cache.",
|
44
48
|
)
|
45
|
-
|
49
|
+
|
50
|
+
cache_compression: Optional[Literal["gzip", "lzf"]] = Field(
|
46
51
|
default=None,
|
47
|
-
description="See the h5py
|
52
|
+
description="See the h5py Filter pipeline. (Default: settings.cache_compression)",
|
53
|
+
)
|
54
|
+
|
55
|
+
@field_validator("backed")
|
56
|
+
def validate_backed(cls, v: Optional[str]) -> Optional[str]:
|
57
|
+
if v is not None and v not in ["r", "r+"]:
|
58
|
+
raise ValueError("If backed is provided, it must be either 'r' or 'r+'")
|
59
|
+
return v
|
60
|
+
|
61
|
+
@field_validator("cache_compression")
|
62
|
+
def validate_cache_compression(cls, v: Optional[str]) -> Optional[str]:
|
63
|
+
if v is not None and v not in ["gzip", "lzf"]:
|
64
|
+
raise ValueError("cache_compression must be either 'gzip', 'lzf', or None")
|
65
|
+
return v
|
66
|
+
|
67
|
+
|
68
|
+
class Read10xMtxParam(BaseModel):
|
69
|
+
"""Input schema for the read_10x_mtx tool."""
|
70
|
+
|
71
|
+
path: str = Field(
|
72
|
+
...,
|
73
|
+
description="Path to directory for .mtx and .tsv files, e.g. './filtered_gene_bc_matrices/hg19/'.",
|
48
74
|
)
|
49
|
-
|
50
|
-
|
51
|
-
description="The variables index
|
75
|
+
|
76
|
+
var_names: Literal["gene_symbols", "gene_ids"] = Field(
|
77
|
+
default="gene_symbols", description="The variables index."
|
52
78
|
)
|
79
|
+
|
53
80
|
make_unique: bool = Field(
|
54
81
|
default=True,
|
55
|
-
description="Whether to make the variables index unique by appending '-1', '-2' etc. or not.
|
82
|
+
description="Whether to make the variables index unique by appending '-1', '-2' etc. or not.",
|
83
|
+
)
|
84
|
+
|
85
|
+
cache: bool = Field(
|
86
|
+
default=False,
|
87
|
+
description="If False, read from source, if True, read from fast 'h5ad' cache.",
|
56
88
|
)
|
89
|
+
|
90
|
+
cache_compression: Optional[Literal["gzip", "lzf"]] = Field(
|
91
|
+
default=None,
|
92
|
+
description="See the h5py Filter pipeline. (Default: settings.cache_compression)",
|
93
|
+
)
|
94
|
+
|
57
95
|
gex_only: bool = Field(
|
58
96
|
default=True,
|
59
|
-
description="Only keep 'Gene Expression' data and ignore other feature types, e.g. 'Antibody Capture', 'CRISPR Guide Capture', or 'Custom'
|
97
|
+
description="Only keep 'Gene Expression' data and ignore other feature types, e.g. 'Antibody Capture', 'CRISPR Guide Capture', or 'Custom'",
|
60
98
|
)
|
61
|
-
|
99
|
+
|
100
|
+
prefix: Optional[str] = Field(
|
62
101
|
default=None,
|
63
|
-
description="Any prefix before matrix.mtx, genes.tsv and barcodes.tsv. For instance, if the files are named patientA_matrix.mtx, patientA_genes.tsv and patientA_barcodes.tsv the prefix is patientA_.
|
102
|
+
description="Any prefix before matrix.mtx, genes.tsv and barcodes.tsv. For instance, if the files are named patientA_matrix.mtx, patientA_genes.tsv and patientA_barcodes.tsv the prefix is patientA_. (Default: no prefix)",
|
64
103
|
)
|
65
|
-
|
66
|
-
@field_validator(
|
67
|
-
def
|
68
|
-
|
69
|
-
|
104
|
+
|
105
|
+
@field_validator("path")
|
106
|
+
def validate_path(cls, v: str) -> str:
|
107
|
+
"""Validate path is not empty"""
|
108
|
+
if not v.strip():
|
109
|
+
raise ValueError("path cannot be empty")
|
110
|
+
return v
|
111
|
+
|
112
|
+
@field_validator("var_names")
|
113
|
+
def validate_var_names(cls, v: str) -> str:
|
114
|
+
"""Validate var_names is supported"""
|
115
|
+
if v not in ["gene_symbols", "gene_ids"]:
|
116
|
+
raise ValueError("var_names must be either 'gene_symbols' or 'gene_ids'")
|
70
117
|
return v
|
71
|
-
|
72
|
-
@field_validator(
|
118
|
+
|
119
|
+
@field_validator("cache_compression")
|
73
120
|
def validate_cache_compression(cls, v: Optional[str]) -> Optional[str]:
|
74
|
-
|
121
|
+
"""Validate cache_compression is supported if provided"""
|
122
|
+
if v is not None and v not in ["gzip", "lzf"]:
|
75
123
|
raise ValueError("cache_compression must be either 'gzip', 'lzf', or None")
|
76
124
|
return v
|
77
|
-
|
78
|
-
@field_validator('var_names')
|
79
|
-
def validate_var_names(cls, v: Optional[str]) -> Optional[str]:
|
80
|
-
if v is not None and v not in ['gene_symbols', 'gene_ids']:
|
81
|
-
raise ValueError("var_names must be either 'gene_symbols' or 'gene_ids'")
|
82
|
-
return v
|
83
125
|
|
84
126
|
|
85
|
-
class
|
127
|
+
class WriteParam(BaseModel):
|
86
128
|
"""Input schema for the write tool."""
|
129
|
+
|
130
|
+
adata: str = Field(..., description="The AnnData object variable name.")
|
87
131
|
filename: str = Field(
|
88
132
|
description="Path to save the file. If no extension is provided, the default format will be used."
|
89
133
|
)
|
90
|
-
ext: Literal[
|
134
|
+
ext: Literal["h5", "csv", "txt", "npz"] = Field(
|
91
135
|
default=None,
|
92
|
-
description="File extension to infer file format. If None, defaults to scanpy's settings.file_format_data."
|
136
|
+
description="File extension to infer file format. If None, defaults to scanpy's settings.file_format_data.",
|
93
137
|
)
|
94
|
-
compression: Literal[
|
95
|
-
default=
|
96
|
-
description="Compression format for h5 files."
|
138
|
+
compression: Literal["gzip", "lzf"] = Field(
|
139
|
+
default="gzip", description="Compression format for h5 files."
|
97
140
|
)
|
98
141
|
compression_opts: int = Field(
|
99
|
-
default=None,
|
100
|
-
description="Compression options for h5 files."
|
142
|
+
default=None, description="Compression options for h5 files."
|
101
143
|
)
|
102
|
-
|
103
|
-
@field_validator(
|
144
|
+
|
145
|
+
@field_validator("filename")
|
104
146
|
def validate_filename(cls, v: str) -> str:
|
105
147
|
# Allow any filename since the extension is optional and can be inferred
|
106
148
|
return v
|
107
|
-
|
108
|
-
@model_validator(mode=
|
109
|
-
def validate_extension_compression(self) ->
|
149
|
+
|
150
|
+
@model_validator(mode="after")
|
151
|
+
def validate_extension_compression(self) -> "WriteParam":
|
110
152
|
# If ext is provided and not h5, compression should be None
|
111
|
-
if self.ext is not None and self.ext !=
|
153
|
+
if self.ext is not None and self.ext != "h5" and self.compression is not None:
|
112
154
|
raise ValueError("Compression can only be used with h5 files")
|
113
155
|
return self
|