PyPI - scmcp-shared - Versions diffs - 0.4.0__py3-none-any.whl → 0.6.0__py3-none-any.whl - Mend

scmcp-shared 0.4.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

scmcp_shared/__init__.py +1 -3
scmcp_shared/agent.py +38 -21
scmcp_shared/backend.py +44 -0
scmcp_shared/cli.py +75 -46
scmcp_shared/kb.py +139 -0
scmcp_shared/logging_config.py +6 -8
scmcp_shared/mcp_base.py +184 -0
scmcp_shared/schema/io.py +101 -59
scmcp_shared/schema/pl.py +386 -490
scmcp_shared/schema/pp.py +514 -265
scmcp_shared/schema/preset/__init__.py +15 -0
scmcp_shared/schema/preset/io.py +103 -0
scmcp_shared/schema/preset/pl.py +843 -0
scmcp_shared/schema/preset/pp.py +616 -0
scmcp_shared/schema/preset/tl.py +917 -0
scmcp_shared/schema/preset/util.py +123 -0
scmcp_shared/schema/tl.py +355 -407
scmcp_shared/schema/util.py +57 -72
scmcp_shared/server/__init__.py +5 -10
scmcp_shared/server/auto.py +15 -11
scmcp_shared/server/code.py +3 -0
scmcp_shared/server/preset/__init__.py +14 -0
scmcp_shared/server/{io.py → preset/io.py} +26 -22
scmcp_shared/server/{pl.py → preset/pl.py} +162 -78
scmcp_shared/server/{pp.py → preset/pp.py} +123 -65
scmcp_shared/server/{tl.py → preset/tl.py} +142 -79
scmcp_shared/server/{util.py → preset/util.py} +123 -66
scmcp_shared/server/rag.py +13 -0
scmcp_shared/util.py +109 -38
{scmcp_shared-0.4.0.dist-info → scmcp_shared-0.6.0.dist-info}/METADATA +6 -2
scmcp_shared-0.6.0.dist-info/RECORD +35 -0
scmcp_shared/server/base.py +0 -148
scmcp_shared-0.4.0.dist-info/RECORD +0 -24
{scmcp_shared-0.4.0.dist-info → scmcp_shared-0.6.0.dist-info}/WHEEL +0 -0
{scmcp_shared-0.4.0.dist-info → scmcp_shared-0.6.0.dist-info}/licenses/LICENSE +0 -0

scmcp_shared/schema/io.py CHANGED Viewed

@@ -1,113 +1,155 @@
-from pydantic import (
-    Field,
-    field_validator,
-    model_validator, BaseModel
-)
+from pydantic import Field, field_validator, model_validator, BaseModel
 from typing import Optional, Literal
-class ReadParams(BaseModel):
+class ReadParam(BaseModel):
     """Input schema for the read tool."""
+    adata: str = Field(
+        ..., description="The AnnData object variable name will be stored in."
+    )
     filename: str = Field(
         ...,
-        description="Path to the file to read."
+        description="Path to the file to read. If the filename has no file extension, it is interpreted as a key for generating a filename via sc.settings.writedir / (filename + sc.settings.file_format_data).",
     )
-    backed: Literal['r', 'r+'] = Field(
+    backed: Optional[Literal["r", "r+"]] = Field(
         default=None,
-        description="If 'r', load AnnData in 'backed' mode instead of fully loading it into memory ('memory' mode). If you want to modify backed attributes of the AnnData object, you need to choose 'r+'."
+        description="If 'r', load AnnData in backed mode instead of fully loading it into memory (memory mode). If you want to modify backed attributes of the AnnData object, you need to choose 'r+'.",
     )
-    sheet: str = Field(
+    sheet: Optional[str] = Field(
+        default=None, description="Name of sheet/table in hdf5 or Excel file."
+    )
+    ext: Optional[str] = Field(
         default=None,
-        description="Name of sheet/table in hdf5 or Excel file."
+        description="Extension that indicates the file type. If None, uses extension of filename.",
     )
-    delimiter: str = Field(
+    delimiter: Optional[str] = Field(
         default=None,
-        description="Delimiter that separates data within text file. If None, will split at arbitrary number of white spaces, which is different from enforcing splitting at any single white space."
+        description="Delimiter that separates data within text file. If None, will split at arbitrary number of white spaces, which is different from enforcing splitting at any single white space ' '.",
     )
     first_column_names: bool = Field(
         default=False,
-        description="Assume the first column stores row names. This is only necessary if these are not strings: strings in the first column are automatically assumed to be row names."
-    )
-    transpose: bool = Field(
-        default=False,
-        description="If True, the data will be transposed."
+        description="Assume the first column stores row names. This is only necessary if these are not strings: strings in the first column are automatically assumed to be row names.",
     )
-    backup_url: str = Field(
+    backup_url: Optional[str] = Field(
         default=None,
-        description="Retrieve the file from an URL if not present on disk."
+        description="Retrieve the file from an URL if not present on disk.",
     )
     cache: bool = Field(
         default=False,
-        description="If False, read from source, if True, read from fast 'h5ad' cache."
+        description="If False, read from source, if True, read from fast 'h5ad' cache.",
     )
-    cache_compression: Literal['gzip', 'lzf'] = Field(
+    cache_compression: Optional[Literal["gzip", "lzf"]] = Field(
         default=None,
-        description="See the h5py dataset_compression. (Default: settings.cache_compression)"
+        description="See the h5py Filter pipeline. (Default: settings.cache_compression)",
+    )
+    @field_validator("backed")
+    def validate_backed(cls, v: Optional[str]) -> Optional[str]:
+        if v is not None and v not in ["r", "r+"]:
+            raise ValueError("If backed is provided, it must be either 'r' or 'r+'")
+        return v
+    @field_validator("cache_compression")
+    def validate_cache_compression(cls, v: Optional[str]) -> Optional[str]:
+        if v is not None and v not in ["gzip", "lzf"]:
+            raise ValueError("cache_compression must be either 'gzip', 'lzf', or None")
+        return v
+class Read10xMtxParam(BaseModel):
+    """Input schema for the read_10x_mtx tool."""
+    path: str = Field(
+        ...,
+        description="Path to directory for .mtx and .tsv files, e.g. './filtered_gene_bc_matrices/hg19/'.",
     )
-    var_names: str = Field(
-        default="gene_symbols",
-        description="The variables index for 10x mtx format. Either 'gene_symbols' or 'gene_ids'."
+    var_names: Literal["gene_symbols", "gene_ids"] = Field(
+        default="gene_symbols", description="The variables index."
     )
     make_unique: bool = Field(
         default=True,
-        description="Whether to make the variables index unique by appending '-1', '-2' etc. or not. Used for 10x mtx format."
+        description="Whether to make the variables index unique by appending '-1', '-2' etc. or not.",
+    )
+    cache: bool = Field(
+        default=False,
+        description="If False, read from source, if True, read from fast 'h5ad' cache.",
     )
+    cache_compression: Optional[Literal["gzip", "lzf"]] = Field(
+        default=None,
+        description="See the h5py Filter pipeline. (Default: settings.cache_compression)",
+    )
     gex_only: bool = Field(
         default=True,
-        description="Only keep 'Gene Expression' data and ignore other feature types, e.g. 'Antibody Capture', 'CRISPR Guide Capture', or 'Custom'. Used for 10x formats."
+        description="Only keep 'Gene Expression' data and ignore other feature types, e.g. 'Antibody Capture', 'CRISPR Guide Capture', or 'Custom'",
     )
-    prefix: str = Field(
+    prefix: Optional[str] = Field(
         default=None,
-        description="Any prefix before matrix.mtx, genes.tsv and barcodes.tsv. For instance, if the files are named patientA_matrix.mtx, patientA_genes.tsv and patientA_barcodes.tsv the prefix is patientA_. Used for 10x mtx format."
+        description="Any prefix before matrix.mtx, genes.tsv and barcodes.tsv. For instance, if the files are named patientA_matrix.mtx, patientA_genes.tsv and patientA_barcodes.tsv the prefix is patientA_. (Default: no prefix)",
     )
-    @field_validator('backed')
-    def validate_backed(cls, v: Optional[str]) -> Optional[str]:
-        if v is not None and v not in ['r', 'r+']:
-            raise ValueError("If backed is provided, it must be either 'r' or 'r+'")
+    @field_validator("path")
+    def validate_path(cls, v: str) -> str:
+        """Validate path is not empty"""
+        if not v.strip():
+            raise ValueError("path cannot be empty")
+        return v
+    @field_validator("var_names")
+    def validate_var_names(cls, v: str) -> str:
+        """Validate var_names is supported"""
+        if v not in ["gene_symbols", "gene_ids"]:
+            raise ValueError("var_names must be either 'gene_symbols' or 'gene_ids'")
         return v
-    @field_validator('cache_compression')
+    @field_validator("cache_compression")
     def validate_cache_compression(cls, v: Optional[str]) -> Optional[str]:
-        if v is not None and v not in ['gzip', 'lzf']:
+        """Validate cache_compression is supported if provided"""
+        if v is not None and v not in ["gzip", "lzf"]:
             raise ValueError("cache_compression must be either 'gzip', 'lzf', or None")
         return v
-    @field_validator('var_names')
-    def validate_var_names(cls, v: Optional[str]) -> Optional[str]:
-        if v is not None and v not in ['gene_symbols', 'gene_ids']:
-            raise ValueError("var_names must be either 'gene_symbols' or 'gene_ids'")
-        return v
-class WriteParams(BaseModel):
+class WriteParam(BaseModel):
     """Input schema for the write tool."""
+    adata: str = Field(..., description="The AnnData object variable name.")
     filename: str = Field(
         description="Path to save the file. If no extension is provided, the default format will be used."
     )
-    ext: Literal['h5', 'csv', 'txt', 'npz'] = Field(
+    ext: Literal["h5", "csv", "txt", "npz"] = Field(
         default=None,
-        description="File extension to infer file format. If None, defaults to scanpy's settings.file_format_data."
+        description="File extension to infer file format. If None, defaults to scanpy's settings.file_format_data.",
     )
-    compression: Literal['gzip', 'lzf'] = Field(
-        default='gzip',
-        description="Compression format for h5 files."
+    compression: Literal["gzip", "lzf"] = Field(
+        default="gzip", description="Compression format for h5 files."
     )
     compression_opts: int = Field(
-        default=None,
-        description="Compression options for h5 files."
+        default=None, description="Compression options for h5 files."
     )
-    @field_validator('filename')
+    @field_validator("filename")
     def validate_filename(cls, v: str) -> str:
         # Allow any filename since the extension is optional and can be inferred
         return v
-    @model_validator(mode='after')
-    def validate_extension_compression(self) -> 'WriteModel':
+    @model_validator(mode="after")
+    def validate_extension_compression(self) -> "WriteParam":
         # If ext is provided and not h5, compression should be None
-        if self.ext is not None and self.ext != 'h5' and self.compression is not None:
+        if self.ext is not None and self.ext != "h5" and self.compression is not None:
             raise ValueError("Compression can only be used with h5 files")
         return self

scmcp-shared 0.4.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

scmcp-shared 0.4.0__py3-none-any.whl → 0.6.0__py3-none-any.whl