PyPI - scdataloader - Versions diffs - 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl - Mend

scdataloader 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

scdataloader/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 1.2.1
1	+ 1.2.2

scdataloader/__main__.py CHANGED Viewed

@@ -53,14 +53,14 @@ def main():
     )
     parser.add_argument(
         "--filter_gene_by_counts",
-        type=Union[int, bool],
-        default=False,
+        type=int,
+        default=0,
         help="Determines whether to filter genes by counts.",
     )
     parser.add_argument(
         "--filter_cell_by_counts",
-        type=Union[int, bool],
-        default=False,
+        type=int,
+        default=0,
         help="Determines whether to filter cells by counts.",
     )
     parser.add_argument(
@@ -153,6 +153,12 @@ def main():
         default=False,
         help="Determines whether to do postprocessing.",
     )
+    parser.add_argument(
+        "--cache",
+        type=bool,
+        default=True,
+        help="Determines whether to cache the dataset.",
+    )
     args = parser.parse_args()
     # Load the collection
@@ -178,6 +184,7 @@ def main():
         normalize_sum=args.normalize_sum,
         subset_hvg=args.subset_hvg,
         hvg_flavor=args.hvg_flavor,
+        cache=args.cache,
         binning=args.binning,
         result_binned_key=args.result_binned_key,
         length_normalize=args.length_normalize,

scdataloader/utils.py CHANGED Viewed

@@ -127,15 +127,15 @@ def getBiomartTable(
     cache_folder = os.path.expanduser(cache_folder)
     createFoldersFor(cache_folder)
-    cachefile = os.path.join(cache_folder, ".biomart.csv")
+    cachefile = os.path.join(cache_folder, ".biomart.parquet")
     if useCache & os.path.isfile(cachefile):
         print("fetching gene names from biomart cache")
-        res = pd.read_csv(cachefile)
+        res = pd.read_parquet(cachefile)
     else:
         print("downloading gene names from biomart")
         res = _fetchFromServer(ensemble_server, attr + attributes, database=database)
-        res.to_csv(cachefile, index=False)
+        res.to_parquet(cachefile, index=False)
     res.columns = attr + attributes
     if type(res) is not type(pd.DataFrame()):
         raise ValueError("should be a dataframe")
@@ -368,7 +368,14 @@ def load_genes(organisms: Union[str, list] = "NCBITaxon:9606"):  # "NCBITaxon:10
         genesdf["organism"] = organism
         organismdf.append(genesdf)
     organismdf = pd.concat(organismdf)
-    for col in ["source_id", "run_id", "created_by_id", "updated_at", "stable_id", "created_at"]:
+    for col in [
+        "source_id",
+        "run_id",
+        "created_by_id",
+        "updated_at",
+        "stable_id",
+        "created_at",
+    ]:
         if col in organismdf.columns:
             organismdf.drop(columns=[col], inplace=True)
     return organismdf
@@ -412,7 +419,7 @@ def populate_my_ontology(
     # cell type
     if celltypes is not None:
         if len(celltypes) == 0:
-            bt.CellType.import_from_source()
+            bt.CellType.import_from_source(update=True)
         else:
             names = bt.CellType.public().df().index if not celltypes else celltypes
             records = bt.CellType.from_values(names, field="ontology_id")
@@ -446,7 +453,7 @@ def populate_my_ontology(
     # ethnicity
     if ethnicities is not None:
         if len(ethnicities) == 0:
-            bt.Ethnicity.import_from_source()
+            bt.Ethnicity.import_from_source(update=True)
         else:
             names = bt.Ethnicity.public().df().index if not ethnicities else ethnicities
             records = bt.Ethnicity.from_values(names, field="ontology_id")
@@ -457,7 +464,7 @@ def populate_my_ontology(
     # ExperimentalFactor
     if assays is not None:
         if len(assays) == 0:
-            bt.ExperimentalFactor.import_from_source()
+            bt.ExperimentalFactor.import_from_source(update=True)
         else:
             names = bt.ExperimentalFactor.public().df().index if not assays else assays
             records = bt.ExperimentalFactor.from_values(names, field="ontology_id")
@@ -468,7 +475,7 @@ def populate_my_ontology(
     # Tissue
     if tissues is not None:
         if len(tissues) == 0:
-            bt.Tissue.import_from_source()
+            bt.Tissue.import_from_source(update=True)
         else:
             names = bt.Tissue.public().df().index if not tissues else tissues
             records = bt.Tissue.from_values(names, field="ontology_id")
@@ -477,7 +484,7 @@ def populate_my_ontology(
     # DevelopmentalStage
     if dev_stages is not None:
         if len(dev_stages) == 0:
-            bt.DevelopmentalStage.import_from_source()
+            bt.DevelopmentalStage.import_from_source(update=True)
             source = bt.PublicSource.filter(organism="mouse", name="mmusdv").last()
             bt.DevelopmentalStage.import_from_source(source=source)
         else:
@@ -493,7 +500,7 @@ def populate_my_ontology(
     # Disease
     if diseases is not None:
         if len(diseases) == 0:
-            bt.Disease.import_from_source()
+            bt.Disease.import_from_source(update=True)
         else:
             names = bt.Disease.public().df().index if not diseases else diseases
             records = bt.Disease.from_values(names, field="ontology_id")

{scdataloader-1.2.1.dist-info → scdataloader-1.2.2.dist-info}/METADATA RENAMED Viewed

@@ -1,11 +1,10 @@
 Metadata-Version: 2.3
 Name: scdataloader
-Version: 1.2.1
+Version: 1.2.2
 Summary: a dataloader for single cell data in lamindb
 Project-URL: repository, https://github.com/jkobject/scDataLoader
 Author-email: jkobject <jkobject@gmail.com>
-License-Expression: MIT
-License-File: LICENSE
+License: MIT
 Keywords: dataloader,lamindb,pytorch,scPRINT,scRNAseq
 Requires-Python: <3.11,>=3.10
 Requires-Dist: anndata>=0.9.0
@@ -18,6 +17,7 @@ Requires-Dist: leidenalg>=0.8.0
 Requires-Dist: lightning>=2.0.0
 Requires-Dist: matplotlib>=3.5.0
 Requires-Dist: numpy>=1.26.0
+Requires-Dist: palantir>=1.3.3
 Requires-Dist: pandas>=2.0.0
 Requires-Dist: scikit-misc>=0.5.0
 Requires-Dist: seaborn>=0.11.0

{scdataloader-1.2.1.dist-info → scdataloader-1.2.2.dist-info}/RECORD RENAMED Viewed

@@ -1,14 +1,14 @@
-scdataloader/VERSION,sha256=bPTghLR_M8mwLveSedFXgzho-PcFFBaadovjU-4yj-o,6
+scdataloader/VERSION,sha256=xipcxhrEUlk1dT9ewoTAoFKksdpLOjWA3OK313ohVK4,6
 scdataloader/__init__.py,sha256=5y9VzRhOAUWeYMn2MrRRRlzgdiMjRFytr7gcn-I6IkE,147
-scdataloader/__main__.py,sha256=Hu7Bnc7P4UfOzNWyDAVoNZsItgy27hldaw3y8OS3gPM,6387
+scdataloader/__main__.py,sha256=VXrt2IykBypnIXWydwA7NfF7LtRGc-0Khjtm5OIBNpI,6527
 scdataloader/base.py,sha256=M1gD59OffRdLOgS1vHKygOomUoAMuzjpRtAfM3SBKF8,338
 scdataloader/collator.py,sha256=gzHiuixUwK8JClhAbG12kgWMU_VTKkowibA-tDFpbwo,11341
 scdataloader/config.py,sha256=rrW2DZxG4J2_pmpDbXXsaKJkpNC57w5dIlItiFbANYw,2905
 scdataloader/data.py,sha256=3dCp-lIAfOkCi76SH5W3iSqFmAWZslwARkN9v5mylz8,14907
 scdataloader/datamodule.py,sha256=B-udBevPSPF__hfy0pOz1dGovgE95K2pxPupjB7RblI,16936
 scdataloader/preprocess.py,sha256=pH4EPrcRqH34o3t5X3A4kETiYdCZngih5SdP_PPfgOo,29178
-scdataloader/utils.py,sha256=5-6CnI3Utn5XFpqgZiJa0MT6gfvkFNg078SgrE6P4s8,22365
-scdataloader-1.2.1.dist-info/METADATA,sha256=JeE7j8HkByp_MMGVXp4GOvpdkjIjoyEoByXA-FWISuk,9802
-scdataloader-1.2.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-scdataloader-1.2.1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-scdataloader-1.2.1.dist-info/RECORD,,
+scdataloader/utils.py,sha256=7tgt3sPj_XTKb-UlJDAZWvQr0_DG9VTC6ioiLdBWFFE,22498
+scdataloader-1.2.2.dist-info/METADATA,sha256=XMtKO9ImiyY--F92njvMUe69OaJgDx8C3xQtBAXqo8g,9800
+scdataloader-1.2.2.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
+scdataloader-1.2.2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+scdataloader-1.2.2.dist-info/RECORD,,

{scdataloader-1.2.1.dist-info → scdataloader-1.2.2.dist-info}/WHEEL RENAMED Viewed

@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: hatchling 1.25.0
+Generator: hatchling 1.26.3
 Root-Is-Purelib: true
 Tag: py3-none-any

{scdataloader-1.2.1.dist-info → scdataloader-1.2.2.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

scdataloader 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl

scdataloader 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl