PyPI - immucellai2 - Versions diffs - 2.1.33__tar.gz → 2.1.34__tar.gz - Mend

immucellai2 2.1.33tar.gz → 2.1.34tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

{immucellai2-2.1.33 → immucellai2-2.1.34}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: immucellai2
-Version: 2.1.33
+Version: 2.1.34
 Summary: A tool for immune cell type deconvolution
 Home-page: https://github.com/VyvyanYjm/ImmuCellAI2.0
 Author: YangJingmin

{immucellai2-2.1.33 → immucellai2-2.1.34}/immucellai2/Deconvolution.py RENAMED Viewed

@@ -8,9 +8,13 @@ from immucellai2.myclasses import CLASS_FOR_RUN, CLASS_FOR_RUNRESULT, CLASS_FOR_
 import scipy
 import os
 import tqdm
+import joblib
+import dask
+import dask.delayed
+import dask.multiprocessing
+from dask.distributed import Client, LocalCluster
 from concurrent.futures import ProcessPoolExecutor, as_completed
 from itertools import chain
-import joblib
 import multiprocessing
 from copy import deepcopy
@@ -22,7 +26,6 @@ class Move:
         raise NotImplementedError
     def propose(self, state, random_state):
         raise NotImplementedError
 class GibbsMHMove(Move):
     def __init__(self, phi, alpha, sp):
         self.phi = phi
@@ -34,13 +37,12 @@ class GibbsMHMove(Move):
         q, _ = self.get_proposal(state.coords, random_state)
         new_state = State(q)
         return new_state, np.ones(state.coords.shape[0], dtype=bool)
 def gibbs_proposal_function(coords, random, phi, alpha, sp):
     min_threshold = 1e-6
     updated_coords = np.copy(coords)
     column_sums = np.sum(phi, axis=0)
     normalized_matrix = phi / column_sums
-    G, K = normalized_matrix.shape
+    G, K = normalized_matrix.shape  # use the shape of normalized_matrix
     for n in range(len(coords)):
         theta_n = updated_coords[n, :]
         prob_mat = np.multiply(normalized_matrix, theta_n)
@@ -48,7 +50,7 @@ def gibbs_proposal_function(coords, random, phi, alpha, sp):
         for g in range(G):
             row_sum = np.sum(prob_mat[g, :])  # use NumPy to compute the row sum
             pvals = np.full(K, 1/K) if row_sum < min_threshold else prob_mat[g, :] / row_sum
-            Z_n.append(random_state.multinomial(n=round(sp[g]), pvals=pvals))
+            Z_n.append(np.random.multinomial(n=round(sp[g]), pvals=pvals))
         Z_nk = np.sum(Z_n, axis=0)
         alpha_param = Z_nk + alpha
         updated_coords[n, :] = np.random.dirichlet(alpha=alpha_param)
@@ -64,7 +66,7 @@ class EnsembleSampler:
         self.alpha_value = alpha_value
         self.sp_value = sp_value
         self.backend = Backend(nwalkers=nwalkers, ndim=ndim, iterations=iterations)
-        self.random_state = np.random.mtrand.RandomState()
+        self._random = np.random.mtrand.RandomState()
     def sample(self, initial_state, iterations=1, progress=False, progress_kwargs=None):
         if progress_kwargs is None:
             progress_kwargs = {}
@@ -126,101 +128,36 @@ def get_progress_bar(display, total, **kwargs):
                 return getattr(tqdm, "tqdm_" + display)(total=total, **kwargs)
     return _NoOpPBar()
-'''
-def mcse_python(samples):
-    n_iter, n_dim = samples.shape
-    mcse_vec = np.empty(n_dim)
-    for k in range(n_dim):
-        x = samples[:, k]
-        rho = np.correlate(x - x.mean(), x - x.mean(), mode='full')
-        rho = rho[n_iter - 1:] / rho[n_iter - 1]
-        lag = next((i for i, r in enumerate(rho[1:], 1) if r < 0.05), n_iter)
-        ess = n_iter / (1 + 2 * rho[1:lag + 1].sum())
-        mcse_vec[k] = np.std(x, ddof=1) / np.sqrt(ess)
-    return mcse_vec
-def mcse_python(segment):
-    return np.std(segment, axis=0) / np.sqrt(len(segment))
 def ThreadRunEachSamples(
-    EmceeParameters = None,
-    referenceMatrix = None,
-    SampleExpressionData = None,
-    SampleName=None,
-    MAPorMLE = ('MAP','MLE'),
-    *args):
-    samplename = SampleName
-    phi_value = referenceMatrix
-    alpha_value = 1
-    sp_value = SampleExpressionData
-    K = EmceeParameters.ndims
-    rng = np.random.RandomState(seed)
-    max_iter   = EmceeParameters.nsteps
-    nwalkers = EmceeParameters.nwalkers
-    tol_ratio  = 0.01
-    eval_step  = 50
-    theta = EmceeParameters.position
-    if theta.ndim == 1:
-        theta = theta.reshape(nwalkers, K)
-    else:
-        theta = theta[:nwalkers, :K]
-    full_chain = np.zeros((max_iter, nwalkers, K))
-    converged = False
-    converged_step = None
-    for i in range(max_iter):
-        gibbs_move = GibbsMHMove(phi_value, alpha_value, sp_value)
-        state, _ = gibbs_move.propose(State(theta), rng)
-        theta = state.coords
-        full_chain[i] = theta
-        if (i + 1) % eval_step == 0 and i + 1 >= eval_step:
-            segment = full_chain[(i + 1 - eval_step): (i + 1)].reshape(-1, K)
-            mcse_vec = mcse_python(segment)
-            theta_hat = segment.mean(axis=0)
-            theta_hat_safe = np.where(theta_hat == 0, 1e-10, theta_hat)
-            ratio = mcse_vec / theta_hat_safe
-            if np.all(np.abs(ratio) <= tol_ratio):
-                converged = True
-                converged_step = i + 1
-                break
-    if converged and converged_step is not None:
-        start_step = max(0, converged_step - 100)
-        truncated_chain = full_chain[start_step:converged_step]
-    else:
-        start_step = max(0, max_iter - 100)
-        truncated_chain = full_chain[start_step:max_iter]
-    flat_samples = truncated_chain.reshape(-1, K)
-    return full_chain, flat_samples'''
-def ThreadRunEachSamples(
-   EmceeParameters = None, referenceMatrix = None,
-   SampleExpressionData = None, SampleName=None, MAPorMLE = ('MAP','MLE'),  *args):
+   EmceeParameters = None, referenceMatrix = None,
+   SampleExpressionData = None, SampleName=None, MAPorMLE = ('MAP','MLE'),  *args):
    nwalkers, ndims = EmceeParameters.nwalkers, EmceeParameters.ndims
    position, nsteps = EmceeParameters.position, EmceeParameters.nsteps
-   discard, thin = EmceeParameters.discard, EmceeParameters.thin
+   discard, thin = EmceeParameters.discard, EmceeParameters.thin
    samplename = SampleName
-   phi_value = referenceMatrix
-   alpha_value = 1
-   sp_value = SampleExpressionData
-   print(" create new threading for sample '%s'"%str(samplename))
-   if MAPorMLE == 'MAP':
+   phi_value = referenceMatrix
+   alpha_value = 1
+   sp_value = SampleExpressionData
+   print(" create new threading for sample '%s'"%str(samplename))
+   if MAPorMLE == 'MAP':
       sampler = EnsembleSampler(nwalkers, ndims, phi_value, alpha_value, sp_value)
    sampler.run_mcmc(position, nsteps, progress=True)
    mcmc_samples, flat_samples = [[[]]], [[]]
    mcmc_samples = sampler.get_chain()
    flat_samples = sampler.get_chain( discard = discard, thin = thin, flat = True)
    print( "Threading for sample '%s' run over, exiting..."%str(samplename))
-   return mcmc_samples, flat_samples
+   return mcmc_samples, flat_samples
 class MultiprocessesResult(object):
-    def __init__(self, SampleName, SamplingResult, CellType):
-        self.SampleName = SampleName
-        self.CellTypeList = CellType
-        self.ResultObjectOne = CLASS_FOR_RUNRESULT(
-            SampleNameList = [ self.SampleName ] ,
-            McmcSamplingResultList = np.array([ SamplingResult[0] ]) ,
-            FlatSamplingResultList = np.array([ SamplingResult[1] ]) ,
-            CellTypeList = CellType
-        )
+   def __init__(self, SampleName, SamplingResult, CellType):
+      self.SampleName = SampleName
+      self.CellTypeList = CellType
+      self.ResultObjectOne = CLASS_FOR_RUNRESULT(
+         SampleNameList = [ self.SampleName ] ,
+         McmcSamplingResultList = np.array([ SamplingResult[0] ]) ,
+         FlatSamplingResultList = np.array([ SamplingResult[1] ]) ,
+         CellTypeList = CellType
+      )
 def MergeResultsForAllSample(ThreadList=list(), OtherParams=[], *args):
     print("Merge all results from deconvolution into one ResultObject...")
     if len(ThreadList) < 1:
@@ -243,34 +180,35 @@ def MergeResultsForAllSample(ThreadList=list(), OtherParams=[], *args):
     FileCellTypeCategory = OtherParams[1]
     # Creating the final merged result object
     MergeResultsObject = CLASS_FOR_RUNRESULT(
-        SampleNameList=SampleNameWhole,
-        CellTypeList=CellTypeWhole,
-        FileCellTypeCategory=FileCellTypeCategory,
-        McmcSamplingResultList=McmcSamplingWhole,
-        FlatSamplingResultList=FlatSamplingWhole,
-        CellTypeRatioResult=CellTypeRatioResultWhole,
-        CellTypeRatioResultFinal=CellTypeRatioResultFinalWhole)
+       SampleNameList=SampleNameWhole,
+       CellTypeList=CellTypeWhole,
+       FileCellTypeCategory=FileCellTypeCategory,
+       McmcSamplingResultList=McmcSamplingWhole,
+       FlatSamplingResultList=FlatSamplingWhole,
+       CellTypeRatioResult=CellTypeRatioResultWhole,
+       CellTypeRatioResultFinal=CellTypeRatioResultFinalWhole)
     return MergeResultsObject
-def MainRun(RunObject, seed=42, MultithreadModule = 'joblib'):
-    EnvironmentRun = RunObject.EnvironmentRun
+def MainRun(RunObject, seed = 42, MultithreadModule = 'joblib'):
+    EnvironmentRun = RunObject.EnvironmentRun
     RecordTime = CLASS_FOR_TIME()
     nsamples = len(RunObject.SampleList)
+    EmceeParameterCopy = RunObject.EmceeParameter.mycopy()
+    CelltypesReferenceMatrix = RunObject.CelltypesReferenceMatrix
     SampleNameToIndex = {name: idx for idx, name in enumerate(RunObject.SampleList)}
-    parameterlist = []
+    parameterlist = []
     for SampleNameii in range(nsamples):
         SampleName = RunObject.SampleList[SampleNameii]
-        SampleIndex = SampleNameToIndex[SampleName]
+        SampleIndex = SampleNameToIndex[SampleName]  # get the index
         parameterlist.append((
-            RunObject.EmceeParameter.mycopy(),
-            RunObject.CelltypesReferenceMatrix,
-            RunObject.SamplesBulkRNAseqExpression[SampleIndex],
+            EmceeParameterCopy,
+            CelltypesReferenceMatrix,
+            RunObject.SamplesBulkRNAseqExpression[SampleIndex],  # use the index to access the data
             SampleName,
             RunObject.MAPorMLE
         ))
-    MultiprocessingReturnValue = []
+    MultiprocessingReturnValue = []
     if MultithreadModule == 'joblib':
-       try:
+       try:
           MultiprocessingReturnValue = joblib.Parallel(n_jobs=int(EnvironmentRun.ThreadNum), backend='loky',verbose=0)(joblib.delayed(ThreadRunEachSamples)(*arg) for arg in parameterlist)
        except:
           print('error occurs while paralleling')
@@ -288,15 +226,14 @@ def MainRun(RunObject, seed=42, MultithreadModule = 'joblib'):
                 pool.join()
                 print('Finished all!')
     MergedResultObject = MergeResultsForAllSample(
-        ThreadList = [MultiprocessesResult(
-            SampleName = RunObject.SampleList[Sampleii],
-            SamplingResult = MultiprocessingReturnValue[Sampleii],
+        ThreadList = [MultiprocessesResult(
+            SampleName = RunObject.SampleList[Sampleii],
+            SamplingResult = MultiprocessingReturnValue[Sampleii],
             CellType=deepcopy(RunObject.CellType),
         )   for Sampleii in range(len(MultiprocessingReturnValue))],
         OtherParams=[deepcopy(RunObject.CellType), RunObject.FileCellTypeCategory]
-        )
+        )
     RecordTime.ShowCostTime()
-    if os.path.exists("TempThread"):
-        os.system("rm -rf TempThread")
+    if os.path.exists("TempThread"): os.system("rm -rf TempThread")
     print("###<----Main program Run finished...")
     return MergedResultObject

{immucellai2-2.1.33 → immucellai2-2.1.34}/immucellai2/ObtainCategory.py RENAMED Viewed

@@ -49,7 +49,7 @@ def ObtainInformation2(line, SearchString, DictValue):
    else:
       DictValue[SearchString] =  obcontent
-def ObtainCellTypeCateogry(CellTypeCateogry):
+def ObtainCellTypeCategory(CellTypeCateogry):
     if CellTypeCateogry is None or not os.path.isfile(CellTypeCateogry):
         try:
             from importlib.resources import files

{immucellai2-2.1.33 → immucellai2-2.1.34}/immucellai2/PrepareData.py RENAMED Viewed

@@ -1,19 +1,23 @@
 #!/usr/bin/python3
-from immucellai2.myclasses import CLASS_FOR_RUN
-from immucellai2.ObtainCategory import ObtainCellTypeCateogry
+from .myclasses import CLASS_FOR_RUN
+from .ObtainCategory import ObtainCellTypeCategory
 import pandas
 import os
+import importlib.util
 import re
 import sys
 import multiprocessing as mp
-import importlib.util
+def Obtainmyconfigpath():
+   script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
+   return os.path.join(script_dir, "immucellai2", "myconfig", "")
 def get_package_dir(package_name):
     spec = importlib.util.find_spec(package_name)
     if spec is None:
         raise ModuleNotFoundError(f"Package '{package_name}' not found")
     path = spec.origin or spec.submodule_search_locations[0]
-    return os.path.dirname(path)
+    return os.path.dirname(path)
 def SelectGeneForDeconvolution(DFReferenceProfile, FileCoveredGenes="", Method="UsedMarker"):
     print("Select the gene for the following deconvolution...")
@@ -29,10 +33,6 @@ def SelectGeneForDeconvolution(DFReferenceProfile, FileCoveredGenes="", Method="
             GeneUsedForDeconvolution = list(set(GeneUsedForDeconvolution0).intersection(set(DFReferenceProfileGenes)))
     return GeneUsedForDeconvolution
-def Obtainmyconfigpath():
-   script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
-   return os.path.join(script_dir, "immucellai2", "myconfig", "")
 def CelltypeCategoryCheck(FileCellTypeCategory = "", celltypelist = [] ):
    print("Check the Celltype covered by configfile")
    if FileCellTypeCategory == "":
@@ -67,11 +67,11 @@ def InitialCellTypeRatioCheck(InitialCellTypeRatio, FileInitialCellTypeRatio = "
    elif Expactedcelltypenum in [ ncelltype, ncelltype -1 ]:
       return FileInitialCellTypeRatio
    else:
-      InitialCellTypeRatio = 'randn'
+      InitialCellTypeRatio = 'randn'
 def PrepareData(FileReferenceProfile ,
    FileSampleExpressionProfile ,
-   EnvironmentConfig = "" ,
+   EnvironmentConfig = ("", "") ,
    FileCoveredGenes = "" ,
    FileCellTypeCategory = "" ,
    FileInitialCellTypeRatio = "" ,
@@ -80,26 +80,27 @@ def PrepareData(FileReferenceProfile ,
    if FileReferenceProfile.shape[1] < 2:
       print("warning: When open Reference File, might sep = ' ' not '\t'")
    print("celltype reference raw matrix:\n", FileReferenceProfile.iloc[0:4, 0:4])
-   ReferenceCelltype = {}
+   ReferenceCelltype = {}
    for oneCellType in FileReferenceProfile.columns.values.tolist():
       numbertail = re.findall("\.[0-9]*$", oneCellType)
       oneCellType0 = oneCellType
       if numbertail != []: oneCellType = oneCellType[:-len(numbertail)]
-      if oneCellType in ReferenceCelltype.keys():
+      if oneCellType in ReferenceCelltype.keys():
          ReferenceCelltype[oneCellType].append(ReferenceCelltype[oneCellType])
       else: ReferenceCelltype[oneCellType] = [oneCellType0]
    DFReferenceProfile = pandas.DataFrame(columns = list(ReferenceCelltype.keys()),
        index = FileReferenceProfile.index.values)
    for celltype in  DFReferenceProfile.columns.values:
-        DFReferenceProfile[celltype] = (
+        DFReferenceProfile[celltype] = (
            FileReferenceProfile.loc[:, ReferenceCelltype[celltype] ]).mean(axis = 1)
-   print("celltype reference matrix:\n", DFReferenceProfile.iloc[0:4, 0:4])
+   print("celltype reference matrix:\n", DFReferenceProfile.iloc[0:4, 0:4])
    DFSampleExpressionProfile = pandas.read_table(FileSampleExpressionProfile, sep = "\t", header = 0, index_col = 0)
-   print(" initialize a Object For running...")
-   print("environment config(threads): ", EnvironmentConfig)
+   print(" initialize a Object For running...")
+   print("environment config(cpus, threads): ", EnvironmentConfig)
    GeneUsedForDeconvolution = SelectGeneForDeconvolution(DFReferenceProfile)
    #FileCellTypeCategory = CelltypeCategoryCheck(FileCellTypeCategory, celltypelist = list(ReferenceCelltype.keys()))
-   FileInitialCellTypeRatio = InitialCellTypeRatioCheck(InitialCellTypeRatio, FileInitialCellTypeRatio, ncelltype = DFReferenceProfile.shape[1])
+   FileInitialCellTypeRatio = InitialCellTypeRatioCheck(InitialCellTypeRatio,
+      FileInitialCellTypeRatio, ncelltype = DFReferenceProfile.shape[1])
    DFReferenceProfile0 = DFReferenceProfile.loc[GeneUsedForDeconvolution, ]
    DFReferenceProfile0 = DFReferenceProfile0[DFReferenceProfile0.index.isin(DFSampleExpressionProfile.index)]
    selected_DFSampleExpressionProfile = DFSampleExpressionProfile.loc[DFReferenceProfile0.index]

{immucellai2-2.1.33 → immucellai2-2.1.34}/immucellai2/myclasses.py RENAMED Viewed

@@ -103,15 +103,15 @@ class CLASS_FOR_RUNRESULT(object):
            return FlatSamplingResultOne / FlatSamplingResultOne.sum(axis=1)[:, None]
        elif FlatSamplingResultOne.shape[1] + 1 == nCellType:
            return numpy.column_stack((FlatSamplingResultOne, 1 - FlatSamplingResultOne.sum(axis=1)))
-   def EmceeFunction2(self, FlatSamplingResultList):
-      fshape = FlatSamplingResultList.shape
-      nCellType = len(self.CellType)
-      NewFlatSamplingResultList = numpy.zeros((fshape[0], fshape[1], nCellType))
-      for sampleii in range(fshape[0]):
-         FlatSamplingResultOne = FlatSamplingResultList[sampleii, :, :]
-         NewFlatSamplingResultOne = self.optimized_calculation(FlatSamplingResultOne, nCellType)
-         NewFlatSamplingResultList[sampleii, :, :] = NewFlatSamplingResultOne
-      return NewFlatSamplingResultList
+   def EmceeFunction2(self, FlatSamplingResultList):
+       fshape = FlatSamplingResultList.shape
+       nCellType = len(self.CellType)
+       NewFlatSamplingResultList = numpy.zeros((fshape[0], fshape[1], nCellType))
+       for sampleii in range(fshape[0]):
+           FlatSamplingResultOne = FlatSamplingResultList[sampleii, :, :]
+           NewFlatSamplingResultOne = self.optimized_calculation(FlatSamplingResultOne, nCellType)
+           NewFlatSamplingResultList[sampleii, :, :] = NewFlatSamplingResultOne
+       return NewFlatSamplingResultList
    def CalculateCellTypeRatio(self):
       CellTypeRatioResult = pandas.DataFrame(numpy.zeros(( len(self.SampleName),
                                                            len(self.CellType) )),
@@ -127,7 +127,7 @@ class CLASS_FOR_RUNRESULT(object):
          CellTypeRatioResult.loc[SampleNameOne, ] = FlatSamplingAverageOne
          FinalSampling = self.McmcSamplingResult[Oneii, -1, :, :]
          CellTypeRatioResultFinal.loc[SampleNameOne, ] = numpy.mean(FinalSampling, axis = 0)
-      return CellTypeRatioResult, CellTypeRatioResultFinal
+      return CellTypeRatioResult, CellTypeRatioResultFinal
    def CalculateRatioOld(self):
       CellTypeRatioResult = pandas.DataFrame([[]], columns = self.CellType, index = self.SampleName )
       for Oneii in range(len(self.FlatSamplingResult)):
@@ -135,19 +135,19 @@ class CLASS_FOR_RUNRESULT(object):
          SampleNameOne = self.SampleName[Oneii]
          CellTypeRatioOne = list()
          for ii in range(len(self.Celltype)):
-            CellTypeRatioOne.append((FlatSamplingOne[:, :, ii].sum(axis =1))[1])
-         CellTypeRatioResult.loc[SampleNameOne,:] = CellTypeRatioOne
+             CellTypeRatioOne.append((FlatSamplingOne[:, :, ii].sum(axis =1))[1])
+         CellTypeRatioResult.loc[SampleNameOne,] = CellTypeRatioOne
       return CellTypeRatioResult
    def save_result(self, FilenameToSave, ResultIndex = 0):
       DataCelltypeRatio = self.get_result( ResultIndex = ResultIndex )
       (DataCelltypeRatio.T).to_excel(FilenameToSave, index = True, header = True)
    def get_result(self, ResultIndex = 0):
-      if ResultIndex == 0:
-         return self.CellTypeRatioResult
-      elif ResultIndex  == 1:
-         return self.CellTypeRatioResultFinal
-      else:
-         pass
+     if ResultIndex == 0:
+        return self.CellTypeRatioResult
+     elif ResultIndex  == 1:
+        return self.CellTypeRatioResultFinal
+     else:
+        pass
    @property
    def CellTypeCateogryContent(self):
       return self._CellTypeCateogryContent
@@ -156,7 +156,6 @@ class CLASS_FOR_RUNRESULT(object):
       self._CellTypeCateogryContent = CellTypeCateogryContent
       del self.FileCellTypeCategory
 class CLASS_FOR_EMCEEPARAMETER(object):
    def __init__(self, position, CellType = list(), nsteps = 1000,
       nwalkers = 1,
@@ -242,6 +241,4 @@ class CLASS_ENVIRONMENT_CONFIG(object):
    def __init__(self, Environment):
       self.ThreadNum =  Environment
-#exmaples1 = CLASSONE(111)

{immucellai2-2.1.33 → immucellai2-2.1.34}/immucellai2.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: immucellai2
-Version: 2.1.33
+Version: 2.1.34
 Summary: A tool for immune cell type deconvolution
 Home-page: https://github.com/VyvyanYjm/ImmuCellAI2.0
 Author: YangJingmin

{immucellai2-2.1.33 → immucellai2-2.1.34}/setup.cfg RENAMED Viewed

@@ -1,6 +1,6 @@
 [metadata]
 name = immucellai2
-version = 2.1.33
+version = 2.1.34
 author = YangJingmin
 author_email = yangjingmin2021@163.com
 description = A tool for immune cell type deconvolution