biopipen 0.17.6__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (31) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +36 -23
  3. biopipen/ns/delim.py +1 -1
  4. biopipen/ns/scrna.py +132 -49
  5. biopipen/ns/tcr.py +62 -0
  6. biopipen/reports/scrna/MarkersFinder.svelte +30 -8
  7. biopipen/reports/scrna/SeuratClusterStats.svelte +64 -109
  8. biopipen/reports/tcr/TESSA.svelte +43 -0
  9. biopipen/scripts/delim/SampleInfo.R +18 -15
  10. biopipen/scripts/scrna/MarkersFinder.R +58 -2
  11. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +40 -0
  12. biopipen/scripts/scrna/SeuratClusterStats-features.R +236 -0
  13. biopipen/scripts/scrna/SeuratClusterStats-stats.R +105 -0
  14. biopipen/scripts/scrna/SeuratClusterStats.R +7 -521
  15. biopipen/scripts/scrna/SeuratClustering.R +20 -1
  16. biopipen/scripts/tcr/ImmunarchLoading.R +1 -1
  17. biopipen/scripts/tcr/TESSA.R +198 -0
  18. biopipen/scripts/tcr/TESSA_source/Atchley_factors.csv +21 -0
  19. biopipen/scripts/tcr/TESSA_source/BriseisEncoder.py +168 -0
  20. biopipen/scripts/tcr/TESSA_source/MCMC_control.R +71 -0
  21. biopipen/scripts/tcr/TESSA_source/TrainedEncoder.h5 +0 -0
  22. biopipen/scripts/tcr/TESSA_source/fixed_b.csv +31 -0
  23. biopipen/scripts/tcr/TESSA_source/initialization.R +120 -0
  24. biopipen/scripts/tcr/TESSA_source/post_analysis.R +124 -0
  25. biopipen/scripts/tcr/TESSA_source/real_data.R +67 -0
  26. biopipen/scripts/tcr/TESSA_source/update.R +195 -0
  27. biopipen/scripts/tcr/TESSA_source/utility.R +18 -0
  28. {biopipen-0.17.6.dist-info → biopipen-0.18.0.dist-info}/METADATA +8 -8
  29. {biopipen-0.17.6.dist-info → biopipen-0.18.0.dist-info}/RECORD +31 -16
  30. {biopipen-0.17.6.dist-info → biopipen-0.18.0.dist-info}/WHEEL +0 -0
  31. {biopipen-0.17.6.dist-info → biopipen-0.18.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,124 @@
1
# Return the most frequent value in `v`.
#
# Args:
#   v: an atomic vector.
# Returns:
#   The element of `v` with the highest count. Ties go to the value that
#   occurs first in `v`, because which.max() reports the first maximum.
getmode <- function(v) {
  distinct <- unique(v)
  # position of every element of `v` within `distinct`, then count per position
  counts <- tabulate(match(v, distinct))
  distinct[which.max(counts)]
}
5
# Write four diagnostic PNG plots for a Tessa result into `folder`:
#   clone_size.png, exp_TCR_pair_plot.png, TCR_dist_density.png,
#   TCR_explore.png
#
# Args:
#   tessa_results: list providing the elements read below
#     (meta_dedup, meta, b, t, K, dt, de, ak, phi).
#   folder: output directory; created (recursively) when it does not exist.
#   labels: optional vector of per-cell labels, aggregated by
#     `tessa_results$meta$group_ID`. The default NA means "no labels".
#     The colour mapping below expects the values 1 and 0
#     (per the original comment: 1 = CD8, 0 = CD4).
# Returns:
#   The value of the final dev.off() call.
#
# Relies on getmode() being defined in the calling environment.
plot_tessa <- function(tessa_results, folder, labels = NA) {
  library(data.table)

  # load data
  meta_dedup <- tessa_results$meta_dedup
  meta <- tessa_results$meta
  b <- tessa_results$b
  tcr <- tessa_results$t  # named `tcr` so base::t() is not shadowed
  K <- tessa_results$K
  dt <- tessa_results$dt
  de <- tessa_results$de
  ak <- tessa_results$ak
  phi <- tessa_results$phi

  # `labels` is a vector when supplied, so `if (!is.na(labels))` is an error
  # on R >= 4.2. Labels count as supplied unless the argument is the scalar
  # NA default (or empty).
  has_labels <- length(labels) > 1 || (length(labels) == 1 && !is.na(labels))

  if (!file.exists(folder)) {
    dir.create(folder, recursive = TRUE)
  }

  # are cluster centers more expanded than other members of the clusters?
  png(paste(folder, "/clone_size.png", sep = ""),
      width = 8, height = 8, units = "in", res = 100)
  group_sizes <- aggregate(meta$barcode, by = list(meta$group_ID), length)
  meta_dedup$n <- NA
  meta_dedup[group_sizes$Group.1, "n"] <- group_sizes$x
  if (has_labels) {
    group_label <- aggregate(labels, by = list(meta$group_ID), getmode)
    label_pairs <- data.table(cbind(meta$group_ID, labels))
    # fraction of each group's cells carrying each label
    label_frac <- table(label_pairs) / group_sizes$x
    label_frac <- label_frac[match(group_label$Group.1, row.names(label_frac)), ]
    # NOTE(review): the column is selected with the label value itself. For
    # numeric labels (e.g. 0/1) this is a positional index, not a lookup by
    # label name -- confirm the intended label type before changing this.
    keep <- sapply(
      seq_len(nrow(label_frac)),
      function(i) label_frac[i, group_label$x[i]]
    )
    meta_dedup[group_label$Group.1, "labels"] <- group_label$x
    # groups whose dominant label covers < 60% of their cells are marked 999
    meta_dedup[group_label$Group.1[keep < 0.6], "labels"] <- 999
  }
  meta_dedup0 <- meta_dedup[meta_dedup$cluster_number %in% names(phi[phi > 2]), ]
  keep <- meta_dedup0$group_ID != meta_dedup0$cluster_number
  non_center_n <- aggregate(
    meta_dedup0[keep, "n"],
    by = list(meta_dedup0$cluster_number[keep]),
    median
  )
  if (has_labels) {
    meta_dedup0$col <- c('salmon', 'blue', 'grey')[
      factor(meta_dedup0$labels, levels = c(1, 0, 999))
    ]
    # salmon: CD8; blue: CD4; grey: mixed center (CD8 and CD4 cells -> same
    # TCRs) or the center expresses same levels of CD8 and CD4
    plot(non_center_n$x, meta_dedup0[non_center_n$Group.1, "n"],
         xlab = "Clone size of non-center TCR groups",
         ylab = "Clone size of center TCR groups",
         pch = 19, col = meta_dedup0[non_center_n$Group.1, "col"])
  } else {
    plot(non_center_n$x, meta_dedup0[non_center_n$Group.1, "n"],
         xlab = "Clone size of non-center TCR groups",
         ylab = "Clone size of center TCR groups",
         pch = 19)
  }
  abline(0, 1, col = "red")
  dev.off()

  # expression-TCR distance plot (up to six clusters, largest first)
  png(paste(folder, "/exp_TCR_pair_plot.png", sep = ""),
      width = 8, height = 12, units = "in", res = 100)
  par(mfrow = c(3, 2))
  de_dt_ak <- as.data.frame(cbind(
    de_len = vapply(de, length, integer(1)),
    dt_len = vapply(dt, length, integer(1)),
    ak
  ))
  de_dt_ak <- de_dt_ak[order(-de_dt_ak$de_len), ]
  for (k in rownames(de_dt_ak)[seq_len(min(6, K))]) {
    plot(de[[k]], dt[[k]], main = paste("Cluster:", k), pch = 19,
         col = 1 + as.numeric(as.factor(names(de[[k]]))),
         xlab = "Expression dist.", ylab = "TCR dist.")
    segments(x0 = 0, y0 = 0, y1 = max(dt[[k]]), x1 = ak[k] * max(dt[[k]]),
             lwd = 2, lty = 2)
  }
  dev.off()

  # density of TCR distances: all pairs (black) vs. within-cluster (red)
  png(paste(folder, "/TCR_dist_density.png", sep = ""),
      width = 8, height = 6, units = "in", res = 100)
  plot(density(as.matrix(dist(t(tcr / sqrt(b))))^2 / 2),
       xlab = "TCR distances", ylab = "", lwd = 3, main = "")
  lines(density(unlist(dt)), lwd = 3, col = "red")
  dev.off()

  # exploratory plot at the TCR level
  png(paste(folder, "/TCR_explore.png", sep = ""),
      width = 12, height = 12, units = "in", res = 100)
  pca_t <- prcomp(t(tcr / sqrt(b)), scale. = FALSE)$x
  plot(pca_t[, 1], pca_t[, 2], type = "n", xlab = "PC1", ylab = "PC2")
  cluster_idx <- as.numeric(as.factor(meta_dedup$cluster_number))
  names(cluster_idx) <- meta_dedup$group_ID
  # groups that are cluster centers (names of `dt`) are drawn larger
  text(pca_t[, 1], pca_t[, 2], labels = cluster_idx, col = cluster_idx,
       cex = 0.5 + 0.5 * (rownames(pca_t) %in% names(dt)))
  for (k in seq_len(K)) {
    center <- names(dt)[k]
    for (group in names(dt[[k]])) {
      segments(x0 = pca_t[center, 1], y0 = pca_t[center, 2],
               x1 = pca_t[group, 1], y1 = pca_t[group, 2],
               col = cluster_idx[group])
    }
  }
  dev.off()
}
90
+
91
# Assign new TCR embeddings to clusters by single-linkage hierarchical
# clustering on b-scaled Manhattan distances, cut at height `cutoff`.
#
# Args:
#   tessa_results: list with element `b` (per-dimension weights) and, when
#     `cutoff` is NA, element `dt` (list of within-cluster distance vectors).
#   t_new: numeric matrix, embedding dimensions on rows and items on columns
#     (it is divided row-wise by sqrt(b) and transposed before dist()).
#   cutoff: tree height passed to cutree(). When NA it is derived as the 40%
#     quantile of the non-zero per-cluster median distances in `dt`.
# Returns:
#   The integer cluster membership vector produced by cutree(), returned
#   visibly (the original ended in an assignment and returned it invisibly).
predict_tessa <- function(tessa_results, t_new, cutoff = NA) {
  b <- tessa_results$b
  if (is.na(cutoff)) {
    cluster_medians <- vapply(
      tessa_results$dt,
      function(x) unname(quantile(x, 0.5)),
      numeric(1)
    )
    cutoff <- quantile(cluster_medians[cluster_medians != 0], 0.4)
    if (is.na(cutoff)) {
      stop("cannot derive a cutoff: no cluster has a non-zero median distance",
           call. = FALSE)
    }
  }
  tree <- hclust(
    dist(t(t_new / sqrt(b)), method = "manhattan"),
    method = "single"
  )
  cutree(tree, h = cutoff)
}
100
+
101
# Draw the group -> cluster-center network of a Tessa result with igraph and
# save it as <folder>/TCR_explore_clusters.png.
#
# Args:
#   tessa_results: list with `meta_dedup` (columns group_ID, cluster_number)
#     and `meta` (column group_ID, one row per cell).
#   folder: existing directory to write the PNG into.
# Returns:
#   The value of dev.off().
plot_Tessa_clusters <- function(tessa_results, folder) {
  library(igraph)
  meta <- tessa_results$meta_dedup
  relations <- data.frame(
    source = meta$group_ID,
    target = meta$cluster_number,
    stringsAsFactors = FALSE
  )
  # Vertex size = sqrt(number of cells in the group). table() orders its
  # counts by sorted group ID, so the sizes are named from the table itself
  # instead of from meta_dedup$group_ID, whose order may differ.
  group_counts <- table(tessa_results$meta$group_ID)
  node_size <- setNames(sqrt(as.vector(group_counts)), names(group_counts))
  # drop self-edges (cluster centers pointing at themselves)
  relations <- relations[relations[, 1] != relations[, 2], ]
  verticies <- data.frame(
    name = unique(c(relations$source, relations$target)),
    stringsAsFactors = FALSE
  )
  node_size <- node_size[as.character(verticies$name)]
  g <- graph_from_data_frame(relations, directed = FALSE, vertices = verticies)
  verticies$group <- edge.betweenness.community(g)$membership
  png(paste(folder, "/TCR_explore_clusters.png", sep = ""),
      width = 12, height = 12, units = "in", res = 100)
  plot.igraph(g,
              #mark.groups=verticies$group, # group vertices by betweeness indicator (redish blob background)
              layout = layout.auto,
              vertex.color = verticies$group, # color vertices by edge betweeness
              vertex.label = NA,              # no vertex label (name)
              edge.arrow.size = 1,
              edge.arrow.width = 1,
              edge.width = 4,
              edge.lty = "solid",
              vertex.size = node_size)
  dev.off()
}
@@ -0,0 +1,67 @@
1
+ library(MASS)
2
+ library(LaplacesDemon)
3
+ library(Rtsne)
4
+
5
# Load the input files, build the expression embedding and run Tessa().
#
# Args:
#   script_dir: directory holding the Tessa R sources that are source()d here.
#   exp_file: CSV of expression data, cells on columns and genes on rows.
#   contigs_file: CSV of encoded CDR3 values; the first column is dropped and
#     the first ncol(expression) rows are used.
#   cdr3_file: CSV with a `cdr3` column (and a `sample` column when
#     is_sampleCluster is TRUE).
#   save: a file dir to store tessa results (passed through to Tessa()).
#   is_sampleCluster: when TRUE, the `sample` column of cdr3_file is passed
#     to Tessa() as sample_id; otherwise NULL is passed.
#   fixed_b: NULL, or path to a CSV with a column `b` of pre-defined b. The
#     vector must be numerical and have the length of the TCR embeddings.
#   xi, g, initialize_cluster_factor: forwarded inside `hyper_priors`.
#   max_iter: forwarded to Tessa().
# Returns:
#   Whatever Tessa() returns.
run_tessa <- function(
    script_dir,
    exp_file,
    contigs_file,
    cdr3_file,
    save,
    is_sampleCluster,
    fixed_b,
    xi = 1e+25,
    g = 0.001,
    initialize_cluster_factor = 6,
    max_iter = 1000
) {
  # source() with its default local = FALSE evaluates in the global env
  tessa_sources <- c(
    'update.R', 'initialization.R', 'MCMC_control.R', 'utility.R',
    'post_analysis.R'
  )
  for (src in tessa_sources) {
    source(file.path(script_dir, src))
  }

  # the users need to provide these data:
  # The columns/rows/length of them should be matched up wherever applicable
  # exp_file: expression data, cells on columns, and genes on rows.
  # e can be constructed by PCA or t-SNE, the first row is the first PC, the
  #   second row is the second PC, etc
  # contigs_file: encoded CDR3 values, cells on columns, and embeddings on rows.
  # cdr3: character vectors of CDR3 sequences
  exp_data <- read.csv(exp_file, row.names = 1, stringsAsFactors = FALSE,
                       check.names = FALSE)
  n <- ncol(exp_data)
  cell_idx <- seq_len(n)
  gene_sd <- apply(exp_data, 1, sd)

  # Run TSNE on the genes whose sd is above the 90% quantile
  variable_genes <- gene_sd > quantile(gene_sd, 0.9)
  e <- t(Rtsne(t(exp_data[variable_genes, cell_idx]), dims = 3)$Y)
  colnames(e) <- colnames(exp_data)[cell_idx]

  contigs_encoded <- read.csv(contigs_file, stringsAsFactors = FALSE)
  # named `tcr` so base::t() is not shadowed
  tcr <- t(contigs_encoded[cell_idx, -1])

  meta <- read.csv(cdr3_file, header = TRUE, stringsAsFactors = FALSE)
  cdr3 <- meta$cdr3
  sample_id <- if (is_sampleCluster) meta$sample else NULL

  b <- if (!is.null(fixed_b)) {
    read.csv(fixed_b, header = TRUE, stringsAsFactors = FALSE)$b
  } else {
    NULL
  }

  # the users need to provide these parameters, here are the suggested values
  hyper_priors <- list(
    lambda = mean(apply(tcr, 1, var)),
    xi = xi,
    g = g,
    tau = 100,
    u = 0.1,
    v = 0.1,
    initialize_cluster_factor = initialize_cluster_factor
  )

  # Tessa
  Tessa(e, cdr3, tcr, hyper_priors, max_iter, sample_id, save, b)
}
@@ -0,0 +1,195 @@
1
# Redraw every cluster's t0 vector from a normal distribution whose mean and
# sd are built from the summed embeddings of the cluster's groups.
#
# Args:
#   t: numeric matrix, embedding dimensions on rows, groups on columns.
#   meta_dedup: data frame with a `cluster_number` entry per column of `t`.
#   K: number of clusters to update (the first K entries of `phi` / `t0`).
#   lambda: scalar used in the mean and sd formulas below.
#   b: numeric vector with one entry per row of `t`.
#   phi: named vector of cluster sizes; names are the cluster identifiers.
#   t0: list of current t0 vectors, one per cluster.
# Returns:
#   `t0` with its first K elements replaced by fresh draws.
update_t0 <- function(t, meta_dedup, K, lambda, b, phi, t0) {
  # per-cluster sum over member groups; `t(t)` is base::t applied to arg `t`
  cluster_sums <- aggregate(t(t), by = list(meta_dedup$cluster_number), sum)
  rownames(cluster_sums) <- cluster_sums[, 1]
  cluster_sums <- t(as.matrix(cluster_sums[, -1]))
  # reorder columns to the order of `phi`
  cluster_sums <- cluster_sums[, names(phi), drop = FALSE]
  n_dim <- dim(t)[1]
  # seq_len() so that K == 0 performs no iterations (1:0 would run twice)
  for (k in seq_len(K)) {
    t0[[k]] <- rnorm(
      n_dim,
      cluster_sums[, k] / (b / lambda + phi[k]),
      sqrt(1 / (1 / lambda + phi[k] / b))
    )
  }
  t0
}
11
+
12
# Redraw the per-cluster coefficients `ak` from their normal conditional.
# Only clusters with phi > 1 receive the new draw; the rest keep their value.
#
# Args:
#   K: number of clusters.
#   dt, de: lists of length K with the per-cluster distance vectors.
#   sigma, a, g: scalars used in the mean / sd formulas below.
#   phi: cluster sizes (one per cluster).
#   ak: current coefficients (one per cluster).
# Returns:
#   The updated `ak` vector (names preserved).
update_ak <- function(K, dt, de, sigma, a, g, phi, ak) {
  clusters <- seq_len(K)  # safe for K == 0, unlike 1:K
  AK <- vapply(
    clusters, function(k) as.numeric(sum(dt[[k]]^2)), numeric(1)
  ) + 1 / g
  BK <- vapply(
    clusters, function(k) as.numeric(sum(dt[[k]] * de[[k]])), numeric(1)
  ) + a / g
  ak_new <- rnorm(K, BK / AK, sigma / sqrt(AK))
  multi_member <- phi > 1
  ak[multi_member] <- ak_new[multi_member]
  ak
}
20
+
21
# Draw a new sigma: the square root of one inverse-gamma sample.
#
# Args:
#   u, v: scalars added to the shape and scale terms respectively.
#   K: number of clusters.
#   phi: cluster sizes.
#   g, a: scalars used in the (ak - a)^2 penalty.
#   ak: per-cluster coefficients.
#   de, dt: accepted for signature compatibility; not used here.
#   regression_loss: per-cluster loss values, summed into the scale.
# Returns:
#   A single positive number.
#
# rinvgamma() is not defined in this file; real_data.R attaches
# LaplacesDemon, which presumably provides it.
update_sigma <- function(u, v, K, phi, g, ak, a, de, dt, regression_loss) {
  shape_plus_one <- u + 1 + (K + sum(phi)) / 2
  scale_term <- v + sum((ak - a)^2) / 2 / g + sum(regression_loss) / 2
  sigma_sq <- rinvgamma(1, shape = shape_plus_one - 1, scale = scale_term)
  sqrt(sigma_sq)
}
27
+
28
# Draw a new `a` from a normal distribution truncated to [0, Inf).
#
# Args:
#   tau, g, sigma: scalars used in the precision term below.
#   K: number of clusters.
#   ak: per-cluster coefficients.
# Returns:
#   One draw from rtnorm() with mean D / E and sd sqrt(1 / E).
update_a <- function(tau, K, g, sigma, ak) {
  E <- 1 / tau + K / (g * sigma^2)
  D <- sum(ak) / (g * sigma^2)
  rtnorm(mu = D / E, sd = sqrt(1 / E), a = 0, b = Inf)
}
35
+
36
# Propose a new `b` vector and accept or reject it with one uniform draw.
#
# When `preset_b` is TRUE nothing is sampled and the inputs are returned
# unchanged. Otherwise each b[q] is proposed from an inverse gamma, the
# within-cluster TCR distances `dt` are recomputed under the proposal, and
# the proposal is accepted with probability min(1, exp(-loss_increase)).
#
# Args:
#   u, v: scalars added to the proposal's shape and scale terms.
#   phi: named cluster sizes; names are the cluster-center column names of `t`.
#   t: numeric matrix, embedding dimensions on rows, groups on columns
#     (column names are used to look up rows of `meta_dedup`).
#   t0: list of per-cluster vectors, indexed by cluster name.
#   meta_dedup: data frame with a `cluster_number` column, row names = groups.
#   K: number of clusters.
#   dt, de: lists of per-cluster distance vectors (dt elements are named by
#     group).
#   ak: per-cluster coefficients.
#   sigma: scalar.
#   regression_loss: current per-cluster loss values.
#   b: current b vector.
#   preset_b: logical; TRUE keeps `b` fixed.
# Returns:
#   list(b, dt, updated) where `updated` is 1 when the proposal was accepted
#   and 0 otherwise.
#
# rinvgamma() is not defined in this file; real_data.R attaches
# LaplacesDemon, which presumably provides it.
update_b <- function(u, v, phi, t, t0, meta_dedup, K, dt, de, ak, sigma,
                     regression_loss, b, preset_b) {
  if (preset_b) {
    return(list(b = b, dt = dt, updated = 0))
  }

  # new beta
  MH_alpha <- u + sum(phi) / 2
  # t0 of each group's cluster, laid out column-by-column to line up with `t`
  centers <- unlist(t0[meta_dedup[colnames(t), "cluster_number"]])
  MH_beta <- v + rowSums((t - centers)^2) / 2
  bnew <- vapply(
    seq_along(b),
    function(q) rinvgamma(1, shape = MH_alpha, scale = MH_beta[q]),
    numeric(1)
  )

  # new dt
  dtnew <- dt
  for (k in seq_len(K)) {
    center <- names(phi)[k]
    members <- names(dt[[k]])
    dtnew[[k]] <- colSums(
      (t[, members, drop = FALSE] - t[, center])^2 / bnew / 2
    )
  }

  # change in regression loss caused by the proposal, scaled by 2 * sigma^2
  regression_loss_new <- vapply(
    seq_len(K),
    function(k) as.numeric(sum((de[[k]] - ak[k] * dtnew[[k]])^2)),
    numeric(1)
  )
  loss_increase <- sum(regression_loss_new - regression_loss) / 2 / sigma^2

  updated <- 0
  if (runif(1, 0, 1) < min(1, exp(-loss_increase))) {
    updated <- 1
    b <- bnew
    dt <- dtnew
  }
  list(b = b, dt = dt, updated = updated)
}
69
+
70
# Unnormalised assignment weights for one group: one weight per existing
# cluster followed by one for a candidate new cluster.
#
# Args:
#   phi: named sizes of the existing clusters (names = center column names).
#   xi: weight factor of the candidate new cluster.
#   group_ID: column index (into `t`) / row index (into `master_dist_e`) of
#     the group being reassigned.
#   b: per-dimension scaling vector.
#   t0_new, ak_new: t0 vector and coefficient of the candidate new cluster.
#   master_dist_e: expression distance matrix indexed by group.
#   sigma: scalar.
#   t: numeric matrix, embedding dimensions on rows, groups on columns.
#   ak: coefficients of the existing clusters.
#   t0: list of t0 vectors of the existing clusters.
# Returns:
#   Numeric vector of length(phi) + 1. The exponent is shifted by its
#   maximum before exp(), so the largest exponent maps to exp(0).
calulate_DPprob <- function(phi, xi, group_ID, b, t0_new, ak_new,
                            master_dist_e, sigma, t, ak, t0) {
  target <- t[, group_ID]
  center_term <- function(center) sum(-(center - target)^2 / 2 / b)

  size_weight <- c(phi, xi)
  log_center <- c(sapply(t0, center_term), center_term(t0_new))

  # expression and TCR distance from the group to every existing center;
  # the trailing entry stands for the candidate new cluster
  de_test <- c(
    master_dist_e[group_ID, names(phi)],
    master_dist_e[group_ID, group_ID]
  )
  dt_test <- c(
    colSums((target - t[, names(phi), drop = FALSE])^2 / b / 2),
    0
  )
  log_regression <- (de_test - dt_test * c(ak, ak_new))^2 / (-2 * sigma^2)

  log_weight <- log_center + log_regression
  log_weight <- log_weight - max(log_weight)
  size_weight * exp(log_weight)
}
83
+
84
# One sweep over all groups (rows of `meta_dedup`), reassigning each to an
# existing cluster or to a newly created one. (Presumably the
# Dirichlet-process step of the sampler, going by the function name.)
#
# For every group, in order:
#   1. take it out of its current cluster (dropping the cluster if it was the
#      only member),
#   2. draw parameters for a candidate new cluster,
#   3. compute assignment weights with calulate_DPprob() and sample a target,
#   4. update centers, names and distance vectors of the cluster it left and
#      the one it joined.
# The order of random draws (rnorm, rnorm, sample) per group is unchanged
# from the original implementation.
#
# Args:
#   meta_dedup: data frame with `group_ID` and `cluster_number` columns.
#   meta: per-cell data frame with `group_ID`; only used with `sample_id`.
#   t0, dt, de: per-cluster lists, named by cluster.
#   ak, phi: per-cluster named vectors (coefficients and sizes).
#   t: numeric matrix, embedding dimensions on rows, groups on columns.
#   lambda, g, sigma, a, xi: scalars.
#   b: per-dimension scaling vector.
#   master_dist_e: expression distance matrix indexed by group.
#   K: current number of clusters.
#   mean_t: mean used when drawing a new cluster's t0.
#   sample_id: NULL, or per-cell sample labels. When given, a group may only
#     join clusters whose name carries the same sample after ';'.
# Returns:
#   list(cluster_number, phi, de, K, dt, ak, t0) with the updated state.
#
# Relies on calulate_DPprob(), named_c() and find_center() being defined in
# the calling environment.
DP <- function(meta_dedup, meta, t0, dt, de, ak, phi, t, lambda, g, sigma, b,
               master_dist_e, K, a, xi, mean_t, sample_id) {
  if (!is.null(sample_id)) {
    sample_id_dedup <- sample_id[!duplicated(meta$group_ID)]
    names(sample_id_dedup) <- meta_dedup$group_ID
  } else {
    sample_id_dedup <- NULL
  }

  # seq_len() so an empty meta_dedup performs no iterations
  for (group_ID in seq_len(nrow(meta_dedup))) {
    # remove group from old cluster
    group_to_operate <- meta_dedup$group_ID[group_ID]
    kth_cluster_ind <- which(names(ak) == meta_dedup$cluster_number[group_ID])

    if (phi[kth_cluster_ind] == 1) {
      # the group was the cluster's only member: drop the cluster entirely
      K <- K - 1
      phi <- phi[-kth_cluster_ind]
      t0 <- t0[-kth_cluster_ind]
      dt <- dt[-kth_cluster_ind]
      de <- de[-kth_cluster_ind]
      ak <- ak[-kth_cluster_ind]
    } else {
      phi[kth_cluster_ind] <- phi[kth_cluster_ind] - 1
    }

    # create a new cluster
    t0_new <- rnorm(n = dim(t)[1], mean_t, sqrt(lambda))
    ak_new <- rnorm(1, a, sigma * sqrt(g))

    # the assignment
    prob <- calulate_DPprob(phi, xi, group_ID, b, t0_new, ak_new,
                            master_dist_e, sigma, t, ak, t0)
    if (!is.null(sample_id)) {
      # cluster names look like "<something>;<sample>"; keep the sample part
      sample_id_cluster <- sapply(
        names(prob)[-length(prob)],
        function(name) strsplit(name, split = ';')[[1]][2]
      )
      prob2rm <- sample_id_cluster != sample_id_dedup[group_ID]
      prob2rm <- c(prob2rm, FALSE)  # the new cluster is always allowed
      if (sum(prob[!prob2rm]) == 0) {
        prob[!prob2rm] <- 1
        print(paste('Random selection:', sample_id_dedup[group_ID]))
      }
      prob[prob2rm] <- 0
    }
    prob2sc <- prob / sum(prob)
    new_cluster <- sample(seq_along(prob), 1, prob = prob2sc)
    old_cluster_name <- meta_dedup$cluster_number[group_ID]

    # move the group to operate from the old cluster into the new cluster
    if (new_cluster == length(prob)) { # the newly created cluster
      # this CDR3 becomes the center naturally
      meta_dedup$cluster_number[group_ID] <- group_to_operate
      K <- K + 1
      # modify the old cluster
      keep <- meta_dedup$cluster_number == old_cluster_name &
        meta_dedup$group_ID != group_to_operate
      go_back <- FALSE
    } else { # one of the old clusters
      new_cluster_name <- names(prob)[new_cluster]
      meta_dedup$cluster_number[group_ID] <- new_cluster_name
      phi[new_cluster_name] <- phi[new_cluster_name] + 1
      # modify the old cluster
      keep <- meta_dedup$cluster_number == old_cluster_name
      # the picked cluster is the original cluster
      go_back <- new_cluster_name == old_cluster_name
    }

    # modify the old cluster: re-pick its center and rebuild its distances
    if (sum(keep) > 0 && (!go_back)) {
      old_cluster_name1 <- find_center(t[, keep, drop = FALSE], b)
      meta_dedup$cluster_number[keep] <- old_cluster_name1
      which_to_update <- which(names(phi) == old_cluster_name)
      names(phi)[which_to_update] <- old_cluster_name1
      names(t0) <- names(ak) <- names(dt) <- names(de) <- names(phi)
      old_cluster_name <- old_cluster_name1
      tmp <- names(de[[old_cluster_name]])
      groups_to_operate <- tmp[tmp != group_to_operate]
      de_old_cluster_name <- master_dist_e[groups_to_operate, old_cluster_name]
      dt_old_cluster_name <- colSums(
        (t[, groups_to_operate, drop = FALSE] - t[, old_cluster_name])^2 / b / 2
      )
      de[[old_cluster_name]] <- named_c(NULL, de_old_cluster_name, groups_to_operate)
      dt[[old_cluster_name]] <- named_c(NULL, dt_old_cluster_name, groups_to_operate)
    }

    # modify the new cluster
    if (new_cluster == length(prob)) { # new cluster
      ak <- named_c(ak, ak_new, group_to_operate)
      phi <- named_c(phi, 1, group_to_operate)
      t0[[group_to_operate]] <- t0_new
      tmp <- master_dist_e[group_to_operate, group_to_operate]
      de[[group_to_operate]] <- named_c(NULL, tmp, group_to_operate)
      dt[[group_to_operate]] <- named_c(NULL, 0, group_to_operate)
    } else if (!go_back) { # one of the old cluster, and also a different one
      keep <- meta_dedup$cluster_number == new_cluster_name
      new_cluster_name1 <- find_center(t[, keep, drop = FALSE], b)
      meta_dedup$cluster_number[keep] <- new_cluster_name1
      which_to_update <- which(names(phi) == new_cluster_name)
      names(phi)[which_to_update] <- new_cluster_name1
      names(t0) <- names(ak) <- names(dt) <- names(de) <- names(phi)
      new_cluster_name <- new_cluster_name1
      groups_to_operate <- c(names(de[[new_cluster_name]]), group_to_operate)
      de_new_cluster_name <- master_dist_e[groups_to_operate, new_cluster_name]
      dt_new_cluster_name <- colSums(
        (t[, groups_to_operate, drop = FALSE] - t[, new_cluster_name])^2 / b / 2
      )
      de[[new_cluster_name]] <- named_c(NULL, de_new_cluster_name, groups_to_operate)
      dt[[new_cluster_name]] <- named_c(NULL, dt_new_cluster_name, groups_to_operate)
    }
  }

  return(list(cluster_number = meta_dedup$cluster_number, phi = phi, de = de,
              K = K, dt = dt, ak = ak, t0 = t0))
}
@@ -0,0 +1,18 @@
1
+ # truncated normal
2
# Sample from a normal distribution truncated to [a, b] by inverse-CDF:
# a uniform draw is mapped into the CDF range [P(a), P(b)] and pushed
# through qnorm().
#
# Args:
#   mu: vector of means; one value is drawn per element.
#   sd: standard deviation(s).
#   a, b: lower / upper truncation bounds (-Inf / Inf for no bound).
# Returns:
#   Numeric vector of draws.
rtnorm <- function(mu, sd = 1, a = -Inf, b = Inf) {
  u <- runif(n = length(mu))
  p_lower <- pnorm((a - mu) / sd, 0, sd = 1)
  p_lower[a == -Inf] <- 0
  p_upper <- pnorm((b - mu) / sd, 0, sd = 1)
  p_upper[b == Inf] <- 1
  mu + sd * qnorm(u * (p_upper - p_lower) + p_lower)
}
13
+
14
# Append `new_element` to `vector`, giving the appended part the name(s)
# `new_name`.
#
# Args:
#   vector: existing vector (may be NULL).
#   new_element: value(s) to append.
#   new_name: name(s) for `new_element`.
# Returns:
#   c(vector, new_element) with the new element(s) named.
named_c <- function(vector, new_element, new_name) {
  c(vector, setNames(new_element, new_name))
}
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: biopipen
3
- Version: 0.17.6
3
+ Version: 0.18.0
4
4
  Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
5
5
  License: MIT
6
6
  Author: pwwang
@@ -13,10 +13,10 @@ Classifier: Programming Language :: Python :: 3.9
13
13
  Classifier: Programming Language :: Python :: 3.10
14
14
  Classifier: Programming Language :: Python :: 3.11
15
15
  Provides-Extra: runinfo
16
- Requires-Dist: datar[pandas] (>=0.13,<0.14)
17
- Requires-Dist: pipen (>=0.10,<0.11)
18
- Requires-Dist: pipen-board[report] (>=0.11,<0.12)
19
- Requires-Dist: pipen-cli-run (>=0.9,<0.10)
20
- Requires-Dist: pipen-filters (>=0.8,<0.9)
21
- Requires-Dist: pipen-runinfo (>=0.2,<0.3) ; extra == "runinfo"
22
- Requires-Dist: pipen-verbose (>=0.7,<0.8)
16
+ Requires-Dist: datar[pandas] (>=0.15.2,<0.16.0)
17
+ Requires-Dist: pipen (>=0.11,<0.12)
18
+ Requires-Dist: pipen-board[report] (>=0.12,<0.13)
19
+ Requires-Dist: pipen-cli-run (>=0.10,<0.11)
20
+ Requires-Dist: pipen-filters (>=0.9,<0.10)
21
+ Requires-Dist: pipen-runinfo (>=0.3,<0.4) ; extra == "runinfo"
22
+ Requires-Dist: pipen-verbose (>=0.8,<0.9)
@@ -1,9 +1,9 @@
1
- biopipen/__init__.py,sha256=mmrB6n6zH1c3iHQ4iJcecY24GV6KoBQ8Vbb5t5vYe3E,23
1
+ biopipen/__init__.py,sha256=0EHw4xygmgkGSyfwNfEoMlQyN0uHxjHtlSFF79s6120,23
2
2
  biopipen/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  biopipen/core/config.py,sha256=edK5xnDhM8j27srDzsxubi934NMrglLoKrdcC8qsEPk,1069
4
4
  biopipen/core/config.toml,sha256=JALO2S7TfmV3gIRPJ0cLTFWncPXXheQJS3vYQlyX6wQ,1600
5
5
  biopipen/core/defaults.py,sha256=yPeehPLk_OYCf71IgRVCWuQRxLAMixDF81Ium0HtPKI,344
6
- biopipen/core/filters.py,sha256=22VAjUjmkXuVxTD2FgznwKwefVtjhDYUrXR2qsUNa9w,6671
6
+ biopipen/core/filters.py,sha256=NSeEElvRujLZQTV1e4zmQXZb2RjaAvGt1AaRH_xUOTA,7061
7
7
  biopipen/core/proc.py,sha256=7TsjBM7EEtMMB-w4jbxV_CSRY8J970gM8320Ga1YeHU,717
8
8
  biopipen/core/testing.py,sha256=5vR15kkCjfXM7Bx0HBzabNLtDLAEX4uU94TskCkPni8,1447
9
9
  biopipen/ns/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -13,17 +13,17 @@ biopipen/ns/bed.py,sha256=Gu6ctc56NkkuSHoXjJ_fgWXXTCJMtedrtZ8GUX2WEo8,5380
13
13
  biopipen/ns/cnv.py,sha256=vq6dZfEOyuVuqg3nP6FQtNmQ-JocpBJMX9IYlZ0OPD0,6803
14
14
  biopipen/ns/cnvkit.py,sha256=5mA2Q8-YDs4g1HoxtpB_NWnyZYwEThNr3s3wlubLQrQ,31130
15
15
  biopipen/ns/cnvkit_pipeline.py,sha256=_9OusUwS4avpuaqa4TOra74-7pcMbhczTNPrxctrNyM,36769
16
- biopipen/ns/delim.py,sha256=1rQnqpZDuwD43HWfsXpazxGKkBHsOjv--v2tgDSeQCg,5673
16
+ biopipen/ns/delim.py,sha256=_PTQqxVJUp2t2g6-jPUnjf9C6T11YdYHHkaF3EYJBw0,5683
17
17
  biopipen/ns/gene.py,sha256=Q5FzRByfnRITXRNRZR65ApG09FRyiihRC3TcIXxufzE,2228
18
18
  biopipen/ns/gsea.py,sha256=EsNRAPYsagaV2KYgr4Jv0KCnZGqayM209v4yOGGTIOI,7423
19
19
  biopipen/ns/misc.py,sha256=fzn0pXvdghMkQhu-e3MMapPNMyO6IAJbtTzVU3GbFa0,3246
20
20
  biopipen/ns/plot.py,sha256=yguxmErUOH-hOM10JfuI_sXw2p49XF8yGR_gXfbd5yQ,4066
21
21
  biopipen/ns/rnaseq.py,sha256=l4vFeRasGhkexopGTM_VfSyIFewOxg-9L5niFzhWUNA,565
22
- biopipen/ns/scrna.py,sha256=NXI9ljfClE2aTibYXGCQcKi4CHSdFXt7rbu0ASyuhbg,74979
22
+ biopipen/ns/scrna.py,sha256=EIvjkMRtVakruO5X3eoD4j4pWV5TGwf-E48Kav5WB1Q,79087
23
23
  biopipen/ns/scrna_basic.py,sha256=os00_hhEaCVRspSCnHvOoItS11OU_aFKgMqbocVyZQU,8538
24
24
  biopipen/ns/scrna_metabolic_landscape.py,sha256=AW36EOH-5AJNcMK4j7_LPgJYpyXASH3jj99g_m15SaA,26056
25
25
  biopipen/ns/tcgamaf.py,sha256=AFbUJIxiMSvsVY3RcHgjRFuMnNh2DG3Mr5slLNEyz6o,1455
26
- biopipen/ns/tcr.py,sha256=6C8_B85hOVrW8ntsEEEe4OnZGaIqSL8S1yJPf2DF1ko,57204
26
+ biopipen/ns/tcr.py,sha256=Z02tWLZDSiXMfh9SCKbbwATUN7OcrCB3fOgepAagasY,59922
27
27
  biopipen/ns/vcf.py,sha256=cdkKroii0_nl_bSP2cnO09qESUAhHqu6btOiTSKS79Y,15314
28
28
  biopipen/ns/web.py,sha256=3zucrDo-IVsSnIvlw-deoScuxqWa6OMTm8Vo-R4E44Q,2224
29
29
  biopipen/reports/bam/CNAClinic.svelte,sha256=D4IxQcgDCPQZMbXog-aZP5iJEQTK2N4i0C60e_iXyfs,213
@@ -41,11 +41,11 @@ biopipen/reports/gsea/GSEA.svelte,sha256=lYHf8h8RLx3i-jNCEGu_LM-dbYm9ZJDzyAEadsZ
41
41
  biopipen/reports/scrna/CellsDistribution.svelte,sha256=C2Zua-3sXRESttx00cJkQFTUDN5mGbSY0WRyJ5Aoi1Y,1198
42
42
  biopipen/reports/scrna/DimPlots.svelte,sha256=ZLbtN0ioevRyEky4jb_DkDGAcpy_jAhaHfFym5ELEPM,479
43
43
  biopipen/reports/scrna/GeneExpressionInvistigation.svelte,sha256=nsWHX5VaRSIbUYSFKYcBZaeJIxuUlk5UItA7WPK4clY,793
44
- biopipen/reports/scrna/MarkersFinder.svelte,sha256=OnakwMU9ZD2tH7nJgG4XxOljAjacJHGj9UBPTHWv754,2770
44
+ biopipen/reports/scrna/MarkersFinder.svelte,sha256=5VKGCNmH0EVfDTINCG6Klr8dxpHgl662EYsS4RNiaKI,3856
45
45
  biopipen/reports/scrna/MetaMarkers.svelte,sha256=mPdwV9sTXn6fLTZWqTvFNSn7fdIs-7w4HSerV69o6C0,3709
46
46
  biopipen/reports/scrna/RadarPlots.svelte,sha256=QnqQ5J-27KR5apcKmP10fDkoAyO9n3Q2LPEWqDuRxdw,1075
47
47
  biopipen/reports/scrna/ScFGSEA.svelte,sha256=uLOlfillgvLbDCO7CzIKZ3HzwO4fZcCcJOpzJRnKwm4,1268
48
- biopipen/reports/scrna/SeuratClusterStats.svelte,sha256=-EaVQF6A2iRs7FQ6C01AAhdEEnoWMRIWAg-BdO_ukdo,4959
48
+ biopipen/reports/scrna/SeuratClusterStats.svelte,sha256=_vRaffYxpPh_Zdw1O-U7zKkZTUHtrugyhuT7REYaWEk,3433
49
49
  biopipen/reports/scrna/SeuratMap2Ref.svelte,sha256=SYuoKJR1PuBf9c7WvUXXjmokVo-JxFLqZqiu9kTtLuY,542
50
50
  biopipen/reports/scrna/SeuratPreparing.svelte,sha256=PHeL_prMFTZ-tddhUHG3O6dGUL2ygNNnMEAeHzZVq-k,1182
51
51
  biopipen/reports/scrna/TopExpressingGenes.svelte,sha256=lVvlKwMep4woYTiTPTTvlSPOycjxUb44aiEqNzj9u18,1952
@@ -58,6 +58,7 @@ biopipen/reports/tcr/CloneResidency.svelte,sha256=otmBEL1gI3uJjD-ezjwRvklzHHZBXh
58
58
  biopipen/reports/tcr/Immunarch.svelte,sha256=IbZgx95-fe4DeoYcxT8o9Z-gqrlLNaar0RbebbFjOfE,6446
59
59
  biopipen/reports/tcr/SampleDiversity.svelte,sha256=GrPUpQ6aJPtyQHoFL9ayG6knpW6yZznqwo0ZVZ2tMl4,5524
60
60
  biopipen/reports/tcr/TCRClusteringStats.svelte,sha256=gAfrPpo5GtpZKOvA1nEUFZKw5LhtwbEzN2mwmG__Zq4,1865
61
+ biopipen/reports/tcr/TESSA.svelte,sha256=UI7ZeQcLAZXHJQe6pWck9_Hw3ll3YE9L0y_b0cF347Y,2508
61
62
  biopipen/reports/tcr/VJUsage.svelte,sha256=c9G2-APGG0l-c6b32aFOimrP_QYD-oELBASN4VabXmk,426
62
63
  biopipen/reports/utils/gsea.liq,sha256=5uxNPyIr0xd7nrZePJlIsRyqCPx1uVWso7ehE1F-C4k,3556
63
64
  biopipen/reports/utils/misc.liq,sha256=HLK3mkWtIMQqBtM2IIRFUdKKTcY8cvBtyHJB9DbWBdw,1653
@@ -93,7 +94,7 @@ biopipen/scripts/cnvkit/CNVkitScatter.py,sha256=7DhTiXPHEHbdXn0VFcDOR-wTP6sks08N
93
94
  biopipen/scripts/cnvkit/CNVkitSegment.py,sha256=q5iGAjY6-yIehPcJpi3hX6EuGre0YgWTPkG_d5LEV48,1629
94
95
  biopipen/scripts/cnvkit/guess_baits.py,sha256=7OCMtSMHIJWWZv9qEYVXnB0N4hU_JaGEesKdkr6tvJc,10586
95
96
  biopipen/scripts/delim/RowsBinder.R,sha256=yp960u7Ui_jFCL8WDvODa-0vhJvyLo64ll35PzXYUbI,1444
96
- biopipen/scripts/delim/SampleInfo.R,sha256=e4LSg7_KlNGTZzmb7YFqgSo23uT_uezbllVvNHlRfIs,5096
97
+ biopipen/scripts/delim/SampleInfo.R,sha256=abO6oI2zBzJdKfyL4CvRrQQWkdLD0NYCnhnhYqrosmc,5253
97
98
  biopipen/scripts/gene/GeneNameConversion.py,sha256=2RveardTsLv2K1XSj3G0ERYLiln9bcR74bjkRdKcChc,1880
98
99
  biopipen/scripts/gsea/Enrichr.R,sha256=tr4vInlVIeiGXumh22ARuTQmy0-Qq869RiX7d7ERqCg,661
99
100
  biopipen/scripts/gsea/FGSEA.R,sha256=RLqDgrqnYEacHfzEEuZ3d29lxNqWehigOnGuu248SRg,1483
@@ -116,14 +117,17 @@ biopipen/scripts/scrna/ExprImpution-rmagic.R,sha256=yYnkyVfqIaNynsbaZZLGS6DrAJ_X
116
117
  biopipen/scripts/scrna/ExprImpution-scimpute.R,sha256=mg40qCUW7-nP5oHPvARq7dmtoahM0GRFWXQpum0BXVk,1082
117
118
  biopipen/scripts/scrna/ExprImpution.R,sha256=7768ezrr59xUZDXq8lO9jj2XhnkSsx-xxBmOD9_DO7c,313
118
119
  biopipen/scripts/scrna/GeneExpressionInvistigation.R,sha256=FI5MWic3xRml2DN7ONcyT7pbceOnL30Zd4nBHRZRFNQ,3800
119
- biopipen/scripts/scrna/MarkersFinder.R,sha256=E99kJOoDV0PvDZgbST3y3Sny3behrWr3GFnMGSAUFF8,8027
120
+ biopipen/scripts/scrna/MarkersFinder.R,sha256=eMLIpJxrdCUNDGmdLL90rNSfKyn6hwxmBvUC4A1S_SE,10089
120
121
  biopipen/scripts/scrna/MetaMarkers.R,sha256=d0fPCkD12F9GUes6UkxvMe0e9YqWuV64uRWAcvzJe8s,8331
121
122
  biopipen/scripts/scrna/ModuleScoreCalculator.R,sha256=0mLGoTvJRpTbCnmuYbYKqZnP3ZdJQkTn6getJddBKRs,2495
122
123
  biopipen/scripts/scrna/RadarPlots.R,sha256=5J1G4c1qfjyI2OTbyDj-XHV8mmh8KWkkuLbO-BNURao,6955
123
124
  biopipen/scripts/scrna/SCImpute.R,sha256=dSJOHhmJ3x_72LBRXT72dbCti5oiB85CJ-OjWtqONbk,2958
124
125
  biopipen/scripts/scrna/ScFGSEA.R,sha256=puEc9HLzPuHg8DFY6b0Kie3Iyra0H1D4h0r4VX7iIis,4953
125
- biopipen/scripts/scrna/SeuratClusterStats.R,sha256=HogtO4sVYtH22qMWaQhUDMGBWC3Eo7QU03YGmRzRog0,15740
126
- biopipen/scripts/scrna/SeuratClustering.R,sha256=H0ix8fJQ-G5i8KLSSK3RJWcKzRCcaHcXRp8LEoXtSiY,6700
126
+ biopipen/scripts/scrna/SeuratClusterStats-dimplots.R,sha256=DKSs32gv1fxpdeCOrrys5n_FnEKEaQniEoKDhrXTvKE,1197
127
+ biopipen/scripts/scrna/SeuratClusterStats-features.R,sha256=vsDys4zTeMfkCYlUnxGt8kO0MuMPozCWu5FjORizY7Y,7438
128
+ biopipen/scripts/scrna/SeuratClusterStats-stats.R,sha256=IKEGW5mjXqpIKbfSLmHm4az2BJqxzM8-Bklnw3ZqhK8,3636
129
+ biopipen/scripts/scrna/SeuratClusterStats.R,sha256=Xst0PIyqrk9QT76hjHW8tK0BZeUHiSoHzGg4l86_i3M,556
130
+ biopipen/scripts/scrna/SeuratClustering.R,sha256=Q_QGWPay3mdn3sy42Ym6kDNnowqctOfcXBSwVUIM-OM,7476
127
131
  biopipen/scripts/scrna/SeuratFilter.R,sha256=BrYK0MLdaTtQvInMaQsmOt7oH_hlks0M1zykkJtg2lM,509
128
132
  biopipen/scripts/scrna/SeuratLoading.R,sha256=ekWKnHIqtQb3kHVQiVymAHXXqiUxs6KKefjZKjaykmk,900
129
133
  biopipen/scripts/scrna/SeuratMap2Ref.R,sha256=TkDSDc5do6BOtkAq3fS5HpjyqWUwsu8YqRC5in62oz8,3750
@@ -161,11 +165,22 @@ biopipen/scripts/tcr/Immunarch-tracking.R,sha256=Zkzh6-9GZS8aKBRyZAegfoiVNUvKu0z
161
165
  biopipen/scripts/tcr/Immunarch.R,sha256=Cb7ZkA2-vyqGg4MNQ7C-yFlAXF_kNUo3Q7oak0uNcyo,1214
162
166
  biopipen/scripts/tcr/Immunarch2VDJtools.R,sha256=QB9ILGbnsfoWaRANK6ceb14wpSWy8F1V1EdEmfIqiks,706
163
167
  biopipen/scripts/tcr/ImmunarchFilter.R,sha256=o25O36FwH_0w6F8DFQ0SfpcwDzlzaGefXqr9ESrvb4k,3974
164
- biopipen/scripts/tcr/ImmunarchLoading.R,sha256=HS-80YtWB7MYbc8G7pjQB2Ug7X14PHWkCl40JVorg1A,5824
168
+ biopipen/scripts/tcr/ImmunarchLoading.R,sha256=vg3Pg_5JvcOiT3gOrWn98EpjGi8r7SYh52hhvfSyyzk,5836
165
169
  biopipen/scripts/tcr/ImmunarchSplitIdents.R,sha256=FGCeGV0uSmFU91lKkldUAeV4A2m3hHw5X4GNi8ffGzI,1873
166
170
  biopipen/scripts/tcr/SampleDiversity.R,sha256=jQ1OU3b8vswD8tZhLt3fkcqJKrl2bhQX0giHM2rXz3Y,2643
167
171
  biopipen/scripts/tcr/TCRClustering.R,sha256=yfIiCMQuywjoJnAXwRJjlJsoYIA8swUMKIt_AsGvHQY,8566
168
172
  biopipen/scripts/tcr/TCRClusteringStats.R,sha256=_NoMX_c22WIYZlVXkJjs7c3YsmEqhWJJl6cXMFa_Srw,8297
173
+ biopipen/scripts/tcr/TESSA.R,sha256=9WuLf3Di4PKb6zclPy7FW1EBITB2He-1vTMRy2vhQI8,6317
174
+ biopipen/scripts/tcr/TESSA_source/Atchley_factors.csv,sha256=SumqDOqP67P54uM7Cuc5_O_rySTWcGo7eX3psMSPX9s,763
175
+ biopipen/scripts/tcr/TESSA_source/BriseisEncoder.py,sha256=z4_Q_6StymffuUGGjHP1-B3aTsXtamKao5Q1-Kg9has,6831
176
+ biopipen/scripts/tcr/TESSA_source/MCMC_control.R,sha256=93Nnz0IG8KfFnVscZDvmBp1qccZoSoG_jIVpOWBQLHE,2911
177
+ biopipen/scripts/tcr/TESSA_source/TrainedEncoder.h5,sha256=U-z4jcmeDXnJQwgmXoIjeJvCj_R8f5wHsesDKYBaF98,457824
178
+ biopipen/scripts/tcr/TESSA_source/fixed_b.csv,sha256=tJJ2hUt0kJLZOxTH_vo_vd_CSbHa_W_JGa3jcOWrczY,570
179
+ biopipen/scripts/tcr/TESSA_source/initialization.R,sha256=TYPciAvWrrOVByfxdsqouEk8k9vgB45oW1jQFfUGLjM,3900
180
+ biopipen/scripts/tcr/TESSA_source/post_analysis.R,sha256=6RX7gdqd1N7lQVfwJBgjfEZEwi1Q4r15mpAthsLSSJ8,5090
181
+ biopipen/scripts/tcr/TESSA_source/real_data.R,sha256=tg3BbiTpRVQWBRZStnzkC2jI1PCmxUA0fjWK09J6grw,2448
182
+ biopipen/scripts/tcr/TESSA_source/update.R,sha256=kVrf6zgIkhhiQ2O55XGmxm_DGBKSpcShFgkyPNQIet0,6661
183
+ biopipen/scripts/tcr/TESSA_source/utility.R,sha256=6qbkMV7yp4bgQe718QiASQUAgOE2euiLgAXOeSqrvHQ,374
169
184
  biopipen/scripts/tcr/VJUsage.R,sha256=LjHEbAHW3WriCYiM9-T6Esd4jc6pnoiSxBKTN_YA490,437
170
185
  biopipen/scripts/tcr/vdjtools-patch.sh,sha256=rL5qp2S18CrpqduKkeR1HVmmuWhCVJOMKJXvWiKdYIc,566
171
186
  biopipen/scripts/vcf/TruvariBench.sh,sha256=80yLQ73OzSgsJ4ltzgpcWxYvvX1hFnCG8YSBhhhRQ9Y,765
@@ -195,7 +210,7 @@ biopipen/utils/plot.R,sha256=pzl37PomNeUZPxohHZ2w93j3Fc4T0Qrc62FF-9MTKdw,4417
195
210
  biopipen/utils/reference.py,sha256=6bPSwQa-GiDfr7xLR9a5T64Ey40y24yn3QfQ5wDFZkU,4420
196
211
  biopipen/utils/rnaseq.R,sha256=Ro2B2dG-Z2oVaT5tkwp9RHBz4dp_RF-JcizlM5GYXFs,1298
197
212
  biopipen/utils/vcf.py,sha256=ajXs0M_QghEctlvUlSRjWQIABVF02wPdYd-0LP4mIsU,9377
198
- biopipen-0.17.6.dist-info/METADATA,sha256=cppV1wETU_aHmQNPFqvQrfBeY1BRtvGkYrf7L4GkJo0,863
199
- biopipen-0.17.6.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
200
- biopipen-0.17.6.dist-info/entry_points.txt,sha256=sfI6oDEEuMvAg0KNujE9uu-c29y7IwQQA1_A2sUjPhc,527
201
- biopipen-0.17.6.dist-info/RECORD,,
213
+ biopipen-0.18.0.dist-info/METADATA,sha256=KctoN8cx0TfZIBSFiXSDNvB64FkCD14Z_07zamsbG1w,869
214
+ biopipen-0.18.0.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
215
+ biopipen-0.18.0.dist-info/entry_points.txt,sha256=sfI6oDEEuMvAg0KNujE9uu-c29y7IwQQA1_A2sUjPhc,527
216
+ biopipen-0.18.0.dist-info/RECORD,,