biopipen 0.31.7__py3-none-any.whl → 0.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

@@ -5,220 +5,222 @@ import subprocess as sp
5
5
  import pandas as pd
6
6
  from GIANA4 import *
7
7
 
8
- def CreateReference(rFile, outdir='./', Vgene=True, ST=3):
8
+
9
+ def CreateReference(rFile, outdir="./", Vgene=True, ST=3):
9
10
  ## convert input reference file into a python workplace
10
- h=open(rFile)
11
- alines=h.readlines()
12
- ww=alines[0].strip().split('\t')
13
- if not ww[0].startswith('C'):
11
+ h = open(rFile)
12
+ alines = h.readlines()
13
+ ww = alines[0].strip().split("\t")
14
+ if not ww[0].startswith("C"):
14
15
  ## header line
15
- hline=alines[0]
16
- alines=alines[1:]
17
- elif 'CDR3' in ww[0]:
18
- hline=alines[0]
19
- alines=alines[1:]
16
+ hline = alines[0]
17
+ alines = alines[1:]
18
+ elif "CDR3" in ww[0]:
19
+ hline = alines[0]
20
+ alines = alines[1:]
20
21
  else:
21
- hline='CDR3\t'+'\t'.join(['Info'+str(x) for x in range(len(ww)-1)])
22
- seqs=[]
23
- vgs=[]
24
- infoList=[]
25
- count=0
22
+ hline = "CDR3\t" + "\t".join(["Info" + str(x) for x in range(len(ww) - 1)])
23
+ seqs = []
24
+ vgs = []
25
+ infoList = []
26
+ count = 0
26
27
  for ll in alines:
27
- ww=ll.strip().split('\t')
28
- cdr3=ww[0]
29
- if '*' in cdr3:
28
+ ww = ll.strip().split("\t")
29
+ cdr3 = ww[0]
30
+ if "*" in cdr3:
30
31
  continue
31
- if '_' in cdr3:
32
+ if "_" in cdr3:
32
33
  continue
33
34
  seqs.append(ww[0])
34
35
  if Vgene:
35
36
  vgs.append(ww[1])
36
- infoList.append('\t'.join(ww[1:]))
37
+ infoList.append("\t".join(ww[1:]))
37
38
  else:
38
- infoList.append('\t'.join(ww[1:]))
39
- count+=1
40
- LD,VD, ID,SD= BuildLengthDict(seqs, vGene=vgs,INFO=infoList,sIDs=[x for x in range(len(seqs))])
39
+ infoList.append("\t".join(ww[1:]))
40
+ count += 1
41
+ LD, VD, ID, SD = BuildLengthDict(
42
+ seqs, vGene=vgs, INFO=infoList, sIDs=[x for x in range(len(seqs))]
43
+ )
41
44
  LDu_r, VDu_r, IDu_r, SDu_r = CollapseUnique(LD, VD, ID, SD)
42
- flagLD_r={}
43
- dMD_r={}
45
+ flagLD_r = {}
46
+ dMD_r = {}
44
47
  for kk in LDu_r:
45
- vss=SDu_r[kk]
46
- vInfo=IDu_r[kk]
47
- flagL=[len(x)-1 for x in vInfo]
48
- flagLD_r[kk]=flagL
49
- dM=np.array([EncodingCDR3(x[ST:-2], M6, n0) for x in vss])
50
- dM=dM.astype("float32")
51
- dMD_r[kk]=dM
52
- ## ff0=re.sub('.txt','',rFile)
53
- ## outfile=outdir+ff0+'_giana_ref.shelve'
54
- ## giana_shelf = shelve.open(outfile, 'n')
55
- ## giana_shelf['flagLD']=flagLD_r
56
- ## giana_shelf['dMD']=dMD_r
57
- ## giana_shelf['LDu']=LDu_r
58
- ## giana_shelf['VDu']=VDu_r
59
- ## giana_shelf['IDu']=IDu_r
60
- ## giana_shelf['SDu']=SDu_r
61
- ## giana_shelf.close()
48
+ vss = SDu_r[kk]
49
+ vInfo = IDu_r[kk]
50
+ flagL = [len(x) - 1 for x in vInfo]
51
+ flagLD_r[kk] = flagL
52
+ dM = np.array([EncodingCDR3(x[ST:-2], M6, n0) for x in vss])
53
+ dM = dM.astype("float32")
54
+ dMD_r[kk] = dM
55
+ ## ff0=re.sub('.txt','',rFile)
56
+ ## outfile=outdir+ff0+'_giana_ref.shelve'
57
+ ## giana_shelf = shelve.open(outfile, 'n')
58
+ ## giana_shelf['flagLD']=flagLD_r
59
+ ## giana_shelf['dMD']=dMD_r
60
+ ## giana_shelf['LDu']=LDu_r
61
+ ## giana_shelf['VDu']=VDu_r
62
+ ## giana_shelf['IDu']=IDu_r
63
+ ## giana_shelf['SDu']=SDu_r
64
+ ## giana_shelf.close()
62
65
  return [LDu_r, VDu_r, IDu_r, SDu_r, dMD_r]
63
66
 
64
- def MakeQuery(qFile, rData=[],dbFile=None, Vgene=True, thr=7, ST=3, thr_s=3.3):
67
+
68
+ def MakeQuery(qFile, rData=[], dbFile=None, Vgene=True, thr=7, ST=3, thr_s=3.3):
65
69
  if dbFile is not None:
66
70
  with shelve.open(dbFile) as db:
67
71
  for key in db:
68
- globals()[key]=db[key]
72
+ globals()[key] = db[key]
69
73
  else:
70
- if len(rData)==0:
71
- raise("Need to provide either a reference file or a shelve")
72
- LDu_r=rData[0]
73
- VDu_r=rData[1]
74
- IDu_r=rData[2]
75
- SDu_r=rData[3]
76
- dMD_r=rData[4]
77
- h=open(qFile)
78
- alines=h.readlines()
79
- ww=alines[0].strip().split('\t')
80
- if not ww[0].startswith('C'):
74
+ if len(rData) == 0:
75
+ raise ("Need to provide either a reference file or a shelve")
76
+ LDu_r = rData[0]
77
+ VDu_r = rData[1]
78
+ IDu_r = rData[2]
79
+ SDu_r = rData[3]
80
+ dMD_r = rData[4]
81
+ h = open(qFile)
82
+ alines = h.readlines()
83
+ ww = alines[0].strip().split("\t")
84
+ if not ww[0].startswith("C"):
81
85
  ## header line
82
- hline=alines[0]
83
- alines=alines[1:]
84
- elif 'CDR3' in ww[0]:
85
- hline=alines[0]
86
- alines=alines[1:]
86
+ hline = alines[0]
87
+ alines = alines[1:]
88
+ elif "CDR3" in ww[0]:
89
+ hline = alines[0]
90
+ alines = alines[1:]
87
91
  else:
88
- hline='CDR3\t'+'\t'.join(['Info'+str(x) for x in range(len(ww)-1)])
89
- seqs=[]
90
- vgs=[]
91
- infoList=[]
92
- count=0
92
+ hline = "CDR3\t" + "\t".join(["Info" + str(x) for x in range(len(ww) - 1)])
93
+ seqs = []
94
+ vgs = []
95
+ infoList = []
96
+ count = 0
93
97
  for ll in alines:
94
- ww=ll.strip().split('\t')
95
- cdr3=ww[0]
96
- if '*' in cdr3:
98
+ ww = ll.strip().split("\t")
99
+ cdr3 = ww[0]
100
+ if "*" in cdr3:
97
101
  continue
98
- if '_' in cdr3:
102
+ if "_" in cdr3:
99
103
  continue
100
104
  seqs.append(ww[0])
101
105
  if Vgene:
102
106
  vgs.append(ww[1])
103
- infoList.append('\t'.join(ww[1:]))
107
+ infoList.append("\t".join(ww[1:]))
104
108
  else:
105
- infoList.append('\t'.join(ww[1:]))
106
- count+=1
107
- LD,VD, ID,SD= BuildLengthDict(seqs, vGene=vgs,INFO=infoList,sIDs=[x for x in range(len(seqs))])
109
+ infoList.append("\t".join(ww[1:]))
110
+ count += 1
111
+ LD, VD, ID, SD = BuildLengthDict(
112
+ seqs, vGene=vgs, INFO=infoList, sIDs=[x for x in range(len(seqs))]
113
+ )
108
114
  LDu, VDu, IDu, SDu = CollapseUnique(LD, VD, ID, SD)
109
- tmpFile='tmp_query.txt'
110
- g=open(tmpFile,'w')
115
+ tmpFile = "tmp_query.txt"
116
+ g = open(tmpFile, "w")
111
117
  for kk in LDu:
112
- vss=SDu[kk]
113
- vInfo=IDu[kk]
114
- vss_r=SDu_r[kk]
115
- vInfo_r=IDu_r[kk]
116
- flagL=[len(x)-1 for x in vInfo]
117
- dM_r=dMD_r[kk]
118
- dM=np.array([EncodingCDR3(x[ST:-2], M6, n0) for x in vss])
119
- dM=dM.astype("float32")
120
- nq=dM.shape[0]
121
- nr=dM_r.shape[0]
122
- vssc=vss+vss_r
123
- vInfoc=vInfo+vInfo_r
124
- dMc=np.concatenate((dM, dM_r))
125
- index = faiss.IndexFlatL2(Ndim*6)
118
+ vss = SDu[kk]
119
+ vInfo = IDu[kk]
120
+ vss_r = SDu_r[kk]
121
+ vInfo_r = IDu_r[kk]
122
+ flagL = [len(x) - 1 for x in vInfo]
123
+ dM_r = dMD_r[kk]
124
+ dM = np.array([EncodingCDR3(x[ST:-2], M6, n0) for x in vss])
125
+ dM = dM.astype("float32")
126
+ nq = dM.shape[0]
127
+ nr = dM_r.shape[0]
128
+ vssc = vss + vss_r
129
+ vInfoc = vInfo + vInfo_r
130
+ dMc = np.concatenate((dM, dM_r))
131
+ index = faiss.IndexFlatL2(Ndim * 6)
126
132
  index.add(dMc)
127
133
  D, I = index.search(dM, 2)
128
- vv=np.where((D[0:nq,1]<=thr))[0]
129
- flagL=np.array(flagL)
130
- vv0=np.where((D[0:nq,1]>thr) & (flagL>0))[0]
131
- curList=[]
134
+ vv = np.where((D[0:nq, 1] <= thr))[0]
135
+ flagL = np.array(flagL)
136
+ vv0 = np.where((D[0:nq, 1] > thr) & (flagL > 0))[0]
137
+ curList = []
132
138
  for v in vv0:
133
139
  for ii in range(len(vInfoc[v])):
134
- line=vssc[v]+'\t'+vInfoc[v][ii]+'\t'+'query\n'
135
- _=g.write(line)
140
+ line = vssc[v] + "\t" + vInfoc[v][ii] + "\t" + "query\n"
141
+ _ = g.write(line)
136
142
  for v in vv:
137
- tmpI=I[v,]
143
+ tmpI = I[v,]
138
144
  if v not in tmpI:
139
- tmpI[0]=v
140
- idx1=tmpI[0]
141
- idx2=tmpI[1]
142
- c1=vssc[idx1]
143
- c2=vssc[idx2]
144
- info1=vInfoc[idx1]
145
- info2=vInfoc[idx2]
145
+ tmpI[0] = v
146
+ idx1 = tmpI[0]
147
+ idx2 = tmpI[1]
148
+ c1 = vssc[idx1]
149
+ c2 = vssc[idx2]
150
+ info1 = vInfoc[idx1]
151
+ info2 = vInfoc[idx2]
146
152
  for tmpInfo in info1:
147
- tup1=(c1, tmpInfo)
153
+ tup1 = (c1, tmpInfo)
148
154
  if tup1 not in curList:
149
- if idx1<nq:
150
- line1=c1+'\t'+tmpInfo+'\t'+'query\n'
155
+ if idx1 < nq:
156
+ line1 = c1 + "\t" + tmpInfo + "\t" + "query\n"
151
157
  else:
152
- line1=c1+'\t'+tmpInfo+'\t'+'ref\n'
153
- _=g.write(line1)
158
+ line1 = c1 + "\t" + tmpInfo + "\t" + "ref\n"
159
+ _ = g.write(line1)
154
160
  curList.append(tup1)
155
161
  for tmpInfo in info2:
156
- tup2=(c2, tmpInfo)
162
+ tup2 = (c2, tmpInfo)
157
163
  if tup2 not in curList:
158
- if idx2<nq:
159
- line2=c2+'\t'+tmpInfo+'\t'+'query\n'
164
+ if idx2 < nq:
165
+ line2 = c2 + "\t" + tmpInfo + "\t" + "query\n"
160
166
  else:
161
- line2=c2+'\t'+tmpInfo+'\t'+'ref\n'
162
- _=g.write(line2)
167
+ line2 = c2 + "\t" + tmpInfo + "\t" + "ref\n"
168
+ _ = g.write(line2)
163
169
  curList.append(tup2)
164
170
  g.close()
165
- cmd='python3 GIANA4.1.py -f tmp_query.txt -S '+str(thr_s) ## updated to GIANA4.1
166
- p=sp.run(cmd, shell=True)
167
-
168
- def MergeExist(refClusterFile, outFile='queryFinal.txt',queryClusterFile='tmp_query--RotationEncodingBL62.txt', direction='q'):
171
+ cmd = "python3 GIANA4.1.py -f tmp_query.txt -S " + str(
172
+ thr_s
173
+ ) ## updated to GIANA4.1
174
+ p = sp.run(cmd, shell=True)
175
+
176
+
177
+ def MergeExist(
178
+ refClusterFile,
179
+ outFile="queryFinal.txt",
180
+ queryClusterFile="tmp_query--RotationEncodingBL62.txt",
181
+ direction="q",
182
+ ):
169
183
  ## This function compare the query file with ref cluster file and merge the two based on shared TCRs
170
184
  ## If direction is 'q', the overlapping clusters will be added to the query file
171
185
  ## If direction is 'r', the overlapping and non-overlapping clusters will be added to the reference file
172
- refT=pd.read_table(refClusterFile, skiprows=2, delimiter='\t', header=None)
173
- queryT=pd.read_table(queryClusterFile, skiprows=2, delimiter='\t', header=None)
174
- nq=queryT.shape[1]
175
- nr=refT.shape[1]
176
- if nr != nq-1:
186
+ refT = pd.read_table(refClusterFile, skiprows=2, delimiter="\t", header=None)
187
+ queryT = pd.read_table(queryClusterFile, skiprows=2, delimiter="\t", header=None)
188
+ nq = queryT.shape[1]
189
+ nr = refT.shape[1]
190
+ if nr != nq - 1:
177
191
  print("ERROR: Make sure reference and the query samples have the same columns!")
178
192
  print("No query file is generated.")
179
193
  return
180
- gn=np.unique(queryT[1])
181
- queryTs=pd.DataFrame([], columns=queryT.columns)
194
+ gn = np.unique(queryT[1])
195
+ queryTs = pd.DataFrame([], columns=queryT.columns)
182
196
  for nn in gn:
183
- tmp_ddq=queryT.loc[np.where(queryT[1]==nn)[0],:]
184
- cls_lab=np.unique(tmp_ddq[nq-1])
185
- if len(cls_lab)==1:
186
- if cls_lab[0]=='ref':
187
- continue
188
- queryTs=queryTs.append(tmp_ddq)
189
- queryTs.index=range(queryTs.shape[0])
190
- keyr=refT[0]+'_'+refT[2]
191
- keyq=queryTs[0]+'_'+queryTs[2]
192
- vvr=np.where(queryTs[nq-1]=='ref')[0]
193
- vvr_in=np.where(keyr.isin(keyq[vvr]))[0]
194
- gn_r=list(refT.loc[vvr_in,1].drop_duplicates())
195
- ddo=pd.DataFrame([], columns=refT.columns)
197
+ tmp_ddq = queryT.loc[np.where(queryT[1] == nn)[0], :]
198
+ cls_lab = np.unique(tmp_ddq[nq - 1])
199
+ if len(cls_lab) == 1:
200
+ if cls_lab[0] == "ref":
201
+ continue
202
+ queryTs = queryTs.append(tmp_ddq)
203
+ queryTs.index = range(queryTs.shape[0])
204
+ keyr = refT[0] + "_" + refT[2]
205
+ keyq = queryTs[0] + "_" + queryTs[2]
206
+ vvr = np.where(queryTs[nq - 1] == "ref")[0]
207
+ vvr_in = np.where(keyr.isin(keyq[vvr]))[0]
208
+ gn_r = list(refT.loc[vvr_in, 1].drop_duplicates())
209
+ ddo = pd.DataFrame([], columns=refT.columns)
196
210
  for nn in gn_r:
197
- tmp_dd=refT.loc[np.where(refT[1]==nn)[0],:]
198
- tmpkey=tmp_dd[0]+'_'+tmp_dd[2]
199
- vv=np.where(keyq.isin(tmpkey))[0][0]
200
- gq=queryTs[1][vv]
201
- tmp_dd[1]=gq
202
- ddo=ddo.append(tmp_dd)
203
- if direction=='q':
204
- ddo[nq-1]='ref'
211
+ tmp_dd = refT.loc[np.where(refT[1] == nn)[0], :]
212
+ tmpkey = tmp_dd[0] + "_" + tmp_dd[2]
213
+ vv = np.where(keyq.isin(tmpkey))[0][0]
214
+ gq = queryTs[1][vv]
215
+ tmp_dd[1] = gq
216
+ ddo = ddo.append(tmp_dd)
217
+ if direction == "q":
218
+ ddo[nq - 1] = "ref"
205
219
  ## remove groups that contain only ref group
206
- queryTs=queryTs.append(ddo)
207
- queryTs=queryTs.drop_duplicates()
208
- queryTs.to_csv(outFile, sep='\t',header=False,index=False)
209
- # queryTs.index=range(queryTs.shape[0])
210
- if direction=='r':
220
+ queryTs = queryTs.append(ddo)
221
+ queryTs = queryTs.drop_duplicates()
222
+ queryTs.to_csv(outFile, sep="\t", header=False, index=False)
223
+ # queryTs.index=range(queryTs.shape[0])
224
+ if direction == "r":
211
225
  ## to be developed
212
226
  pass
213
-
214
-
215
-
216
-
217
-
218
-
219
-
220
-
221
-
222
-
223
-
224
-
@@ -60,10 +60,12 @@ prepare_clustcr = function(clustcr_dir) {
60
60
  }
61
61
  clustcr_source = '
62
62
  import sys
63
- import pandas as pd
64
- import clustcr
65
63
  import atexit
66
64
 
65
+ import pandas as pd
66
+ from scipy import sparse as scipy_sparse
67
+
68
+
67
69
  @atexit.register
68
70
  def clustcr_exit():
69
71
  import pandas as pd
@@ -78,13 +80,32 @@ def clustcr_exit():
78
80
  sys.stderr.write(f"- sklearn: {sklearn.__version__}\\n")
79
81
  sys.stderr.write(f"- matplotlib: {matplotlib.__version__}\\n")
80
82
 
83
+
84
+ # Monkey-patch scipy.sparse.isspmatrix to adopt latest scipy v1.14
85
+ # If not, an error is raised:
86
+ # numpy.linalg.LinAlgError: 0-dimensional array given.
87
+ # Array must be at least two-dimensional
88
+ scipy_sparse.isspmatrix = lambda x: isinstance(
89
+ x,
90
+ (
91
+ scipy_sparse.spmatrix,
92
+ scipy_sparse.csr_array,
93
+ scipy_sparse.csr_matrix,
94
+ scipy_sparse.csc_array,
95
+ scipy_sparse.csc_matrix,
96
+ ),
97
+ )
98
+
99
+
100
+ import clustcr # noqa: #402
101
+
81
102
  clustcr_dir, clustcr_infile = sys.argv[1:3]
82
103
  cdr3df = pd.read_csv(clustcr_infile, index_col=None)
83
104
  cdr3 = cdr3df.iloc[:, 0]
84
105
 
85
- clustering = clustcr.Clustering(%s)
106
+ clustering = clustcr.Clustering()
86
107
  output = clustering.fit(cdr3)
87
- output.clusters_df.to_csv(clustcr_dir + "/clusters.txt", sep="\\t", index=False)
108
+ output.clusters_df.to_csv(clustcr_dir + "/clusters.txt", sep="\t", index=False)
88
109
  '
89
110
  clustcr_file = file.path(clustcr_dir, "_clustcr.py")
90
111
  cat(sprintf(clustcr_source, clustering_args), file=clustcr_file)
@@ -1,7 +1,7 @@
1
1
  suppressPackageStartupMessages(library(rlang))
2
2
  suppressPackageStartupMessages(library(dplyr))
3
3
  suppressPackageStartupMessages(library(tidyr))
4
- suppressPackageStartupMessages(library(immunarch))
4
+ try(suppressPackageStartupMessages(library(immunarch)))
5
5
 
6
6
  #' Expand a Immunarch object into cell-level
7
7
  #'
@@ -114,3 +114,94 @@ immdata_from_expanded <- function(
114
114
  )
115
115
  out
116
116
  }
117
+
118
+ #' Convert Seurat object to Anndata
119
+ #'
120
+ #' @param sobjfile Seurat object file
121
+ #' @param outfile Output file
122
+ #' @param assay Assay to be used
123
+ #'
124
+ #' @export
125
+ seurat_to_anndata <- function(sobjfile, outfile, assay = NULL, log_info, tmpdir = NULL, log_indent = "") {
126
+ library(Seurat)
127
+ library(SeuratDisk)
128
+ library(hdf5r)
129
+ if (endsWith(sobjfile, ".rds") || endsWith(sobjfile, ".RDS")) {
130
+ library(digest)
131
+
132
+ dig <- digest::digest(sobjfile, algo = "md5")
133
+ dig <- substr(dig, 1, 8)
134
+ assay_name <- ifelse(is.null(assay), "", paste0("_", assay))
135
+ tmpdir <- tmpdir %||% dirname(outfile)
136
+ dir.create(tmpdir, showWarnings = FALSE)
137
+ h5seurat_file <- file.path(
138
+ tmpdir,
139
+ paste0(
140
+ tools::file_path_sans_ext(basename(outfile)),
141
+ assay_name, ".", dig, ".h5seurat"
142
+ )
143
+ )
144
+ if (file.exists(h5seurat_file) &&
145
+ (file.mtime(h5seurat_file) < file.mtime(sobjfile))) {
146
+ file.remove(h5seurat_file)
147
+ }
148
+ if (!file.exists(h5seurat_file)) {
149
+ log_info("{log_indent}Reading RDS file ...")
150
+ sobj <- readRDS(sobjfile)
151
+ assay <- assay %||% DefaultAssay(sobj)
152
+ # In order to convert to h5ad
153
+ # https://github.com/satijalab/seurat/issues/8220#issuecomment-1871874649
154
+ sobj$RNAv3 <- as(object = sobj[[assay]], Class = "Assay")
155
+ DefaultAssay(sobj) <- "RNAv3"
156
+ sobj$RNA <- NULL
157
+ sobj <- RenameAssays(sobj, RNAv3 = "RNA")
158
+
159
+ log_info("{log_indent}Saving to H5Seurat file ...")
160
+ SaveH5Seurat(sobj, h5seurat_file)
161
+ rm(sobj)
162
+ gc()
163
+ sobjfile <- h5seurat_file
164
+ } else {
165
+ log_info("{log_indent}Using existing H5Seurat file ...")
166
+ }
167
+ }
168
+
169
+ if (!endsWith(sobjfile, ".h5seurat")) {
170
+ stop(paste0("Unknown input file format: ",
171
+ tools::file_ext(sobjfile),
172
+ ". Supported formats: .rds, .RDS, .h5seurat"))
173
+ }
174
+
175
+ log_info("{log_indent}Converting to Anndata ...")
176
+ Convert(sobjfile, dest = outfile, assay = assay %||% "RNA", overwrite = TRUE)
177
+
178
+ log_info("{log_indent}Fixing categorical data ...")
179
+ # See: https://github.com/mojaveazure/seurat-disk/issues/183
180
+ H5.create_reference <- function(self, ...) {
181
+ space <- self$get_space()
182
+ do.call("[", c(list(space), list(...)))
183
+ ref_type <- hdf5r::h5const$H5R_OBJECT
184
+ ref_obj <- hdf5r::H5R_OBJECT$new(1, self)
185
+ res <- .Call("R_H5Rcreate", ref_obj$ref, self$id, ".", ref_type,
186
+ space$id, FALSE, PACKAGE = "hdf5r")
187
+ if (res$return_val < 0) {
188
+ stop("Error creating object reference")
189
+ }
190
+ ref_obj$ref <- res$ref
191
+ return(ref_obj)
192
+ }
193
+
194
+ h5ad <- H5File$new(outfile, "r+")
195
+ cats <- names(h5ad[["obs/__categories"]])
196
+ for (cat in cats) {
197
+ catname <- paste0("obs/__categories/", cat)
198
+ obsname <- paste0("obs/", cat)
199
+ ref <- H5.create_reference(h5ad[[catname]])
200
+ h5ad[[obsname]]$create_attr(
201
+ attr_name = "categories",
202
+ robj = ref,
203
+ space = H5S$new(type = "scalar")
204
+ )
205
+ }
206
+ h5ad$close()
207
+ }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: biopipen
3
- Version: 0.31.7
3
+ Version: 0.32.0
4
4
  Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
5
5
  License: MIT
6
6
  Author: pwwang
@@ -1,4 +1,4 @@
1
- biopipen/__init__.py,sha256=APQVRwZptBFPacKCHqg_tW4g4--qdUtMJoA6GprSuSI,23
1
+ biopipen/__init__.py,sha256=s1FApnNsg-mmWP3shY6ep6EAjZrNFXHlyBXWcP2KV6Q,23
2
2
  biopipen/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  biopipen/core/config.py,sha256=edK5xnDhM8j27srDzsxubi934NMrglLoKrdcC8qsEPk,1069
4
4
  biopipen/core/config.toml,sha256=7IXvviRicZ2D1h6x3BVgbLJ96nsh-ikvZ0sVlQepqFE,1944
@@ -22,7 +22,7 @@ biopipen/ns/plot.py,sha256=XzLq0A8qCIQRbxhPEdWhEfbRZ8g3e4KriVz0RP8enNY,18078
22
22
  biopipen/ns/protein.py,sha256=33pzM-gvBTw0jH60mvfqnriM6uw2zj3katZ82nC9owI,3309
23
23
  biopipen/ns/regulatory.py,sha256=gJjGVpJrdv-rg2t5UjK4AGuvtLNymaNYNvoD8PhlbvE,15929
24
24
  biopipen/ns/rnaseq.py,sha256=bKAa6friFWof4yDTWZQahm1MS-lrdetO1GqDKdfxXYc,7708
25
- biopipen/ns/scrna.py,sha256=fXP_h7gchcuk_Jwos0IgY_P8ON6Q995OgKHgdrxfvAY,112868
25
+ biopipen/ns/scrna.py,sha256=viIfqTH_0f7kSpHpKfrUf7HUr_w-V_oRYRj1TwuXYNs,120878
26
26
  biopipen/ns/scrna_metabolic_landscape.py,sha256=6AhaynGG3lNRi96N2tReVT46BJMuEwooSSd2irBoN80,28347
27
27
  biopipen/ns/snp.py,sha256=-Jx5Hsv_7KV7TqLU0nHCaPkMEN0CFdi4tNVlyq0rUZ4,27259
28
28
  biopipen/ns/stats.py,sha256=DlPyK5Vsg6ZEkV9SDS3aAw21eXzvOHgqeZDkXPhg7go,20509
@@ -46,6 +46,7 @@ biopipen/reports/delim/SampleInfo.svelte,sha256=Vpjrdd3AXNDNhx2pzDaFA61xPMYAFKvv
46
46
  biopipen/reports/gsea/FGSEA.svelte,sha256=3gNbZovmRWK6QjtxfgZgmft6LUurVZfQyHBRqyGMosk,405
47
47
  biopipen/reports/gsea/GSEA.svelte,sha256=lYHf8h8RLx3i-jNCEGu_LM-dbYm9ZJDzyAEadsZXzmQ,417
48
48
  biopipen/reports/protein/ProdigySummary.svelte,sha256=WEYPwmcmgtSqpFpTvNPV40yZR-7ERa5LgZni2RXxUZQ,348
49
+ biopipen/reports/scrna/CellCellCommunicationPlots.svelte,sha256=McLQtgzbJGnb9o3GC17sRee96e-naPNPIF8SQyRp-Kc,346
49
50
  biopipen/reports/scrna/CellsDistribution.svelte,sha256=Mg6P0tazpzOxsOAtc-0LGEim5KprPt5KIgoW3TzrxxA,413
50
51
  biopipen/reports/scrna/DimPlots.svelte,sha256=ZLbtN0ioevRyEky4jb_DkDGAcpy_jAhaHfFym5ELEPM,479
51
52
  biopipen/reports/scrna/MarkersFinder.svelte,sha256=77rD1psj0VJykPDhfwS-B8mubvaasREAE6RYR2atTN4,444
@@ -53,7 +54,7 @@ biopipen/reports/scrna/MetaMarkers.svelte,sha256=iIFRKjvVYrM1AtDWqq8UfeS8q23R8FK
53
54
  biopipen/reports/scrna/RadarPlots.svelte,sha256=g_fp9d3vdnzk-egXPhkhhfWXOeG569Rj8rYLRIKmlLc,396
54
55
  biopipen/reports/scrna/ScFGSEA.svelte,sha256=Gqt-XjqsB3XgdR3XukvphwyMExZpScwqgEo7AD-gK6g,491
55
56
  biopipen/reports/scrna/SeuratClusterStats.svelte,sha256=rTsoOCiVm7lEyBYgu2ZVcKNGi4g6PynYoOSx0DHdTHo,429
56
- biopipen/reports/scrna/SeuratMap2Ref.svelte,sha256=SYuoKJR1PuBf9c7WvUXXjmokVo-JxFLqZqiu9kTtLuY,542
57
+ biopipen/reports/scrna/SeuratMap2Ref.svelte,sha256=1g-FEmvR2YMn0OF66qESNBWmGdHQY3u8QuG4M-T7Y4E,659
57
58
  biopipen/reports/scrna/SeuratPreparing.svelte,sha256=CCQ3qodjfoQeMZhSZp7irvSqBt10m7urzcWEOHsZa8M,365
58
59
  biopipen/reports/scrna/TopExpressingGenes.svelte,sha256=h2Or2cKCzuY2cypq1PKrqiHFVMZmFUgFOXoQumvfGSk,471
59
60
  biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte,sha256=4M8V6jDv1_ZviTsRdoYNQixoTnKiD06oPHzVabUTq6k,978
@@ -138,7 +139,10 @@ biopipen/scripts/rnaseq/Simulation-ESCO.R,sha256=68cEHDdJclX8P8Q7ey9yBOfK09M_kxl
138
139
  biopipen/scripts/rnaseq/Simulation-RUVcorr.R,sha256=6C6Ke5RLF0fC2V9WQPoFEdqoDabCnhslZBIyB6zhIxc,1155
139
140
  biopipen/scripts/rnaseq/Simulation.R,sha256=PK9tZS88AcBPStcFalZlMU0KE0gSqFSQvhUoQ-8eg90,871
140
141
  biopipen/scripts/rnaseq/UnitConversion.R,sha256=pr1uZuzjapZKwIupE-xtE2HW-OwdVCGBVaHQ_3ipIDw,11377
141
- biopipen/scripts/scrna/AnnData2Seurat.R,sha256=8VEXHnknoEPWRQ0cj80so6O5FAd1_YlwQ0CP0TwmZVo,2555
142
+ biopipen/scripts/scrna/AnnData2Seurat.R,sha256=PohvXtUziu0_A_HL5HaOYG1wSVdLgMaV8yhzEKDQT84,2814
143
+ biopipen/scripts/scrna/CCPlotR-patch.R,sha256=KpB8fwacBaWaUNjIidcLUkMShLjS4Gq9UY8LUgIITB0,8369
144
+ biopipen/scripts/scrna/CellCellCommunication.py,sha256=1txufcxFsniB9UET_L2RA2qaQZfjidjUbUsese_hoGE,2899
145
+ biopipen/scripts/scrna/CellCellCommunicationPlots.R,sha256=KE-yy7TkMjPz4opfQgosr6NfMQQYeg4mgzfDod-RJrQ,6104
142
146
  biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R,sha256=2lkIwtSw2A21RCUBdo3BVUdAwsLTIq65NDA-ZR1YWE0,9644
143
147
  biopipen/scripts/scrna/CellTypeAnnotation-common.R,sha256=s2exGdJ3WM8mDDeBXGUxuKO1DAGVgRiirHaJD0lPgDw,336
144
148
  biopipen/scripts/scrna/CellTypeAnnotation-direct.R,sha256=hmuk3G4IbF66Sv4RmrcrKn8b4J7ZEBD5PzJHa5EKYB8,2125
@@ -159,7 +163,7 @@ biopipen/scripts/scrna/RadarPlots.R,sha256=4zs0hAm7yq1Ls62f_29koPLqAKCeKbYiztNM-
159
163
  biopipen/scripts/scrna/SCImpute.R,sha256=dSJOHhmJ3x_72LBRXT72dbCti5oiB85CJ-OjWtqONbk,2958
160
164
  biopipen/scripts/scrna/ScFGSEA.R,sha256=MFoJ3i3LFBfsPCxwLPnTh141ZJyrzwnrTuCZIFwvYjU,6318
161
165
  biopipen/scripts/scrna/ScSimulation.R,sha256=b2LtL68ucxLoI57tSEDD0hOSbVHUA_x88Y96eK07N-s,1712
162
- biopipen/scripts/scrna/Seurat2AnnData.R,sha256=G7bcHGffdNlz6Uuy98tQdlahXiPkTDokflp1yTUgcSQ,1578
166
+ biopipen/scripts/scrna/Seurat2AnnData.R,sha256=zXkdHEqv6H8RCw1bEPStvYHVpu5oRnioWxz164MgHxQ,276
163
167
  biopipen/scripts/scrna/SeuratClusterStats-clustree.R,sha256=FkbniQMHiZGrFAIuS8nUfPVJKFofSL-ZnpxTqIhTa54,3058
164
168
  biopipen/scripts/scrna/SeuratClusterStats-dimplots.R,sha256=NEdlJHNXnJZfF7YkefYVWTPO8Z_KAppRAs9rNvB8TXs,2360
165
169
  biopipen/scripts/scrna/SeuratClusterStats-features.R,sha256=DeGo7AkBRq0V3Y3JDaifId6rrr5dwawTzcSAJ3W1lxE,15614
@@ -171,7 +175,7 @@ biopipen/scripts/scrna/SeuratClustering-common.R,sha256=JX4Cn2FC6GOcBqaVyGDD3MM5
171
175
  biopipen/scripts/scrna/SeuratClustering.R,sha256=0OKRBQ5rFuupK7c03_sSt2HMwMdMnCYFqTvkRXFKchs,1706
172
176
  biopipen/scripts/scrna/SeuratFilter.R,sha256=BrYK0MLdaTtQvInMaQsmOt7oH_hlks0M1zykkJtg2lM,509
173
177
  biopipen/scripts/scrna/SeuratLoading.R,sha256=ekWKnHIqtQb3kHVQiVymAHXXqiUxs6KKefjZKjaykmk,900
174
- biopipen/scripts/scrna/SeuratMap2Ref.R,sha256=B0RZ2k1IUKdhe34SjU-8CxvYGj7ub-z7JJaSE0snCok,12040
178
+ biopipen/scripts/scrna/SeuratMap2Ref.R,sha256=MP6ttYv8bK5kzWNoP3DXEad06snotVOqCqdvBHsOasc,12776
175
179
  biopipen/scripts/scrna/SeuratMetadataMutater.R,sha256=PMwG0Xvl_EEVKkicfrIi4arEqpY948PkYLkb59kTAXI,1135
176
180
  biopipen/scripts/scrna/SeuratPreparing-common.R,sha256=WuD7lGS17eAUQWSiIdAoV0EIeqS3Tnkkx-7PbP6Q3tc,16279
177
181
  biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R,sha256=TNN2lfFjpnnO0rguMsG38JYCP1nFUhcPLJ1LqGj-Sc8,6674
@@ -212,10 +216,10 @@ biopipen/scripts/tcr/Attach2Seurat.R,sha256=0KZaBkuPvqOBXq4ZG3pzIIua5HL-161K5dVX
212
216
  biopipen/scripts/tcr/CDR3AAPhyschem.R,sha256=ZxHyn4vJF-1PDbnbozovOipET_QZKqeFrKUibRpLgzw,16825
213
217
  biopipen/scripts/tcr/CloneResidency.R,sha256=KAcFB39vTYsk8IEj44s8oSFVhKjpAdJ8hkpKxtdWzRA,21540
214
218
  biopipen/scripts/tcr/CloneSizeQQPlot.R,sha256=hds1C80Q_W40Ikp-BrFfvh_aBf_V61lz-1YAvkDESyk,4569
215
- biopipen/scripts/tcr/GIANA/GIANA.py,sha256=0qLhgCWxT8K-4JvORA03CzBPTT5pd4Di5B_DgrHXbFA,47198
216
- biopipen/scripts/tcr/GIANA/GIANA4.py,sha256=Z7Q3cUr1Pvmy4CFADN0P7i9g1-HbzWROMqk5HvL_F1Q,45762
219
+ biopipen/scripts/tcr/GIANA/GIANA.py,sha256=jo0d58K57CF4W6mc2Q-hQn9rLl6oLHTsr5JceP8xqN0,54874
220
+ biopipen/scripts/tcr/GIANA/GIANA4.py,sha256=X_iXdgKZ9V5T87jXBdml0H6bwiDjsAqoPKhsdF72lj8,53904
217
221
  biopipen/scripts/tcr/GIANA/Imgt_Human_TRBV.fasta,sha256=XUwDPXJxVH5O9Q0gCL6EILKXEwVyiAZXm4VS2vkPcnQ,15371
218
- biopipen/scripts/tcr/GIANA/query.py,sha256=5NWSEDNrJomMt48tzLGpRwJTZB0zQxvMVTilyG8osX8,7298
222
+ biopipen/scripts/tcr/GIANA/query.py,sha256=YJeCk24Hab7XfQxZ5VNesdjr_LVxPH0KH9rwGm1ePf4,7680
219
223
  biopipen/scripts/tcr/Immunarch-basic.R,sha256=2UAQtGEDKgf5LmPeGlNkkAfQOCtdYpCiBMAn-JHOV4A,3036
220
224
  biopipen/scripts/tcr/Immunarch-clonality.R,sha256=NN6WikD-9BKodgKRAbvM2oMLrhn9FUWjqIb69DpHnAk,3744
221
225
  biopipen/scripts/tcr/Immunarch-diversity.R,sha256=6T8-IXIMyy4YXgIA4TXJThy91FqgLVsljqxd3PxLDys,27136
@@ -232,7 +236,7 @@ biopipen/scripts/tcr/ImmunarchLoading.R,sha256=Vw2oIza3mDJzg9kuo-w5jvwdivk4AtDA6
232
236
  biopipen/scripts/tcr/ImmunarchSplitIdents.R,sha256=FGCeGV0uSmFU91lKkldUAeV4A2m3hHw5X4GNi8ffGzI,1873
233
237
  biopipen/scripts/tcr/SampleDiversity.R,sha256=oipN4-2nQZe8bYjI0lZ0SvZ7T8GZ_FWkpkobi1cwmWE,2664
234
238
  biopipen/scripts/tcr/TCRClusterStats.R,sha256=QhXgfKSh27VHO901hDizyGYTXMYFJxW22StchQUq3uE,12906
235
- biopipen/scripts/tcr/TCRClustering.R,sha256=MRCaFbgMhhW9uimVBrE1Q5VqqxQpnKv5W2ILWmHdkU8,9593
239
+ biopipen/scripts/tcr/TCRClustering.R,sha256=Lt_e32c52gJ9fz1abboGXLIR--Lx2dqW0oFlSOCrCVs,10079
236
240
  biopipen/scripts/tcr/TCRDock.py,sha256=jjzxMWp-hs0LDtA1mVbiWDvUieSO7X-F9yeKGy1LSTM,3026
237
241
  biopipen/scripts/tcr/TESSA.R,sha256=XFC2P_e_Gm83jG5EjzVIW6KcyG5IieAaK7sXDA3_oZ0,6864
238
242
  biopipen/scripts/tcr/TESSA_source/Atchley_factors.csv,sha256=SumqDOqP67P54uM7Cuc5_O_rySTWcGo7eX3psMSPX9s,763
@@ -284,9 +288,9 @@ biopipen/utils/mutate_helpers.R,sha256=Bqy6Oi4rrPEPJw0Jq32bVAwwBfZv7JJL9jFcK5x-c
284
288
  biopipen/utils/plot.R,sha256=fmWnCv6EpOU8NvHFvShbdPRqB659vHcDlJAqWIXM8XQ,4415
285
289
  biopipen/utils/reference.py,sha256=oi5evicLwHxF0KAIPNZohBeHJLJQNWFJH0cr2y5pgcg,5873
286
290
  biopipen/utils/rnaseq.R,sha256=Ro2B2dG-Z2oVaT5tkwp9RHBz4dp_RF-JcizlM5GYXFs,1298
287
- biopipen/utils/single_cell.R,sha256=pJjYP8bIZpNAtTQ32rOXhZxaM1Y-6D-xUcK3pql9tbk,4316
291
+ biopipen/utils/single_cell.R,sha256=3jmTV9-kHTXyBA6GRlRf_Ig0jx_5jRr6E1THoa1869k,7564
288
292
  biopipen/utils/vcf.py,sha256=ajXs0M_QghEctlvUlSRjWQIABVF02wPdYd-0LP4mIsU,9377
289
- biopipen-0.31.7.dist-info/METADATA,sha256=dmsDb7Q7iTWvkZjChqUgVqhb2CiQP4j8OA0jBzRYgGY,882
290
- biopipen-0.31.7.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
291
- biopipen-0.31.7.dist-info/entry_points.txt,sha256=BYqHGBQJxyFDNLYqgH64ycI5PYwnlqwYcCFsMvJgzAU,653
292
- biopipen-0.31.7.dist-info/RECORD,,
293
+ biopipen-0.32.0.dist-info/METADATA,sha256=1kWhdtPEVLqTrBh-9eK1I-Fb7KVCycmWJTA4_HXQLlw,882
294
+ biopipen-0.32.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
295
+ biopipen-0.32.0.dist-info/entry_points.txt,sha256=BYqHGBQJxyFDNLYqgH64ycI5PYwnlqwYcCFsMvJgzAU,653
296
+ biopipen-0.32.0.dist-info/RECORD,,