custardpy 0.7.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. custardpy-0.7.7/PKG-INFO +124 -0
  2. custardpy-0.7.7/README.md +32 -0
  3. custardpy-0.7.7/custardpy/CannyEdge.py +75 -0
  4. custardpy-0.7.7/custardpy/Cluster.py +198 -0
  5. custardpy-0.7.7/custardpy/DEG_boundary_analysis +155 -0
  6. custardpy-0.7.7/custardpy/DirectionalRelativeFreq.py +102 -0
  7. custardpy-0.7.7/custardpy/DirectionalityIndex.py +68 -0
  8. custardpy-0.7.7/custardpy/HiCmodule.py +136 -0
  9. custardpy-0.7.7/custardpy/InsulationScore.py +129 -0
  10. custardpy-0.7.7/custardpy/PlotModule.py +311 -0
  11. custardpy-0.7.7/custardpy/__init__.py +3 -0
  12. custardpy-0.7.7/custardpy/checkHiCfile.py +23 -0
  13. custardpy-0.7.7/custardpy/convert_JuicerDump_to_dense.py +52 -0
  14. custardpy-0.7.7/custardpy/custardpy_clustering_boundary +255 -0
  15. custardpy-0.7.7/custardpy/custardpy_differential_DRF +236 -0
  16. custardpy-0.7.7/custardpy/drawSquareMulti +115 -0
  17. custardpy-0.7.7/custardpy/drawSquarePair +88 -0
  18. custardpy-0.7.7/custardpy/drawSquareRatioMulti +97 -0
  19. custardpy-0.7.7/custardpy/drawSquareRatioPair +79 -0
  20. custardpy-0.7.7/custardpy/drawTriangleMulti +118 -0
  21. custardpy-0.7.7/custardpy/drawTrianglePair +104 -0
  22. custardpy-0.7.7/custardpy/generateCmap.py +14 -0
  23. custardpy-0.7.7/custardpy/getBoundaryfromInsulationScore +44 -0
  24. custardpy-0.7.7/custardpy/loadData.py +49 -0
  25. custardpy-0.7.7/custardpy/plotCompartmentGenome +73 -0
  26. custardpy-0.7.7/custardpy/plotHiCMatrix +41 -0
  27. custardpy-0.7.7/custardpy/plotHiCfeature +191 -0
  28. custardpy-0.7.7/custardpy/plotHiCfeature_module.py +401 -0
  29. custardpy-0.7.7/custardpy/plotInsulationScore +39 -0
  30. custardpy-0.7.7/custardpy/plotMultiScaleInsulationScore +31 -0
  31. custardpy-0.7.7/custardpy.egg-info/PKG-INFO +124 -0
  32. custardpy-0.7.7/custardpy.egg-info/SOURCES.txt +35 -0
  33. custardpy-0.7.7/custardpy.egg-info/dependency_links.txt +1 -0
  34. custardpy-0.7.7/custardpy.egg-info/requires.txt +8 -0
  35. custardpy-0.7.7/custardpy.egg-info/top_level.txt +1 -0
  36. custardpy-0.7.7/setup.cfg +4 -0
  37. custardpy-0.7.7/setup.py +60 -0
@@ -0,0 +1,124 @@
1
+ Metadata-Version: 2.4
2
+ Name: custardpy
3
+ Version: 0.7.7
4
+ Summary: Hi-C analysis tools by Python3
5
+ Home-page: https://github.com/rnakato/custardpy
6
+ Author: Ryuichiro Nakato
7
+ Author-email: rnakato@iqb.u-tokyo.ac.jp
8
+ License: GPL3.0
9
+ Keywords: Hi-C analysis,3D genome,NGS
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=3.7
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: numpy>=1.18
15
+ Requires-Dist: pandas>=1.3.0
16
+ Requires-Dist: scipy>=1.3
17
+ Requires-Dist: scikit-learn>=1.0.0
18
+ Requires-Dist: matplotlib>=3.2.2
19
+ Requires-Dist: seaborn>=0.11.1
20
+ Requires-Dist: h1d>=0.2.0
21
+ Requires-Dist: hic-straw>=1.3.0
22
+ Dynamic: author
23
+ Dynamic: author-email
24
+ Dynamic: classifier
25
+ Dynamic: description
26
+ Dynamic: description-content-type
27
+ Dynamic: home-page
28
+ Dynamic: keywords
29
+ Dynamic: license
30
+ Dynamic: requires-dist
31
+ Dynamic: requires-python
32
+ Dynamic: summary
33
+
34
+ # CustardPy: Docker image for 3D genome analysis
35
+
36
+ <img src = "image/CustardPy.jpg" width = 700ptx>
37
+
38
+ This repository contains
39
+ - Source code of **CustardPy** (PyPI),
40
+ - Dockerfile of **CustardPy** Docker image,
41
+ - [Full Manual](https://custardpy.readthedocs.io), and
42
+ - Tutorial data of Hi-C and Micro-C analysis using demo data.
43
+
44
+ ## 0. Changelog
45
+
46
+ See [Changelog](https://github.com/rnakato/CustardPy/blob/main/ChangeLog.md)
47
+
48
+ ## 1. Installation
49
+
50
+ Docker image is available at [DockerHub](https://hub.docker.com/r/rnakato/custardpy).
51
+
52
+ ### 1.1 Docker
53
+
54
+ To use the docker command, type:
55
+
56
+ # Pull docker image
57
+ docker pull rnakato/custardpy
58
+
59
+ # Container login
60
+ docker run --rm -it rnakato/custardpy /bin/bash
61
+ # Execute a command
62
+ docker run --rm -it -v (your directory):/opt/work rnakato/custardpy <command>
63
+
64
+ When calling loops using Juicer HICCUPS, supply the ``--gpus all`` option to allow GPU computation (GPU card needed):
65
+
66
+ docker run --gpus all --rm -it -v (your directory):/opt/work rnakato/custardpy call_HiCCUPS.sh
67
+
68
+ - user:password
69
+ - ubuntu:ubuntu
70
+
71
+ ### 1.2 Singularity/Apptainer
72
+
73
+ Singularity is an alternative way to use CustardPy.
74
+ With this command, you can build the singularity file (.sif) of CustardPy:
75
+
76
+ singularity build custardpy.sif docker://rnakato/custardpy
77
+
78
+ Instead, you can download the CustardPy singularity image from our [Dropbox](https://www.dropbox.com/scl/fo/lptb68dirr9wcncy77wsv/h?rlkey=whhcaxuvxd1cz4fqoeyzy63bf&dl=0) (We use singularity version 3.8.5).
79
+
80
+ Then you can run CustardPy with the command:
81
+
82
+ singularity exec custardpy.sif <command>
83
+
84
+ Singularity will automatically mount the current directory. If you want to access the files in the other directory, use the `--bind` option, for instance:
85
+
86
+ singularity exec --bind /work custardpy.sif <command>
87
+
88
+ This command mounts the `/work` directory.
89
+
90
+ When calling loops using Juicer HICCUPS, supply ``--nv`` option to allow GPU computation (GPU card needed):
91
+
92
+ singularity exec --nv --bind /work custardpy.sif call_HiCCUPS.sh
93
+
94
+ ## 2. Quickstart
95
+
96
+ # download Churros/tutorial directory
97
+ git clone https://github.com/rnakato/CustardPy.git
98
+ cd CustardPy/tutorial/Hi-C/
99
+
100
+ # download fastq and genome data and make index
101
+ bash 00_getdata.sh
102
+
103
+ # Execute Juicer pipeline
104
+ bash QuickStart_juicer.sh
105
+
106
+ ## 3. Usage
107
+
108
+ See https://custardpy.readthedocs.io for the detailed Manual.
109
+
110
+ ## 4. Build Docker image from Dockerfile
111
+
112
+ First clone and move to the repository
113
+
114
+ git clone https://github.com/rnakato/CustardPy.git
115
+ cd CustardPy/Docker
116
+
117
+ Then type:
118
+
119
+ docker build -f Dockerfile.<version> -t <account>/custardpy_juicer .
120
+
121
+ ## 5. Citation
122
+ Please cite this reference when using CustardPy in your study.
123
+
124
+ - Nakato R, Sakata T, Wang J, Nagai LAE, Nagaoka Y, Oba GM, Bando M, Shirahige K, Context-dependent perturbations in chromatin folding and the transcriptome by cohesin and related factors, *Nature Communications*, 2023. doi: [10.1038/s41467-023-41316-4](https://www.nature.com/articles/s41467-023-41316-4)
@@ -0,0 +1,32 @@
1
+ # CustardPy (PyPI)
2
+
3
+ The core component of CustardPy can be installed using pip3:
4
+
5
+ ## Requirements
6
+
7
+ The following are required before installing CustardPy:
8
+
9
+ - Python 3.7+
10
+
11
+ ## Installation
12
+
13
+ pip3 install custardpy
14
+
15
+ ## ChangeLog
16
+
17
+ - 0.7.7 (2026-03-16)
18
+ - Bug fix in `plotInsulationScore` where the matplotlib.pyplot was not imported.
19
+
20
+ - 0.7.6 (2025-06-27)
21
+ - Lint check with Ruff.
22
+
23
+ - 0.7.2 (2024-03-05)
24
+ - Fixed the error messages in `plotHiCfeature_module.py`
25
+
26
+ - 0.7.1 (2024-03-03)
27
+ - Added the function `plot_SamplePair_triu`
28
+
29
+ - 0.7.0 (2024-03-03)
30
+ - Added this ChangeLog
31
+ - Create `plotHiCfeature_module.py`
32
+ - Add `__version__` to __init__.py
@@ -0,0 +1,75 @@
1
+ # Copyright(c) Ryuichiro Nakato <rnakato@iqb.u-tokyo.ac.jp>
2
+ # All rights reserved.
3
+
4
+ import numpy as np
5
+ import matplotlib.pyplot as plt
6
+
7
def getlinalg(gradient, wid):
    """Return the L2 norm of each length-*wid* sliding window over *gradient*.

    Parameters
    ----------
    gradient : 1-D numpy array
    wid : int
        Window width in bins.

    Returns
    -------
    list of ``gradient.size - wid`` norms (empty when the input is shorter
    than the window, matching the original np.arange behavior).
    """
    # comprehension replaces the manual append loop; same order, same values
    return [np.linalg.norm(gradient[i:i + wid])
            for i in range(gradient.size - wid)]
13
+
14
def getMaxGradient(arr, val, order_argrelmax, wid, posi):
    """Return indices of local maxima of *arr* above *val*, shifted back to
    original-matrix coordinates and excluding the diagonal neighborhood."""
    from scipy.signal import argrelmax
    index = argrelmax(arr, order=order_argrelmax)
    index = index[0][np.where(arr[index[0]] > val)]
    index += wid  # linalg indices are shifted by wid relative to the original array
    index = index[np.where((index < posi - 5) | (index > posi + 5))]  # ignore gradients near the diagonal (+/-5 bins)
    return index
21
+
22
def imshowWithvLine(A, posi, index, title, cm):
    """Show matrix *A* with a horizontal line at row *posi* and a dashed
    vertical line at every detected boundary in *index*.

    NOTE(review): line extents are hard-coded to 0..190 — presumably the
    expected matrix width; confirm for other matrix sizes.
    """
    fig = plt.figure(figsize=(6, 6))
    ax1 = fig.add_subplot(1,1,1)
    ax1.imshow(A, clim=(-2, 2), cmap=cm)
    ax1.set_title(title)
    plt.hlines(y=posi, xmin=0, xmax=190, colors='black', linewidths=1)
    for x in index:
        plt.vlines(x=x, ymin=0, ymax=190, colors='green', linewidths=1, linestyles='dashed')
30
+
31
def getIndexMatrix(linalg, segment_len, limit_val, order_argrelmax, wid):
    """Split *linalg* into row segments of *segment_len*, average each, and
    collect candidate boundary indices per segment (list of index arrays)."""
    indexMatrix = []
    length = int(linalg.shape[0]/segment_len)
    for i in range(length):
        # mean gradient profile of this segment
        lin = linalg[i*segment_len:(i+1)*segment_len].mean(axis=0)
        # local maxima above limit_val, excluding the segment-center diagonal
        index_merged = getMaxGradient(lin, limit_val, order_argrelmax, wid, (i+0.5)*segment_len)
        indexMatrix.append(index_merged)

    return indexMatrix
40
+
41
def imshowWithBoundary(A, indexMatrix, segment_len, cm):
    """Show matrix *A* overlaid with per-segment boundary lines (vertical and
    horizontal) taken from *indexMatrix* (output of getIndexMatrix)."""
    fig = plt.figure(figsize=(8, 8))
    ax1 = fig.add_subplot(1,1,1)
    ax1.imshow(A, clim=(-2, 2), cmap=cm)
    for x, array in enumerate(indexMatrix):
        for y in array:
            plt.vlines(x=y, ymin=x*segment_len, ymax=(x+1)*segment_len, colors='green', linewidths=1)
    for x, array in enumerate(indexMatrix):
        for y in array:
            plt.hlines(y=y, xmin=x*segment_len, xmax=(x+1)*segment_len, colors='green', linewidths=1)
51
+
52
def getPeakfromIndexMatrix(indexMatrix, order):
    """Aggregate boundary indices from all segments, plot their frequency
    profile, and return the local maxima (consensus boundary positions).

    Parameters
    ----------
    indexMatrix : list of index arrays (output of getIndexMatrix)
    order : int
        Window passed to scipy.signal.argrelmax.

    Raises
    ------
    ValueError (via zip) when *indexMatrix* contains no indices at all,
    matching the original behavior.
    """
    import collections
    from scipy.signal import argrelmax

    flatten = []
    for array in indexMatrix:
        flatten.extend(array)

    # renamed from 'dict' to avoid shadowing the builtin
    counts = collections.Counter(flatten)
    pairs = sorted(counts.items())
    x, y = zip(*pairs)
    # frequency profile over all positions up to the largest observed index
    freq = np.zeros(max(x) + 1)
    for pos, count in zip(x, y):
        freq[pos] = count

    index = argrelmax(freq, order=order)[0]

    plt.figure(figsize=(12, 4))
    plt.plot(freq)
    for peak in index:
        plt.vlines(x=peak, ymin=0, ymax=freq.max(), colors='green', linewidths=1, linestyles='dashed')
    plt.show()

    return index
@@ -0,0 +1,198 @@
1
+ # Copyright(c) Ryuichiro Nakato <rnakato@iqb.u-tokyo.ac.jp>
2
+ # All rights reserved.
3
+
4
+ import numpy as np
5
+ import matplotlib.pyplot as plt
6
+ import pandas as pd
7
+
8
def KMeansPlot(matrix, title, ncluster):
    """Cluster rows of *matrix* into *ncluster* groups with MiniBatchKMeans,
    scatter-plot the first two columns per cluster, and return the labels."""
    import matplotlib.cm
    from sklearn.cluster import MiniBatchKMeans
    model = MiniBatchKMeans(random_state=0, n_clusters=ncluster, max_iter=10000)
    kmeans = model.fit_predict(matrix)
    # one colormap entry per cluster id
    color = matplotlib.cm.brg(np.linspace(0,1, np.max(kmeans) - np.min(kmeans)+1))

    for i in range(np.min(kmeans), np.max(kmeans)+1):
        plt.plot(matrix[kmeans == i][:,0], matrix[kmeans == i][:,1], ".", color=color[i])
        # label each cluster (1-based id) at its first member's position
        plt.text(matrix[kmeans == i][:,0][0], matrix[kmeans == i][:,1][0], str(i+1), color="black", size=16)
    plt.title(title, size=16)

    return kmeans
21
+
22
# Clustering with DBSCAN
def DBSCANPlot(matrix, title, eps):
    """Cluster rows of *matrix* with DBSCAN(eps), scatter-plot the first two
    columns per cluster, and return the labels.

    NOTE(review): DBSCAN labels noise as -1, so color[i] with i == -1 reuses
    the last colormap entry — confirm this is intended.
    """
    from sklearn.cluster import DBSCAN
    import matplotlib.cm
    model = DBSCAN(eps=eps)
    dbscan = model.fit_predict(matrix)
    # one colormap entry per cluster id
    color = matplotlib.cm.brg(np.linspace(0,1,np.max(dbscan) - np.min(dbscan)+1))

    for i in range(np.min(dbscan), np.max(dbscan)+1):
        plt.plot(matrix[dbscan == i][:,0], matrix[dbscan == i][:,1], ".", color=color[i])
        # label each cluster (1-based id) at its first member's position
        plt.text(matrix[dbscan == i][:,0][0], matrix[dbscan == i][:,1][0], str(i+1), color="black", size=16)
    plt.title(title, size=16)
    return dbscan
35
+
36
def getSumMatrix(A, boundary):
    """Mean-pool matrix *A* over the grid defined by the *boundary* split
    points (as used by np.vsplit/np.hsplit).

    Column i of the result holds the block means of the i-th row stripe,
    i.e. the result is the transpose of the intuitive block-mean matrix.
    """
    stripes = np.vsplit(A, boundary)
    cols = []
    for stripe in stripes:
        blocks = np.hsplit(stripe, boundary)
        # accumulate block means for this stripe exactly as np.r_ would
        col = blocks[0].mean()
        for block in blocks[1:]:
            col = np.r_[col, block.mean()]
        cols.append(col)
    result = cols[0]
    for col in cols[1:]:
        result = np.c_[result, col]
    return result
50
+
51
def get_ellipse_coords(a=0.0, b=0.0, x=0.0, y=0.0, angle=0.0, k=2):
    """Return (180*k + 1) points sampling half an ellipse.

    a, b   : semi-axis lengths
    x, y   : center offset along each axis
    angle  : clockwise rotation of the ellipse, in degrees
    k      : sampling density (k = 2 gives 361 points)

    The parameter alpha runs from 0 to -180 degrees, so the returned arc is
    the lower half of the ellipse. Adapted from:
    http://scipy-central.org/item/23/2/plot-an-ellipse
    """
    n_points = int(180 * k + 1)
    beta = np.radians(-angle)
    alpha = -np.radians(np.r_[0.:180.:1j * n_points])

    cos_a, sin_a = np.cos(alpha), np.sin(alpha)
    cos_b, sin_b = np.cos(beta), np.sin(beta)

    pts = np.empty((n_points, 2))
    pts[:, 0] = x + a * cos_a * cos_b - b * sin_a * sin_b
    pts[:, 1] = y + a * cos_a * sin_b + b * sin_a * cos_b
    return pts
78
+
79
def restore_mat(mat, ref_data, columnname):
    """Align *mat* to the index of *ref_data* and unstack *columnname* into a
    2-D table (rows/columns from the two index levels)."""
    aligned = pd.concat([ref_data, mat], axis=1, join='outer')
    # keep only the columns contributed by mat
    aligned = aligned.iloc[:, ref_data.shape[1]:aligned.shape[1]]
    return aligned[columnname].unstack()
83
+
84
def plotArc(s, e):
    """Draw a half-ellipse arc spanning positions *s* to *e* on the current axes."""
    rad = (e - s)/2
    center = (s + e)/2
    pts = get_ellipse_coords(a=rad, b=1.0, x=center)
    plt.plot(pts[:,0], pts[:,1])
89
+
90
def plotVsegmentArc(vsegment, s, e, xstart, resolution):
    """Draw an arc connecting segments *s* and *e* of *vsegment*, converted to
    bin coordinates relative to *xstart*. No-op when s > e."""
    def getBed(index, xstart, resolution):
        # convert a genomic-coordinate segment into (start, end, midpoint) bins
        s = index[0]/resolution - xstart
        e = index[-1]/resolution + 1 - xstart
        common = int((s+e)/2)
        return s,e,common

    if(s>e): return
    sindex = vsegment[s]
    eindex = vsegment[e]
    s1, e1, c1 = getBed(sindex, xstart, resolution)
    s2, e2, c2 = getBed(eindex, xstart, resolution)
    plotArc(c1, c2)
    # plt.axhline(y=0, xmin=s1, xmax=e1)
    # plt.axhline(y=0, xmin=s2, xmax=e2)
105
+
106
def plotVArc(s, e, xstart, resolution):
    """Draw an arc between genomic positions *s* and *e* (bp) plus baseline
    marks at both anchors. No-op when s > e."""
    def getBed(index, xstart, resolution):
        # convert a genomic coordinate into (start, end, midpoint) bins
        s = index/resolution - xstart
        e = index/resolution + 1 - xstart
        common = int((s+e)/2)
        return s,e,common

    if(s>e): return
    s1, e1, c1 = getBed(s, xstart, resolution)
    s2, e2, c2 = getBed(e, xstart, resolution)
    plotArc(c1, c2)
    plt.axhline(y=0, xmin=s1, xmax=e1)
    plt.axhline(y=0, xmin=s2, xmax=e2)
119
+
120
def getHead(labels):
    """Return 'X-Y' column headers for every ordered pair (i < j) of *labels*."""
    n = len(labels)
    return [labels[i] + "-" + labels[j]
            for i in range(n) for j in range(i + 1, n)]
127
+
128
def get_corr_allcluster(sub3d, ncluster, kmeans, labels):
    """Pairwise Spearman correlations between samples, computed within each
    k-means cluster plus one 'All' row over the full data.

    Returns a DataFrame with one row per cluster (index 0..ncluster-1, last
    row renamed 'All') and one column per sample pair (see getHead).
    """
    def getmat(df, nlabels):
        # upper-triangle (i < j) entries of the pairwise correlation matrix
        corr_mat = df.corr(method='spearman')
        corr_mat = corr_mat.values
        a = []
        for i in range(nlabels-1):
            for x in corr_mat[i,i+1:nlabels]: a.append(x)
        return a

    nlabels = len(labels)
    for cl in range(ncluster):
        df = pd.DataFrame(sub3d[kmeans==cl])
        a = getmat(df, nlabels)
        if cl==0:
            mat = a
        else:
            mat = np.c_[mat, a]

    # final column: correlations over all rows regardless of cluster
    df = pd.DataFrame(sub3d)
    a = getmat(df, nlabels)
    mat = np.c_[mat, a]

    corr_allcluster = pd.DataFrame(mat.T, columns=getHead(labels))
    # after transposing, the row at index == ncluster holds the all-data values
    corr_allcluster = corr_allcluster.rename(index={ncluster: 'All'})
    return corr_allcluster
153
+
154
+
155
def draw_samples_whole(matrix, labels, cm):
    """Show each 2-D slice of the 3-D stack *matrix* side by side, one subplot
    per sample, titled by *labels*."""
    nsample = matrix.shape[0]
    plt.figure(figsize=(16, 6))
    for i in range(nsample):
        ax = plt.subplot(1, nsample+1, i+1)
        plt.imshow(matrix[i], clim=(-2, 2), cmap=cm)
        ax.set_title(labels[i])
    plt.tight_layout()
    plt.show()
164
+
165
def addzero_to_3dmatrix(mat, difflength, resolution):
    """Zero out sparse rows/columns and far-off-diagonal entries of the 3-D
    stack *mat*, in place (no return value).

    A bin is considered sparse when fewer than 10% of its summed entries are
    positive. Entries below the diagonal or farther than
    difflength/resolution bins above it are also zeroed.
    """
    max_offset = difflength / resolution
    sparse_threshold = 0.1
    mat[np.isnan(mat)] = 0
    # fraction of positive entries per bin, over the sample-summed matrix
    summed = np.sum(mat, axis=0)
    frac_positive = np.sum(summed > 0, axis=1) / mat.shape[1]
    sparse = frac_positive < sparse_threshold
    mat[:, sparse] = 0
    mat[:, :, sparse] = 0
    nbin = mat.shape[1]
    for i in range(nbin):
        for j in range(nbin):
            if j < i or j > i + max_offset:
                mat[:, i, j] = 0
176
+
177
def make_refdata(ref_matrix, ref, labels, resolution, difflim, cm):
    """Flatten the 3-D sample stack *ref_matrix* into a (position1, position2)
    x sample DataFrame after masking sparse and distant bins.

    NOTE(review): *difflim* is unused; the distance cutoff is hard-coded to
    10 Mb — confirm whether difflim should be passed through.
    """
    import copy
    matrix = copy.deepcopy(ref_matrix)
    # Zero out rows/columns that are mostly zero,
    # and also regions separated by more than 10 Mb
    addzero_to_3dmatrix(matrix, 10000000, resolution)

    draw_samples_whole(matrix, labels, cm)

    ref_data = matrix.reshape(ref_matrix.shape[0], ref_matrix.shape[1]*ref_matrix.shape[2]).T
    ref_data = pd.DataFrame(ref_data, index=ref.index, columns=labels)
    ref_data.index.names = ['position1', 'position2']
    return ref_data
190
+
191
def make_sub3d(_ref_data):
    """Return *_ref_data* without any row that contains at least one zero."""
    keep = np.logical_not((_ref_data == 0).any(axis=1))
    return _ref_data[keep]
@@ -0,0 +1,155 @@
1
+ #! /usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ # Copyright(c) Ryuichiro Nakato <rnakato@iqb.u-tokyo.ac.jp>
4
+ # All rights reserved.
5
+
6
+ import argparse
7
+ import os
8
+ import sys
9
+ import pandas as pd
10
+ import subprocess
11
+ import numpy as np
12
+ import random
13
+ from pybedtools import BedTool
14
+ import matplotlib.pyplot as plt
15
+ import matplotlib.patches as mpatches
16
+
17
def calculate_ratio(border_bed, gene_bed, allgene_bed):
    """Fraction of boundary-overlapping genes that are DEGs.

    Intersects both gene sets with *border_bed* (-u: report each gene once)
    and returns DEG count / total gene count, or 0 when no gene overlaps.
    """
    n_genes = len(allgene_bed.intersect(border_bed, u=True))
    n_degs = len(gene_bed.intersect(border_bed, u=True))
    if n_genes == 0:
        return 0
    return n_degs / n_genes
25
+
26
def back_function(border, gene_bed, permutation_times, len_border, allgene_bed):
    """Build a null distribution of DEG ratios by drawing *len_border* random
    boundaries from *border*, *permutation_times* times.

    Returns the [0.25, 0.75, 0.05, 0.95, 0.025, 0.975] quantiles of the
    permuted ratios (paired to the low50/high50/low90/... columns downstream).
    """
    dist_randomRatio = []
    for _ in range(permutation_times):
        randomBorder = border.sample(len_border)
        randomBorder_bed = BedTool.from_dataframe(randomBorder)
        randomRatio = calculate_ratio(randomBorder_bed, gene_bed, allgene_bed)
        dist_randomRatio.append(randomRatio)

    d = np.array(dist_randomRatio)
    return [np.quantile(d, quantile) for quantile in [0.25, 0.75, 0.05, 0.95, 0.025, 0.975]]
36
+
37
def plot_graph(df, outputname):
    """Plot the observed DEG fraction against boundary distance with shaded
    permutation quantile bands, and save the figure to *outputname*."""
    plt.rcParams['font.size'] = '12'

    # observed ratio (magenta) over distance in kb
    plt.plot(df["Distance"]/1000,df["Ratio"],"m")
    # null-distribution bands: 50%, 90%, 95% quantile ranges
    plt.fill_between(df["Distance"]/1000,df["low50"],df["high50"],color="grey",alpha=0.5)
    plt.fill_between(df["Distance"]/1000,df["low90"],df["high90"],color="grey",alpha=0.3)
    plt.fill_between(df["Distance"]/1000,df["low95"],df["high95"],color="grey",alpha=0.1)
    plt.xlabel("Distance from TAD boundary (kb)",fontsize=15)
    plt.ylabel("Fraction of DEGs",fontsize=15)

    q50 = mpatches.Patch(color='grey',alpha=0.5,label='50% quantile')
    q90 = mpatches.Patch(color='grey',alpha=0.3,label='90% quantile')
    q95 = mpatches.Patch(color='grey',alpha=0.1,label='95% quantile')
    plt.legend(handles=[q50, q90, q95],fontsize=12)

    plt.savefig(outputname)
53
+
54
def set_border(border, i):
    """Return a copy of the BED-like DataFrame *border* widened by *i* bp on
    each side, clipping the start (column 1) at zero."""
    widened = border.copy()
    widened[1] = (widened[1] - i).clip(lower=0)
    widened[2] = widened[2] + i
    return widened
59
+
60
def permutation_test_ratio(border, allborder, gene_bed, allgene_bed, permutation_times, max_distance, distance_step):
    """Observed DEG ratios and permutation quantiles for boundaries widened
    from 0 to *max_distance* bp in steps of *distance_step*.

    Returns (select_ratios, random_ratios, positions), where random_ratios is
    a (6, n_positions) array of null-distribution quantiles (see back_function).
    """
    select_ratios = []
    random_ratios = []
    positions = []

    for i in range(0, max_distance +1, distance_step):
        print(f"Distance {i} bp")
        # widen both the tested and background boundaries symmetrically
        border_temp = set_border(border, i)
        allborder_temp = set_border(allborder, i)

        border_temp_bed = BedTool.from_dataframe(border_temp)
        select_ratios.append(calculate_ratio(border_temp_bed, gene_bed, allgene_bed))

        len_border = len(border)
        random_ratios.append(back_function(allborder_temp, gene_bed, permutation_times, len_border, allgene_bed))
        positions.append(i)

    random_ratios = np.array(random_ratios) # convert list of tuples to numpy array
    random_ratios = random_ratios.transpose() # transpose array to get separate arrays for each quantile

    return select_ratios, random_ratios, positions
81
+
82
def main():
    """Command-line entry point: permutation test of DEG enrichment around
    TAD boundaries, writing the result plot to the requested output file."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--border_test", help="<TAD boundary to be tested (BED format)>", type=str, default=None)
    parser.add_argument("--border_control", help="<TAD boundary as background (BED format)>", type=str, default=None)
    parser.add_argument("--gene_test", help="<Genes to be tested (BED format)>", type=str, default=None)
    parser.add_argument("--gene_control", help="<Genes as background (BED format)>", type=str, default=None)
    parser.add_argument("-o", "--output", help="Output name (*.pdf or *.png, default: output.pdf)", type=str, default="output.pdf")
    parser.add_argument("-n", help="Number of permutation (default: 1000)", type=int, default=1000)
    parser.add_argument("--maxdistance", help="Max distance (bp, default: 300000)", type=int, default=300000)
    parser.add_argument("--step", help="Step of distance (bp, default: 10000)", type=int, default=10000)

    args = parser.parse_args()

    # Validate required options in one pass (the original repeated this block
    # four times and carried an unused 'tp' lambda). Exit nonzero on error;
    # the original's bare exit() reported success (status 0) even on failure.
    required = [("--border_test", args.border_test),
                ("--border_control", args.border_control),
                ("--gene_test", args.gene_test),
                ("--gene_control", args.gene_control)]
    for optname, value in required:
        if value is None:
            print ("Error: specify %s." % optname)
            parser.print_help()
            sys.exit(1)

    print (" TAD boundary to be tested: " + args.border_test)
    print (" TAD boundary as background: " + args.border_control)
    print (" Genes to be tested: " + args.gene_test)
    print (" Genes as background: " + args.gene_control)
    print (" Permutation time: " + str(args.n))
    print (" Max distance: " + str(args.maxdistance) + " bp")
    print (" Step of distance: " + str(args.step) + " bp")
    print (" Output file: " + args.output)

    border = pd.read_csv(args.border_test, sep="\t", header=None)
    allborder = pd.read_csv(args.border_control, sep="\t", header=None)
    gene_bed = BedTool(args.gene_test)
    allgene_bed = BedTool(args.gene_control)

    select_ratios, random_ratios, positions = permutation_test_ratio(
        border, allborder, gene_bed, allgene_bed,
        args.n, args.maxdistance, args.step)

    # quantile rows follow back_function's order: 25/75, 5/95, 2.5/97.5 percent
    df = pd.DataFrame({
        'Distance': positions,
        'Ratio': select_ratios,
        'low50': random_ratios[0],
        'high50': random_ratios[1],
        'low90': random_ratios[2],
        'high90': random_ratios[3],
        'low95': random_ratios[4],
        'high95': random_ratios[5]
    })

    plot_graph(df, args.output)
152
+
153
+
154
if __name__ == '__main__':
    main()
@@ -0,0 +1,102 @@
1
+ #! /usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ # Copyright(c) Ryuichiro Nakato <rnakato@iqb.u-tokyo.ac.jp>
4
+ # All rights reserved.
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ from scipy import ndimage
9
+ import argparse
10
+
11
def make3dmatrixRatio(samples, smoooth=3):
    """Return a 3-D stack of median-filtered log-ratio matrices of each sample
    against the first sample (the control).

    NOTE(review): the parameter name 'smoooth' (triple o) is kept as-is for
    backward compatibility with keyword callers.
    Assumes each element of *samples* provides getlog(isNonZero=False)
    returning a 2-D array (JuicerMatrix-like) — confirm against HiCmodule.
    """
    n = len(samples)
    # filtered log matrix of the control (samples[0])
    Ct = ndimage.median_filter(samples[0].getlog(isNonZero=False), smoooth)
    x, y = Ct.shape
    for i, sample in enumerate(samples[1:]):
        if i==0:
            data = sample.getlog(isNonZero=False)
            Matrix = ndimage.median_filter(data - Ct, smoooth)
        else:
            data = sample.getlog(isNonZero=False)
            M = ndimage.median_filter(data - Ct, smoooth)
            Matrix = np.concatenate((Matrix, M))
    # one (x, y) slice per non-control sample
    Matrix = Matrix.reshape(n-1,x,y)
    return Matrix
25
+
26
def getDirectionalRelativeFreq(mat, resolution, strand, *,
                               startdistance=0, distance=2000000):
    """Compute the directional relative contact frequency for each bin.

    For strand "+" this is the mean of the downstream column slice
    mat[i+startbin : i+nbin+1, i]; for any other strand the mean of the
    upstream row slice mat[i, i-nbin : i-startbin+1]. Bins within nbin of
    either matrix edge are left at 0.

    Parameters
    ----------
    mat : 2-D square numpy array
    resolution : int, bin size in bp
    strand : "+" for downstream, anything else for upstream
    startdistance, distance : bp range converted to bins

    Exits with status 1 when startdistance >= distance.
    """
    if (startdistance >= distance):
        # message fixed: the check is '>=', the original text said '>'
        print ("getDirectionalRelativeFreq: Error: startdistance >= enddistance")
        exit(1)

    arraysize = mat.shape[0]
    array = np.zeros(arraysize)
    nbin = int(distance/resolution)
    startbin = int(startdistance/resolution) +1
    for i in range(nbin, arraysize - nbin):
        if (strand == "+"):
            val = mat[i+startbin:i+nbin+1, i].mean()
        else:
            val = mat[i, i-nbin:i-startbin+1].mean()
        array[i] = val

    return array
44
+
45
class DirectionalRelativeFreq:
    """Directional relative contact frequency profiles of a contact matrix.

    Computes the downstream ("+") and upstream ("-") profiles once at
    construction and exposes them plus their difference.
    """
    def __init__(self, mat, resolution, *, startdistance=0, distance=2000000):
        self.arrayplus = getDirectionalRelativeFreq(mat, resolution, "+", startdistance=startdistance, distance=distance)
        self.arrayminus = getDirectionalRelativeFreq(mat, resolution, "-", startdistance=startdistance, distance=distance)

    def getarrayplus(self):
        """Return the downstream (right-facing) profile."""
        return self.arrayplus

    def getarrayminus(self):
        """Return the upstream (left-facing) profile."""
        return self.arrayminus

    def getarraydiff(self):
        """Return downstream minus upstream profile."""
        return self.arrayplus - self.arrayminus
58
+
59
def output_DRF(args):
    """Compute the Directional Relative Frequency of input vs control Hi-C
    matrices and write it as <output>.bedGraph."""
    from custardpy.HiCmodule import JuicerMatrix
    resolution = args.resolution
    samples = []
    # samples[0] acts as the control for make3dmatrixRatio
    samples.append(JuicerMatrix("RPM", args.control, resolution))
    samples.append(JuicerMatrix("RPM", args.input, resolution))

    smooth_median_filter = 3
    EnrichMatrices = make3dmatrixRatio(samples, smooth_median_filter)

    drf = DirectionalRelativeFreq(EnrichMatrices[0], resolution)
    if (args.drf_right):
        array = drf.getarrayplus()
    elif (args.drf_left):
        array = drf.getarrayminus()
    else:
        array = drf.getarraydiff()

    # assemble a 4-column bedGraph: chr, start, end, DRF
    df = pd.DataFrame(array)
    df.columns = ["DRF"]
    df["chr"] = args.chr
    df["start"] = np.arange(len(array)) * resolution
    df["end"] = df["start"] + resolution
    df = df.loc[:,["chr","start","end","DRF"]]

    df.to_csv(args.output + ".bedGraph", sep="\t", header=False, index=False)
88
+
89
if(__name__ == '__main__'):
    # Command-line entry point: parse arguments and write the DRF bedGraph.
    parser = argparse.ArgumentParser()
    parser.add_argument("input", help="Input matrix", type=str)
    parser.add_argument("control", help="Control matrix", type=str)
    parser.add_argument("output", help="Output prefix", type=str)
    parser.add_argument("chr", help="chromosome", type=str)
    parser.add_argument("resolution", help="Resolution of the input matrix", type=int)
    parser.add_argument("--drf_right", help="(with --drf) plot DirectionalRelativeFreq (Right)", action='store_true')
    parser.add_argument("--drf_left", help="(with --drf) plot DirectionalRelativeFreq (Left)", action='store_true')

    args = parser.parse_args()
    print(args)

    output_DRF(args)