custardpy 0.7.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- custardpy-0.7.7/PKG-INFO +124 -0
- custardpy-0.7.7/README.md +32 -0
- custardpy-0.7.7/custardpy/CannyEdge.py +75 -0
- custardpy-0.7.7/custardpy/Cluster.py +198 -0
- custardpy-0.7.7/custardpy/DEG_boundary_analysis +155 -0
- custardpy-0.7.7/custardpy/DirectionalRelativeFreq.py +102 -0
- custardpy-0.7.7/custardpy/DirectionalityIndex.py +68 -0
- custardpy-0.7.7/custardpy/HiCmodule.py +136 -0
- custardpy-0.7.7/custardpy/InsulationScore.py +129 -0
- custardpy-0.7.7/custardpy/PlotModule.py +311 -0
- custardpy-0.7.7/custardpy/__init__.py +3 -0
- custardpy-0.7.7/custardpy/checkHiCfile.py +23 -0
- custardpy-0.7.7/custardpy/convert_JuicerDump_to_dense.py +52 -0
- custardpy-0.7.7/custardpy/custardpy_clustering_boundary +255 -0
- custardpy-0.7.7/custardpy/custardpy_differential_DRF +236 -0
- custardpy-0.7.7/custardpy/drawSquareMulti +115 -0
- custardpy-0.7.7/custardpy/drawSquarePair +88 -0
- custardpy-0.7.7/custardpy/drawSquareRatioMulti +97 -0
- custardpy-0.7.7/custardpy/drawSquareRatioPair +79 -0
- custardpy-0.7.7/custardpy/drawTriangleMulti +118 -0
- custardpy-0.7.7/custardpy/drawTrianglePair +104 -0
- custardpy-0.7.7/custardpy/generateCmap.py +14 -0
- custardpy-0.7.7/custardpy/getBoundaryfromInsulationScore +44 -0
- custardpy-0.7.7/custardpy/loadData.py +49 -0
- custardpy-0.7.7/custardpy/plotCompartmentGenome +73 -0
- custardpy-0.7.7/custardpy/plotHiCMatrix +41 -0
- custardpy-0.7.7/custardpy/plotHiCfeature +191 -0
- custardpy-0.7.7/custardpy/plotHiCfeature_module.py +401 -0
- custardpy-0.7.7/custardpy/plotInsulationScore +39 -0
- custardpy-0.7.7/custardpy/plotMultiScaleInsulationScore +31 -0
- custardpy-0.7.7/custardpy.egg-info/PKG-INFO +124 -0
- custardpy-0.7.7/custardpy.egg-info/SOURCES.txt +35 -0
- custardpy-0.7.7/custardpy.egg-info/dependency_links.txt +1 -0
- custardpy-0.7.7/custardpy.egg-info/requires.txt +8 -0
- custardpy-0.7.7/custardpy.egg-info/top_level.txt +1 -0
- custardpy-0.7.7/setup.cfg +4 -0
- custardpy-0.7.7/setup.py +60 -0
custardpy-0.7.7/PKG-INFO
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: custardpy
|
|
3
|
+
Version: 0.7.7
|
|
4
|
+
Summary: Hi-C analysis tools by Python3
|
|
5
|
+
Home-page: https://github.com/rnakato/custardpy
|
|
6
|
+
Author: Ryuichiro Nakato
|
|
7
|
+
Author-email: rnakato@iqb.u-tokyo.ac.jp
|
|
8
|
+
License: GPL3.0
|
|
9
|
+
Keywords: Hi-C analysis,3D genome,NGS
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Requires-Python: >=3.7
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
Requires-Dist: numpy>=1.18
|
|
15
|
+
Requires-Dist: pandas>=1.3.0
|
|
16
|
+
Requires-Dist: scipy>=1.3
|
|
17
|
+
Requires-Dist: scikit-learn>=1.0.0
|
|
18
|
+
Requires-Dist: matplotlib>=3.2.2
|
|
19
|
+
Requires-Dist: seaborn>=0.11.1
|
|
20
|
+
Requires-Dist: h1d>=0.2.0
|
|
21
|
+
Requires-Dist: hic-straw>=1.3.0
|
|
22
|
+
Dynamic: author
|
|
23
|
+
Dynamic: author-email
|
|
24
|
+
Dynamic: classifier
|
|
25
|
+
Dynamic: description
|
|
26
|
+
Dynamic: description-content-type
|
|
27
|
+
Dynamic: home-page
|
|
28
|
+
Dynamic: keywords
|
|
29
|
+
Dynamic: license
|
|
30
|
+
Dynamic: requires-dist
|
|
31
|
+
Dynamic: requires-python
|
|
32
|
+
Dynamic: summary
|
|
33
|
+
|
|
34
|
+
# CustardPy: Docker image for 3D genome analysis
|
|
35
|
+
|
|
36
|
+
<img src="image/CustardPy.jpg" width="700">
|
|
37
|
+
|
|
38
|
+
This repository contains
|
|
39
|
+
- Source code of **CustardPy** (PyPI),
|
|
40
|
+
- Dockerfile of **CustardPy** Docker image,
|
|
41
|
+
- [Full Manual](https://custardpy.readthedocs.io), and
|
|
42
|
+
- Tutorial data of Hi-C and Micro-C analysis using demo data.
|
|
43
|
+
|
|
44
|
+
## 0. Changelog
|
|
45
|
+
|
|
46
|
+
See [Changelog](https://github.com/rnakato/CustardPy/blob/main/ChangeLog.md)
|
|
47
|
+
|
|
48
|
+
## 1. Installation
|
|
49
|
+
|
|
50
|
+
Docker image is available at [DockerHub](https://hub.docker.com/r/rnakato/custardpy).
|
|
51
|
+
|
|
52
|
+
### 1.1 Docker
|
|
53
|
+
|
|
54
|
+
To use the docker command, type:
|
|
55
|
+
|
|
56
|
+
# Pull docker image
|
|
57
|
+
docker pull rnakato/custardpy
|
|
58
|
+
|
|
59
|
+
# Container login
|
|
60
|
+
docker run --rm -it rnakato/custardpy /bin/bash
|
|
61
|
+
# Execute a command
|
|
62
|
+
docker run --rm -it -v (your directory):/opt/work rnakato/custardpy <command>
|
|
63
|
+
|
|
64
|
+
When calling loops using Juicer HICCUPS, supply the ``--gpus all`` option to allow GPU computation (GPU card needed):
|
|
65
|
+
|
|
66
|
+
docker run --gpus all --rm -it -v (your directory):/opt/work rnakato/custardpy call_HiCCUPS.sh
|
|
67
|
+
|
|
68
|
+
- user:password
|
|
69
|
+
- ubuntu:ubuntu
|
|
70
|
+
|
|
71
|
+
### 1.2 Singularity/Apptainer
|
|
72
|
+
|
|
73
|
+
Singularity is the alternative way to use CustardPy.
|
|
74
|
+
With this command, you can build the singularity file (.sif) of CustardPy:
|
|
75
|
+
|
|
76
|
+
singularity build custardpy.sif docker://rnakato/custardpy
|
|
77
|
+
|
|
78
|
+
Instead, you can download the CustardPy singularity image from our [Dropbox](https://www.dropbox.com/scl/fo/lptb68dirr9wcncy77wsv/h?rlkey=whhcaxuvxd1cz4fqoeyzy63bf&dl=0) (We use singularity version 3.8.5).
|
|
79
|
+
|
|
80
|
+
Then you can run CustardPy with the command:
|
|
81
|
+
|
|
82
|
+
singularity exec custardpy.sif <command>
|
|
83
|
+
|
|
84
|
+
Singularity will automatically mount the current directory. If you want to access the files in the other directory, use the `--bind` option, for instance:
|
|
85
|
+
|
|
86
|
+
singularity exec --bind /work custardpy.sif <command>
|
|
87
|
+
|
|
88
|
+
This command mounts the `/work` directory.
|
|
89
|
+
|
|
90
|
+
When calling loops using Juicer HICCUPS, supply the ``--nv`` option to allow GPU computation (GPU card needed):
|
|
91
|
+
|
|
92
|
+
singularity exec --nv --bind /work custardpy.sif call_HiCCUPS.sh
|
|
93
|
+
|
|
94
|
+
## 2. Quickstart
|
|
95
|
+
|
|
96
|
+
# download CustardPy/tutorial directory
|
|
97
|
+
git clone https://github.com/rnakato/CustardPy.git
|
|
98
|
+
cd CustardPy/tutorial/Hi-C/
|
|
99
|
+
|
|
100
|
+
# download fastq and genome data and make index
|
|
101
|
+
bash 00_getdata.sh
|
|
102
|
+
|
|
103
|
+
# Execute Juicer pipeline
|
|
104
|
+
bash QuickStart_juicer.sh
|
|
105
|
+
|
|
106
|
+
## 3. Usage
|
|
107
|
+
|
|
108
|
+
See https://custardpy.readthedocs.io for the detailed Manual.
|
|
109
|
+
|
|
110
|
+
## 4. Build Docker image from Dockerfile
|
|
111
|
+
|
|
112
|
+
First clone and move to the repository
|
|
113
|
+
|
|
114
|
+
git clone https://github.com/rnakato/CustardPy.git
|
|
115
|
+
cd CustardPy/Docker
|
|
116
|
+
|
|
117
|
+
Then type:
|
|
118
|
+
|
|
119
|
+
docker build -f Dockerfile.<version> -t <account>/custardpy_juicer .
|
|
120
|
+
|
|
121
|
+
## 5. Citation
|
|
122
|
+
Please cite this reference when using CustardPy in your study.
|
|
123
|
+
|
|
124
|
+
- Nakato R, Sakata T, Wang J, Nagai LAE, Nagaoka Y, Oba GM, Bando M, Shirahige K, Context-dependent perturbations in chromatin folding and the transcriptome by cohesin and related factors, *Nature Communications*, 2023. doi: [10.1038/s41467-023-41316-4](https://www.nature.com/articles/s41467-023-41316-4)
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# CustardPy (PyPI)
|
|
2
|
+
|
|
3
|
+
The core component of CustardPy can be installed using pip3:
|
|
4
|
+
|
|
5
|
+
## Requirements
|
|
6
|
+
|
|
7
|
+
The following are required before installing CustardPy:
|
|
8
|
+
|
|
9
|
+
- Python 3.7+
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
pip3 install custardpy
|
|
14
|
+
|
|
15
|
+
## ChangeLog
|
|
16
|
+
|
|
17
|
+
- 0.7.7 (2026-03-16)
|
|
18
|
+
- Bug fix in `plotInsulationScore` where the matplotlib.pyplot was not imported.
|
|
19
|
+
|
|
20
|
+
- 0.7.6 (2025-06-27)
|
|
21
|
+
- Lint check with Ruff.
|
|
22
|
+
|
|
23
|
+
- 0.7.2 (2024-03-05)
|
|
24
|
+
- Fixed the error messages in `plotHiCfeature_module.py`
|
|
25
|
+
|
|
26
|
+
- 0.7.1 (2024-03-03)
|
|
27
|
+
- Added the function `plot_SamplePair_triu`
|
|
28
|
+
|
|
29
|
+
- 0.7.0 (2024-03-03)
|
|
30
|
+
- Added this ChangeLog
|
|
31
|
+
- Create `plotHiCfeature_module.py`
|
|
32
|
+
- Add `__version__` to __init__.py
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Copyright(c) Ryuichiro Nakato <rnakato@iqb.u-tokyo.ac.jp>
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import matplotlib.pyplot as plt
|
|
6
|
+
|
|
7
|
+
def getlinalg(gradient, wid):
    """Return the Euclidean norm of each length-``wid`` window of *gradient*.

    Windows start at offsets ``0 .. gradient.size - wid - 1``; the result is a
    plain list with one norm per window (empty when ``wid >= gradient.size``).
    """
    n_windows = gradient.size - wid
    return [
        np.linalg.norm(gradient[start:start + wid])
        for start in range(n_windows)
    ]
|
|
13
|
+
|
|
14
|
+
def getMaxGradient(arr, val, order_argrelmax, wid, posi):
    """Return positions of local maxima of *arr* that exceed *val*.

    Maxima are located with ``scipy.signal.argrelmax`` (``order`` =
    *order_argrelmax*), shifted by *wid*, and any position within 5 bins of
    *posi* is discarded.
    """
    from scipy.signal import argrelmax

    peaks = argrelmax(arr, order=order_argrelmax)[0]
    peaks = peaks[arr[peaks] > val]
    # Indices of the norm profile are offset from the original array by `wid`.
    peaks += wid
    # Ignore gradients around the diagonal (+/- 5 bins).
    away_from_diagonal = (peaks < posi - 5) | (peaks > posi + 5)
    return peaks[away_from_diagonal]
|
|
21
|
+
|
|
22
|
+
def imshowWithvLine(A, posi, index, title, cm):
    """Show matrix *A* with one horizontal marker and dashed vertical markers.

    A new 6x6 figure is created, *A* is drawn with colour limits (-2, 2) and
    colormap *cm*, a black horizontal line is placed at ``y = posi`` and a
    dashed green vertical line at every x in *index*. Line extents are fixed
    to the range 0..190. The figure is not shown or saved here.
    """
    figure = plt.figure(figsize=(6, 6))
    axes = figure.add_subplot(1, 1, 1)
    axes.imshow(A, clim=(-2, 2), cmap=cm)
    axes.set_title(title)

    plt.hlines(y=posi, xmin=0, xmax=190, colors='black', linewidths=1)
    dashed_green = dict(ymin=0, ymax=190, colors='green', linewidths=1, linestyles='dashed')
    for column in index:
        plt.vlines(x=column, **dashed_green)
|
|
30
|
+
|
|
31
|
+
def getIndexMatrix(linalg, segment_len, limit_val, order_argrelmax, wid):
    """Detect gradient peaks separately for each block of *segment_len* rows.

    The rows of *linalg* are cut into ``int(rows / segment_len)`` consecutive
    segments; each segment is averaged over its rows and passed to
    ``getMaxGradient`` with the segment centre as the position to mask.
    Returns a list with one index array per segment.
    """
    n_segments = int(linalg.shape[0] / segment_len)
    return [
        getMaxGradient(
            linalg[k * segment_len:(k + 1) * segment_len].mean(axis=0),
            limit_val,
            order_argrelmax,
            wid,
            (k + 0.5) * segment_len,
        )
        for k in range(n_segments)
    ]
|
|
40
|
+
|
|
41
|
+
def imshowWithBoundary(A, indexMatrix, segment_len, cm):
    """Show matrix *A* overlaid with the per-segment boundaries in *indexMatrix*.

    For segment number ``k`` every stored position is drawn twice: as a
    vertical green line spanning rows ``k*segment_len .. (k+1)*segment_len``
    and as a horizontal green line spanning the same range of columns.
    """
    figure = plt.figure(figsize=(8, 8))
    axes = figure.add_subplot(1, 1, 1)
    axes.imshow(A, clim=(-2, 2), cmap=cm)

    # All vertical marks first, then all horizontal marks.
    for segment, positions in enumerate(indexMatrix):
        low, high = segment * segment_len, (segment + 1) * segment_len
        for position in positions:
            plt.vlines(x=position, ymin=low, ymax=high, colors='green', linewidths=1)
    for segment, positions in enumerate(indexMatrix):
        low, high = segment * segment_len, (segment + 1) * segment_len
        for position in positions:
            plt.hlines(y=position, xmin=low, xmax=high, colors='green', linewidths=1)
|
|
51
|
+
|
|
52
|
+
def getPeakfromIndexMatrix(indexMatrix, order):
    """Find positions that recur as boundaries across segments.

    All indices in *indexMatrix* are pooled and counted per position; local
    maxima of that count profile (``scipy.signal.argrelmax`` with *order*)
    are returned. The profile and the detected peaks are also plotted and
    shown with matplotlib.

    Returns an empty integer array, without plotting, when *indexMatrix*
    contains no indices at all (previously this raised ``ValueError``).
    """
    import collections
    from scipy.signal import argrelmax

    positions = [pos for segment in indexMatrix for pos in segment]
    if not positions:
        # Nothing was detected in any segment, so there is no profile to scan.
        return np.array([], dtype=int)

    counts = collections.Counter(positions)
    profile = np.zeros(max(counts) + 1)
    for pos, n_hits in counts.items():
        profile[pos] = n_hits

    peaks = argrelmax(profile, order=order)[0]

    plt.figure(figsize=(12, 4))
    plt.plot(profile)
    for peak in peaks:
        plt.vlines(x=peak, ymin=0, ymax=profile.max(), colors='green', linewidths=1, linestyles='dashed')
    plt.show()

    return peaks
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
# Copyright(c) Ryuichiro Nakato <rnakato@iqb.u-tokyo.ac.jp>
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import matplotlib.pyplot as plt
|
|
6
|
+
import pandas as pd
|
|
7
|
+
|
|
8
|
+
def KMeansPlot(matrix, title, ncluster):
    """Cluster the rows of *matrix* with MiniBatchKMeans and scatter-plot them.

    The first two columns of *matrix* are used as x/y coordinates. Each
    cluster gets its own colour from the ``brg`` colormap and is annotated
    with its 1-based number at the position of its first member.

    Returns the array of cluster labels from ``fit_predict``.
    """
    import matplotlib.cm
    from sklearn.cluster import MiniBatchKMeans

    model = MiniBatchKMeans(random_state=0, n_clusters=ncluster, max_iter=10000)
    kmeans = model.fit_predict(matrix)

    lowest, highest = np.min(kmeans), np.max(kmeans)
    color = matplotlib.cm.brg(np.linspace(0, 1, highest - lowest + 1))

    for label in range(lowest, highest + 1):
        members = matrix[kmeans == label]
        if len(members) == 0:
            # A label with no members has nothing to plot or annotate.
            continue
        # The palette has one entry per label in [lowest, highest], so index
        # it relative to the lowest label.
        plt.plot(members[:, 0], members[:, 1], ".", color=color[label - lowest])
        plt.text(members[0, 0], members[0, 1], str(label + 1), color="black", size=16)
    plt.title(title, size=16)

    return kmeans
|
|
21
|
+
|
|
22
|
+
# Clustering with DBSCAN
def DBSCANPlot(matrix, title, eps):
    """Cluster the rows of *matrix* with DBSCAN and scatter-plot them.

    The first two columns of *matrix* are used as x/y coordinates. Each
    label returned by ``fit_predict`` gets its own colour from the ``brg``
    colormap and is annotated with ``label + 1`` at its first member.

    Returns the array of labels from ``fit_predict``.
    """
    from sklearn.cluster import DBSCAN
    import matplotlib.cm

    model = DBSCAN(eps=eps)
    dbscan = model.fit_predict(matrix)

    lowest, highest = np.min(dbscan), np.max(dbscan)
    color = matplotlib.cm.brg(np.linspace(0, 1, highest - lowest + 1))

    for label in range(lowest, highest + 1):
        members = matrix[dbscan == label]
        if len(members) == 0:
            # A label with no members has nothing to plot or annotate.
            continue
        # Index the palette relative to the lowest label so that a negative
        # label does not rely on negative-index wraparound.
        plt.plot(members[:, 0], members[:, 1], ".", color=color[label - lowest])
        plt.text(members[0, 0], members[0, 1], str(label + 1), color="black", size=16)
    plt.title(title, size=16)
    return dbscan
|
|
35
|
+
|
|
36
|
+
def getSumMatrix(A, boundary):
    """Return the mean of every sub-block of *A* cut at *boundary*.

    *A* is split along rows with ``np.vsplit`` and along columns with
    ``np.hsplit`` using the same *boundary*. In the result, element
    ``[j, i]`` is the mean of the block in row-group ``i`` and column-group
    ``j``. When the split yields a single block its mean is returned as a
    scalar.
    """
    block_means = [
        [block.mean() for block in np.hsplit(band, boundary)]
        for band in np.vsplit(A, boundary)
    ]
    if len(block_means) == 1:
        return block_means[0][0]
    # Each row-group contributes one column of the result.
    return np.array(block_means).T
|
|
50
|
+
|
|
51
|
+
def get_ellipse_coords(a=0.0, b=0.0, x=0.0, y=0.0, angle=0.0, k=2):
    """Return ``int(180*k + 1)`` points along half of an ellipse.

    The parametric angle runs over 0..180 degrees (negated before use), so
    the points describe an arc rather than a closed ellipse.

        a     = semi-axis length along the (unrotated) x direction
        b     = semi-axis length along the (unrotated) y direction
        x, y  = offsets of the centre along each axis
        angle = rotation in degrees; it is negated before being applied
        k     = sampling density (k = 1 gives 181 points, one per degree)

    Returns an array of shape (n_points, 2) holding x and y coordinates.
    Adapted from http://scipy-central.org/item/23/2/plot-an-ellipse
    """
    n_points = int(180 * k + 1)

    beta = -angle * np.pi / 180.0
    sin_beta, cos_beta = np.sin(beta), np.cos(beta)

    alpha = -np.radians(np.linspace(0., 180., n_points))
    sin_alpha, cos_alpha = np.sin(alpha), np.cos(alpha)

    pts = np.zeros((n_points, 2))
    pts[:, 0] = x + (a * cos_alpha * cos_beta - b * sin_alpha * sin_beta)
    pts[:, 1] = y + (a * cos_alpha * sin_beta + b * sin_alpha * cos_beta)
    return pts
|
|
78
|
+
|
|
79
|
+
def restore_mat(mat, ref_data, columnname):
    """Re-expand column *columnname* of *mat* onto the index of *ref_data*.

    *mat* is outer-joined column-wise to *ref_data* so that it picks up the
    union of both indexes, the columns that came from *ref_data* are dropped
    again, and the requested column is unstacked into a 2-D table.
    """
    n_ref_columns = ref_data.shape[1]
    aligned = pd.concat([ref_data, mat], axis=1, join='outer')
    only_mat = aligned.iloc[:, n_ref_columns:]
    return only_mat[columnname].unstack()
|
|
83
|
+
|
|
84
|
+
def plotArc(s, e):
    """Plot an arc between x positions *s* and *e* on the current axes.

    The arc comes from ``get_ellipse_coords`` with horizontal semi-axis
    ``(e - s) / 2``, vertical semi-axis 1.0 and centre x ``(s + e) / 2``.
    """
    half_span = (e - s) / 2
    midpoint = (s + e) / 2
    arc = get_ellipse_coords(a=half_span, b=1.0, x=midpoint)
    plt.plot(arc[:, 0], arc[:, 1])
|
|
89
|
+
|
|
90
|
+
def plotVsegmentArc(vsegment, s, e, xstart, resolution):
    """Plot an arc linking the centres of segments ``vsegment[s]`` and ``vsegment[e]``.

    Each segment is a sequence of positions; its first and last entries are
    divided by *resolution* and shifted by *xstart*, and the integer midpoint
    of that span is used as the arc end point. Nothing is drawn when
    ``s > e``.
    """
    def segment_centre(index):
        # Span of the segment in bin units relative to xstart.
        left = index[0] / resolution - xstart
        right = index[-1] / resolution + 1 - xstart
        return int((left + right) / 2)

    if s > e:
        return

    first_segment = vsegment[s]
    second_segment = vsegment[e]
    plotArc(segment_centre(first_segment), segment_centre(second_segment))
|
|
105
|
+
|
|
106
|
+
def plotVArc(s, e, xstart, resolution):
    """Plot an arc between positions *s* and *e* plus a baseline mark under each end.

    Both positions are divided by *resolution* and shifted by *xstart*. The
    arc joins the integer centres of the two resulting one-bin spans, and
    each span is drawn as a horizontal segment at ``y = 0``. Nothing is
    drawn when ``s > e``.
    """
    def getBed(index, xstart, resolution):
        # One-bin span [start, end) of the position, relative to xstart.
        start = index / resolution - xstart
        end = index / resolution + 1 - xstart
        return start, end, int((start + end) / 2)

    if s > e:
        return

    s1, e1, c1 = getBed(s, xstart, resolution)
    s2, e2, c2 = getBed(e, xstart, resolution)
    plotArc(c1, c2)
    # The span limits are data coordinates, so use hlines; axhline's
    # xmin/xmax are axes fractions in [0, 1] and would misplace the marks.
    plt.hlines(y=0, xmin=s1, xmax=e1)
    plt.hlines(y=0, xmin=s2, xmax=e2)
|
|
119
|
+
|
|
120
|
+
def getHead(labels):
    """Return ``"A-B"`` names for every unordered pair of *labels*.

    Pairs are listed in the order (0,1), (0,2), ..., (1,2), ... so that they
    line up with the upper triangle of a matrix read row by row.
    """
    count = len(labels)
    return [
        labels[first] + "-" + labels[second]
        for first in range(count)
        for second in range(first + 1, count)
    ]
|
|
127
|
+
|
|
128
|
+
def get_corr_allcluster(sub3d, ncluster, kmeans, labels):
    """Tabulate pairwise Spearman correlations per cluster and for all rows.

    For each cluster ``0 .. ncluster-1`` the rows of *sub3d* whose entry in
    *kmeans* equals the cluster number are selected, and the Spearman
    correlation between every pair of the first ``len(labels)`` columns is
    computed. A final row does the same over all rows of *sub3d*.

    Returns a DataFrame with one row per cluster plus a last row named
    ``'All'``, and one column per label pair (named by ``getHead``).
    With ``ncluster == 0`` only the ``'All'`` row is returned.
    """
    nlabels = len(labels)

    def upper_triangle(values):
        # Flatten the strict upper triangle of the correlation matrix,
        # row by row, to match the pair order produced by getHead.
        corr = pd.DataFrame(values).corr(method='spearman').values
        return [v for row in range(nlabels - 1) for v in corr[row, row + 1:nlabels]]

    # Collect all rows first and build the array once instead of regrowing
    # it on every iteration.
    rows = [upper_triangle(sub3d[kmeans == cl]) for cl in range(ncluster)]
    rows.append(upper_triangle(sub3d))

    corr_allcluster = pd.DataFrame(np.array(rows), columns=getHead(labels))
    return corr_allcluster.rename(index={ncluster: 'All'})
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def draw_samples_whole(matrix, labels, cm):
    """Show every 2-D slice ``matrix[i]`` side by side, titled with ``labels[i]``.

    Uses one 16x6 figure laid out as a single row of ``nsample + 1`` slots,
    colour limits (-2, 2) and colormap *cm*, then calls ``plt.show()``.
    """
    nsample = matrix.shape[0]
    n_slots = nsample + 1
    plt.figure(figsize=(16, 6))
    for sample_no in range(nsample):
        panel = plt.subplot(1, n_slots, sample_no + 1)
        plt.imshow(matrix[sample_no], clim=(-2, 2), cmap=cm)
        panel.set_title(labels[sample_no])
    plt.tight_layout()
    plt.show()
|
|
164
|
+
|
|
165
|
+
def addzero_to_3dmatrix(mat, difflength, resolution):
    """Zero out, in place, unwanted entries of a stack of square matrices.

    *mat* is indexed as ``[sample, i, j]`` and is modified in place; nothing
    is returned. Three masks are applied in order:

    1. NaN entries become 0.
    2. Rows/columns that are mostly empty are cleared: after summing over
       samples, a row whose fraction of positive entries is below 0.1 is
       zeroed as both row and column.
    3. Only the band ``i <= j <= i + difflength / resolution`` is kept;
       everything below the diagonal or farther from it is zeroed.
    """
    diff = difflength / resolution
    lim_pzero = 0.1
    mat[np.isnan(mat)] = 0

    positive_fraction = np.sum(np.sum(mat, axis=0) > 0, axis=1) / mat.shape[1]
    index_zero = positive_fraction < lim_pzero
    mat[:, index_zero] = 0
    mat[:, :, index_zero] = 0

    # One boolean mask replaces the per-element Python double loop.
    bins = np.arange(mat.shape[1])
    rows, cols = bins[:, None], bins[None, :]
    outside_band = (cols < rows) | (cols > rows + diff)
    mat[:, outside_band] = 0
|
|
176
|
+
|
|
177
|
+
def make_refdata(ref_matrix, ref, labels, resolution, difflim, cm):
    """Build a per-position table (one column per sample) from a 3-D matrix stack.

    A deep copy of *ref_matrix* is masked with ``addzero_to_3dmatrix`` using a
    fixed distance of 10,000,000, displayed with ``draw_samples_whole``, and
    then flattened so that each sample becomes one column. The result is
    indexed by ``ref.index`` with level names ``position1``/``position2``.

    NOTE(review): *difflim* is accepted but never used; the distance limit is
    hard-coded.
    """
    import copy

    masked = copy.deepcopy(ref_matrix)
    # Zero rows/columns that are mostly zero, and also zero regions that are
    # 10M or more apart.
    addzero_to_3dmatrix(masked, 10000000, resolution)

    draw_samples_whole(masked, labels, cm)

    n_samples, n_rows, n_cols = ref_matrix.shape[0], ref_matrix.shape[1], ref_matrix.shape[2]
    flattened = masked.reshape(n_samples, n_rows * n_cols).T
    ref_data = pd.DataFrame(flattened, index=ref.index, columns=labels)
    ref_data.index.names = ['position1', 'position2']
    return ref_data
|
|
190
|
+
|
|
191
|
+
def make_sub3d(_ref_data):
    """Return only the rows of *_ref_data* that contain no zero.

    A row is removed as soon as any one of its entries equals 0.
    """
    has_zero = (_ref_data == 0).any(axis=1)
    return _ref_data[~has_zero]
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
#! /usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Copyright(c) Ryuichiro Nakato <rnakato@iqb.u-tokyo.ac.jp>
|
|
4
|
+
# All rights reserved.
|
|
5
|
+
|
|
6
|
+
import argparse
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
import pandas as pd
|
|
10
|
+
import subprocess
|
|
11
|
+
import numpy as np
|
|
12
|
+
import random
|
|
13
|
+
from pybedtools import BedTool
|
|
14
|
+
import matplotlib.pyplot as plt
|
|
15
|
+
import matplotlib.patches as mpatches
|
|
16
|
+
|
|
17
|
+
def calculate_ratio(border_bed, gene_bed, allgene_bed):
    """Return the share of border-overlapping genes that belong to *gene_bed*.

    Both gene sets are intersected with *border_bed* (``u=True``); the result
    is ``len(test hits) / len(background hits)``, or 0 when no background
    gene overlaps a border.
    """
    n_background = len(allgene_bed.intersect(border_bed, u=True))
    n_test = len(gene_bed.intersect(border_bed, u=True))

    if n_background == 0:
        return 0
    return n_test / n_background
|
|
25
|
+
|
|
26
|
+
def back_function(border, gene_bed, permutation_times, len_border, allgene_bed):
    """Estimate the background distribution of the ratio by random sampling.

    *permutation_times* times, *len_border* rows are sampled from *border*
    and scored with ``calculate_ratio``. Returns the quantiles of those
    scores in the order 0.25, 0.75, 0.05, 0.95, 0.025, 0.975.
    """
    def one_draw():
        sampled = border.sample(len_border)
        sampled_bed = BedTool.from_dataframe(sampled)
        return calculate_ratio(sampled_bed, gene_bed, allgene_bed)

    scores = np.array([one_draw() for _ in range(permutation_times)])
    levels = (0.25, 0.75, 0.05, 0.95, 0.025, 0.975)
    return [np.quantile(scores, level) for level in levels]
|
|
36
|
+
|
|
37
|
+
def plot_graph(df, outputname):
    """Plot the observed ratio against distance with background bands and save it.

    *df* must provide the columns ``Distance``, ``Ratio`` and the
    ``low``/``high`` pairs for 50, 90 and 95. Distances are divided by 1000
    for the x axis. The figure is written to *outputname*.
    """
    plt.rcParams['font.size'] = '12'

    distance_kb = df["Distance"] / 1000
    bands = (("50", 0.5), ("90", 0.3), ("95", 0.1))

    plt.plot(distance_kb, df["Ratio"], "m")
    for level, alpha in bands:
        plt.fill_between(distance_kb, df["low" + level], df["high" + level], color="grey", alpha=alpha)
    plt.xlabel("Distance from TAD boundary (kb)", fontsize=15)
    plt.ylabel("Fraction of DEGs", fontsize=15)

    legend_handles = [
        mpatches.Patch(color='grey', alpha=alpha, label=level + '% quantile')
        for level, alpha in bands
    ]
    plt.legend(handles=legend_handles, fontsize=12)

    plt.savefig(outputname)
|
|
53
|
+
|
|
54
|
+
def set_border(border, i):
    """Return a copy of *border* with every interval widened by *i* on both sides.

    Column 1 is lowered by *i* but never below 0; column 2 is raised by *i*.
    The input is left untouched.
    """
    widened = border.copy()
    widened[1] = (widened[1] - i).clip(lower=0)
    widened[2] = widened[2] + i
    return widened
|
|
59
|
+
|
|
60
|
+
def permutation_test_ratio(border, allborder, gene_bed, allgene_bed, permutation_times, max_distance, distance_step):
    """Score the tested borders and a sampled background at increasing distances.

    For each distance from 0 to *max_distance* (inclusive, in steps of
    *distance_step*) both border sets are widened with ``set_border``; the
    tested set is scored with ``calculate_ratio`` and the background
    quantiles come from ``back_function`` sampling ``len(border)`` rows.

    Returns ``(select_ratios, random_ratios, positions)`` where
    *random_ratios* is an array with one row per quantile.
    """
    positions = list(range(0, max_distance + 1, distance_step))
    n_tested = len(border)

    select_ratios = []
    background = []
    for offset in positions:
        print(f"Distance {offset} bp")
        tested_wide = set_border(border, offset)
        background_wide = set_border(allborder, offset)

        tested_bed = BedTool.from_dataframe(tested_wide)
        select_ratios.append(calculate_ratio(tested_bed, gene_bed, allgene_bed))
        background.append(
            back_function(background_wide, gene_bed, permutation_times, n_tested, allgene_bed)
        )

    # Rows of the transposed array are the individual quantiles.
    random_ratios = np.array(background).transpose()
    return select_ratios, random_ratios, positions
|
|
81
|
+
|
|
82
|
+
def main():
    """Command-line entry point: test DEG enrichment around TAD boundaries.

    Parses the options, reads the two boundary tables and the two gene BED
    files, runs ``permutation_test_ratio`` and writes the plot with
    ``plot_graph``. Exits with status 1 (after printing the help) when one
    of the four input files is not given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--border_test", help="<TAD boundary to be tested (BED format)>", type=str, default=None)
    parser.add_argument("--border_control", help="<TAD boundary as background (BED format)>", type=str, default=None)
    parser.add_argument("--gene_test", help="<Genes to be tested (BED format)>", type=str, default=None)
    parser.add_argument("--gene_control", help="<Genes as background (BED format)>", type=str, default=None)
    parser.add_argument("-o", "--output", help="Output name (*.pdf or *.png, default: output.pdf)", type=str, default="output.pdf")
    parser.add_argument("-n", help="Number of permutation (default: 1000)", type=int, default=1000)
    parser.add_argument("--maxdistance", help="Max distance (bp, default: 300000)", type=int, default=300000)
    parser.add_argument("--step", help="Step of distance (bp, default: 10000)", type=int, default=10000)

    args = parser.parse_args()

    # All four inputs are mandatory; report the first missing one and fail
    # with a non-zero status so that calling scripts can detect the error.
    for option in ("border_test", "border_control", "gene_test", "gene_control"):
        if getattr(args, option) is None:
            print("Error: specify --" + option + ".")
            parser.print_help()
            sys.exit(1)

    print(" TAD boundary to be tested: " + args.border_test)
    print(" TAD boundary as background: " + args.border_control)
    print(" Genes to be tested: " + args.gene_test)
    print(" Genes as background: " + args.gene_control)
    print(" Permutation time: " + str(args.n))
    print(" Max distance: " + str(args.maxdistance) + " bp")
    print(" Step of distance: " + str(args.step) + " bp")
    print(" Output file: " + args.output)

    border = pd.read_csv(args.border_test, sep="\t", header=None)
    allborder = pd.read_csv(args.border_control, sep="\t", header=None)
    gene_bed = BedTool(args.gene_test)
    allgene_bed = BedTool(args.gene_control)

    select_ratios, random_ratios, positions = permutation_test_ratio(
        border,
        allborder,
        gene_bed,
        allgene_bed,
        args.n,
        args.maxdistance,
        args.step,
    )

    # Quantile rows follow the order returned by back_function.
    df = pd.DataFrame({
        'Distance': positions,
        'Ratio': select_ratios,
        'low50': random_ratios[0],
        'high50': random_ratios[1],
        'low90': random_ratios[2],
        'high90': random_ratios[3],
        'low95': random_ratios[4],
        'high95': random_ratios[5],
    })

    plot_graph(df, args.output)


if __name__ == '__main__':
    main()
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
#! /usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Copyright(c) Ryuichiro Nakato <rnakato@iqb.u-tokyo.ac.jp>
|
|
4
|
+
# All rights reserved.
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
from scipy import ndimage
|
|
9
|
+
import argparse
|
|
10
|
+
|
|
11
|
+
def make3dmatrixRatio(samples, smoooth=3):
    """Stack median-filtered differences of each sample against the first one.

    ``samples[0]`` is the control: its ``getlog(isNonZero=False)`` matrix is
    median-filtered with size *smoooth*. For every other sample the filtered
    control is subtracted from that sample's ``getlog`` matrix, and the
    difference is median-filtered again.

    Returns an array of shape ``(len(samples) - 1, x, y)``.

    Raises:
        ValueError: if fewer than two samples are given.
    """
    if len(samples) < 2:
        raise ValueError("make3dmatrixRatio: at least two samples (control and one target) are required")

    control = ndimage.median_filter(samples[0].getlog(isNonZero=False), smoooth)
    # Build every layer first and stack once, rather than concatenating
    # inside the loop.
    layers = [
        ndimage.median_filter(sample.getlog(isNonZero=False) - control, smoooth)
        for sample in samples[1:]
    ]
    return np.stack(layers)
|
|
25
|
+
|
|
26
|
+
def getDirectionalRelativeFreq(mat, resolution, strand, *,
                               startdistance=0, distance=2000000):
    """Average *mat* over a one-sided window next to each diagonal bin.

    With ``nbin = int(distance / resolution)`` and
    ``startbin = int(startdistance / resolution) + 1``, each bin ``i`` in
    ``nbin .. size - nbin - 1`` receives

    * strand ``"+"``: the mean of ``mat[i + startbin : i + nbin + 1, i]``
    * otherwise:      the mean of ``mat[i, i - nbin : i - startbin + 1]``

    All other bins stay 0. Prints an error and exits with status 1 when
    ``startdistance >= distance``.
    """
    if startdistance >= distance:
        print ("getDirectionalRelativeFreq: Error: startdistance > enddistance")
        exit(1)

    size = mat.shape[0]
    nbin = int(distance / resolution)
    startbin = int(startdistance / resolution) + 1
    forward = (strand == "+")

    profile = np.zeros(size)
    for centre in range(nbin, size - nbin):
        if forward:
            window = mat[centre + startbin:centre + nbin + 1, centre]
        else:
            window = mat[centre, centre - nbin:centre - startbin + 1]
        profile[centre] = window.mean()

    return profile
|
|
44
|
+
|
|
45
|
+
class DirectionalRelativeFreq:
    """Holds the "+" and "-" directional profiles computed from one matrix.

    Both profiles are computed once, at construction, by
    ``getDirectionalRelativeFreq`` with the same window settings.
    """

    def __init__(self, mat, resolution, *, startdistance=0, distance=2000000):
        window = {"startdistance": startdistance, "distance": distance}
        self.arrayplus = getDirectionalRelativeFreq(mat, resolution, "+", **window)
        self.arrayminus = getDirectionalRelativeFreq(mat, resolution, "-", **window)

    def getarrayplus(self):
        """Return the "+" strand profile."""
        return self.arrayplus

    def getarrayminus(self):
        """Return the "-" strand profile."""
        return self.arrayminus

    def getarraydiff(self):
        """Return the "+" profile minus the "-" profile."""
        difference = self.arrayplus - self.arrayminus
        return difference
|
|
58
|
+
|
|
59
|
+
def output_DRF(args):
    """Compute a directional-relative-frequency track and write it as bedGraph.

    Loads the control and input matrices named in *args* as "RPM"
    ``JuicerMatrix`` objects, builds the input-vs-control matrix with
    ``make3dmatrixRatio`` (median filter size 3) and derives the profile:
    the "+" profile with ``args.drf_right``, the "-" profile with
    ``args.drf_left``, otherwise their difference. The table
    (chr, start, end, DRF) is written to ``args.output + ".bedGraph"``
    as tab-separated text without header.
    """
    from custardpy.HiCmodule import JuicerMatrix

    resolution = args.resolution
    # Control first: make3dmatrixRatio treats samples[0] as the reference.
    samples = [
        JuicerMatrix("RPM", path, resolution)
        for path in (args.control, args.input)
    ]

    smooth_median_filter = 3
    enrich_matrices = make3dmatrixRatio(samples, smooth_median_filter)

    drf = DirectionalRelativeFreq(enrich_matrices[0], resolution)
    if args.drf_right:
        profile = drf.getarrayplus()
    elif args.drf_left:
        profile = drf.getarrayminus()
    else:
        profile = drf.getarraydiff()

    starts = np.arange(len(profile)) * resolution
    table = pd.DataFrame({
        "chr": args.chr,
        "start": starts,
        "end": starts + resolution,
        "DRF": profile,
    })

    table.to_csv(args.output + ".bedGraph", sep="\t", header=False, index=False)
|
|
88
|
+
|
|
89
|
+
if __name__ == '__main__':
    # Script entry: parse the five positional arguments and two flags, echo
    # the parsed namespace, then write the bedGraph via output_DRF.
    parser = argparse.ArgumentParser()

    positionals = (
        ("input", "Input matrix", str),
        ("control", "Control matrix", str),
        ("output", "Output prefix", str),
        ("chr", "chromosome", str),
        ("resolution", "Resolution of the input matrix", int),
    )
    for arg_name, description, arg_type in positionals:
        parser.add_argument(arg_name, help=description, type=arg_type)

    switches = (
        ("--drf_right", "(with --drf) plot DirectionalRelativeFreq (Right)"),
        ("--drf_left", "(with --drf) plot DirectionalRelativeFreq (Left)"),
    )
    for flag, description in switches:
        parser.add_argument(flag, help=description, action='store_true')

    args = parser.parse_args()
    print(args)

    output_DRF(args)
|