pymaftools 0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pymaftools-0.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 xu62u4u6
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,91 @@
1
+ Metadata-Version: 2.1
2
+ Name: pymaftools
3
+ Version: 0.1
4
+ Summary: pymaftools is a Python package for handling and analyzing Mutation Annotation Format (MAF) files. It provides utilities for data manipulation and visualization, including classes for MAF parsing and oncoplot generation.
5
+ Home-page: https://github.com/xu62u4u6/pymaftools
6
+ Author: xu62u4u6
7
+ Author-email: 199928ltyos@gmail.com
8
+ License: MIT
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+
15
+
16
+ # pymaftools
17
+
18
+ `pymaftools` is a Python package designed to handle and analyze MAF (Mutation Annotation Format) files. It provides utilities for working with mutation data, including the `MAF` and `PivotTable` classes for data manipulation, and functions for visualizing mutation data with oncoplots.
19
+
20
+ ## Features
21
+
22
+ - **MAF Class**: A utility to load, parse, and manipulate MAF files.
23
+ - **PivotTable Class**: A custom pivot table implementation for summarizing mutation frequencies and sorting genes and samples.
24
+ - **Oncoplot Visualization**: Generate oncoplot visualizations with mutation data and frequencies.
25
+
26
+ ## Installation
27
+
28
+ ### Using pip (from PyPI)
29
+ You can install the `pymaftools` package directly from PyPI using pip:
30
+
31
+ ```bash
32
+ pip install pymaftools
33
+ ```
34
+
35
+ ### Using GitHub (for the latest version)
36
+ To install directly from GitHub (if you want the latest changes):
37
+
38
+ ```bash
39
+ pip install git+https://github.com/xu62u4u6/pymaftools.git
40
+ ```
41
+
42
+
43
+ ## Usage
44
+
45
+ ### Importing the Package
46
+
47
+ ```python
48
+ from pymaftools.maf_utils import MAF, PivotTable
49
+ from pymaftools.maf_plots import create_oncoplot
50
+ ```
51
+
52
+ ### Example
53
+
54
+ ```python
55
+ # Load the MAF file
56
+ all_case_maf = MAF.read_maf("path_to_maf_file.maf", case_ID="case_1")
57
+
58
+ # Filter to keep only nonsynonymous mutations
59
+ filtered_all_case_maf = all_case_maf.filter_maf(MAF.nonsynonymous_types)
60
+
61
+ # Convert to pivot table (genes x samples table, mutation classification as values)
62
+ pivot_table = filtered_all_case_maf.to_pivot_table()
63
+
64
+ # Calculate mutation frequencies
65
+ pivot_table = pivot_table.add_freq()
66
+
67
+ # Sort the pivot table (by gene frequency and sample mutation count)
68
+ sorted_pivot_table = (pivot_table
69
+ .sort_genes_by_freq()
70
+ .sort_samples_by_mutations()
71
+ )
72
+
73
+ # Generate an oncoplot to show the top 50 genes with the highest mutation frequencies
74
+ create_oncoplot(sorted_pivot_table.top(50),
75
+ figsize=(26, 15),
76
+ ax_main_range=(0, 28),
77
+ ax_freq_range=(28, 29),
78
+ ax_legend_range=(29, 31),
79
+ mutation_counts=True)
80
+
81
+ ```
82
+
83
+ ### Requirements
84
+ Python 3.x
85
+ pandas, numpy, matplotlib, seaborn
86
+ ### License
87
+ This project is licensed under the MIT License - see the LICENSE file for details.
88
+
89
+ ### Author
90
+ xu62u4u6
91
+
@@ -0,0 +1,77 @@
1
+
2
+ # pymaftools
3
+
4
+ `pymaftools` is a Python package designed to handle and analyze MAF (Mutation Annotation Format) files. It provides utilities for working with mutation data, including the `MAF` and `PivotTable` classes for data manipulation, and functions for visualizing mutation data with oncoplots.
5
+
6
+ ## Features
7
+
8
+ - **MAF Class**: A utility to load, parse, and manipulate MAF files.
9
+ - **PivotTable Class**: A custom pivot table implementation for summarizing mutation frequencies and sorting genes and samples.
10
+ - **Oncoplot Visualization**: Generate oncoplot visualizations with mutation data and frequencies.
11
+
12
+ ## Installation
13
+
14
+ ### Using pip (from PyPI)
15
+ You can install the `pymaftools` package directly from PyPI using pip:
16
+
17
+ ```bash
18
+ pip install pymaftools
19
+ ```
20
+
21
+ ### Using GitHub (for the latest version)
22
+ To install directly from GitHub (if you want the latest changes):
23
+
24
+ ```bash
25
+ pip install git+https://github.com/xu62u4u6/pymaftools.git
26
+ ```
27
+
28
+
29
+ ## Usage
30
+
31
+ ### Importing the Package
32
+
33
+ ```python
34
+ from pymaftools.maf_utils import MAF, PivotTable
35
+ from pymaftools.maf_plots import create_oncoplot
36
+ ```
37
+
38
+ ### Example
39
+
40
+ ```python
41
+ # Load the MAF file
42
+ all_case_maf = MAF.read_maf("path_to_maf_file.maf", case_ID="case_1")
43
+
44
+ # Filter to keep only nonsynonymous mutations
45
+ filtered_all_case_maf = all_case_maf.filter_maf(MAF.nonsynonymous_types)
46
+
47
+ # Convert to pivot table (genes x samples table, mutation classification as values)
48
+ pivot_table = filtered_all_case_maf.to_pivot_table()
49
+
50
+ # Calculate mutation frequencies
51
+ pivot_table = pivot_table.add_freq()
52
+
53
+ # Sort the pivot table (by gene frequency and sample mutation count)
54
+ sorted_pivot_table = (pivot_table
55
+ .sort_genes_by_freq()
56
+ .sort_samples_by_mutations()
57
+ )
58
+
59
+ # Generate an oncoplot to show the top 50 genes with the highest mutation frequencies
60
+ create_oncoplot(sorted_pivot_table.top(50),
61
+ figsize=(26, 15),
62
+ ax_main_range=(0, 28),
63
+ ax_freq_range=(28, 29),
64
+ ax_legend_range=(29, 31),
65
+ mutation_counts=True)
66
+
67
+ ```
68
+
69
+ ### Requirements
70
+ Python 3.x
71
+ pandas, numpy, matplotlib, seaborn
72
+ ### License
73
+ This project is licensed under the MIT License - see the LICENSE file for details.
74
+
75
+ ### Author
76
+ xu62u4u6
77
+
@@ -0,0 +1,3 @@
1
+ # pymaftools/__init__.py
2
+ from .maf_utils import MAF, PivotTable
3
+ from .maf_plots import create_oncoplot, plot_bar, plot_heatmap, plot_freq, plot_legend
@@ -0,0 +1,180 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ import os
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ import matplotlib.colors as mcolors
7
+ from matplotlib.patches import Rectangle
8
+ from typing import Union
9
+
10
# MAF column names for a record's gene symbol, positions and alleles.
# NOTE(review): this list is not referenced anywhere else in this module;
# confirm whether it is still needed here.
target_col = [
    "Hugo_Symbol",
    "Start_Position",
    "End_Position",
    "Reference_Allele",
    "Tumor_Seq_Allele1",
    "Tumor_Seq_Allele2"
]
18
+
19
+
20
def create_oncoplot(pivot_table,
                    color_map=None,
                    mutation_counts: Union[bool, pd.Series] = True,
                    figsize=(18, 16),
                    wspace=0.5,
                    hspace=0.01,
                    freq_columns=None,
                    ax_main_range=(0, 24),
                    ax_freq_range=(24, 28),
                    ax_legend_range=(29, 31),
                    square=False,
                    show_frame=False,
                    bar_annot_fontsize=7):
    """Draw an oncoplot: mutation heatmap, frequency heatmap, legend and bars.

    The figure is laid out on a 2-row x 32-column grid.  The ``*_range``
    arguments are ``(start, stop)`` column slices of that grid.

    Parameters
    ----------
    pivot_table
        Table whose cells are variant labels (or ``False``).  It must expose
        ``gene_metadata`` (holding ``freq_columns``) and, when
        ``mutation_counts`` is ``True``, ``sample_metadata.mutations_count``.
    color_map
        Mapping from variant label to colour.  A built-in palette is used
        when omitted or empty.
    mutation_counts
        ``True``: plot ``pivot_table.sample_metadata.mutations_count``.
        ``False`` or ``None``: draw no bar chart.
        Anything else (Series/array): plotted as given, in the order supplied.
    figsize, wspace, hspace
        Passed to ``plt.figure`` / ``plt.GridSpec``.
    freq_columns
        Column name or list of column names of ``gene_metadata`` to show in
        the frequency heatmap.  Defaults to ``["freq"]``.
    ax_main_range, ax_freq_range, ax_legend_range
        Grid column slices for the main heatmap, the frequency heatmap and
        the legend.  The bar chart shares ``ax_main_range``.
    square, show_frame
        Forwarded to ``plot_heatmap`` (``square`` also to ``plot_freq``).
    bar_annot_fontsize
        Font size of the value labels above the bars.
    """
    # Normalise freq_columns so that gene_metadata[...] always yields 2-D data.
    if freq_columns is None:
        freq_columns = ["freq"]
    elif isinstance(freq_columns, str):
        freq_columns = [freq_columns]
    else:
        freq_columns = list(freq_columns)

    heatmap_data = pivot_table
    freq_data = pivot_table.gene_metadata[freq_columns].values

    # Default colour mapping.
    color_map = color_map or {
        'False': '#FFFFFF',                # white (no mutation)
        'Missense_Mutation': 'gray',       # gray
        'Frame_Shift_Ins': '#FF4500',      # darker red
        'Frame_Shift_Del': '#4682B4',      # darker blue
        'In_Frame_Ins': '#FF707A',         # light red
        'In_Frame_Del': '#ADD8E6',         # light blue
        'Nonsense_Mutation': '#90EE90',    # low-saturation green
        'Splice_Site': '#CB704D',          # low-saturation brown
        'Multi_Hit': '#000000',            # black (multiple mutations)
        "Silent": "#eeeeee",
        "3'UTR": "#bbbbcc",
        "5'UTR": "#bbbbcc",
        "IGR": "#bbbbcc",
        "Intron": "#bbbbcc",
        "RNA": "#bbbbcc",
    }

    # Resolve what the bar chart should show.  The flag is tested with
    # isinstance rather than ``== True`` because comparing a Series/array to
    # True yields an array whose truth value is ambiguous.
    if isinstance(mutation_counts, (bool, np.bool_)):
        if mutation_counts:
            bar_values = pivot_table.sample_metadata.mutations_count.values
        else:
            bar_values = None
    else:
        bar_values = mutation_counts

    fig = plt.figure(figsize=figsize)
    gs = plt.GridSpec(2, 32, height_ratios=[1, 12], wspace=wspace, hspace=hspace)

    main_cols = slice(ax_main_range[0], ax_main_range[1])
    if bar_values is not None:
        ax_bar = fig.add_subplot(gs[0, main_cols])  # bar chart above the heatmap
        plot_bar(ax_bar, bar_values, fontsize=bar_annot_fontsize)

    ax_main = fig.add_subplot(gs[1, main_cols])                                   # main heatmap
    ax_freq = fig.add_subplot(gs[1, ax_freq_range[0]:ax_freq_range[1]])           # frequency heatmap
    ax_legend = fig.add_subplot(gs[1, ax_legend_range[0]:ax_legend_range[1]])     # legend

    plot_heatmap(ax_main, heatmap_data, color_map, square=square, show_frame=show_frame)
    plot_freq(ax_freq, freq_data, freq_columns, square=square)
    plot_legend(ax_legend, color_map)

    ax_main.set_xlabel("Mutations")
81
+
82
+
83
def plot_bar(ax_bar, mutation_counts, fontsize=6, tmb_divisor=40):
    """Draw one bar per sample showing ``mutation_counts / tmb_divisor``.

    Parameters
    ----------
    ax_bar
        Matplotlib axes to draw on.
    mutation_counts
        Per-sample mutation counts (list, ndarray or Series), in the same
        order as the heatmap columns the bars should line up with.
    fontsize
        Font size of the value label printed above each bar.
    tmb_divisor
        Value the counts are divided by before plotting.  Defaults to 40.
        NOTE(review): presumably the sequenced region size in Mb, given the
        'TMB' axis label — confirm.
    """
    # asarray lets plain lists work and makes the division element-wise.
    counts = np.asarray(mutation_counts, dtype=float)
    tmbs = counts / tmb_divisor

    positions = np.arange(len(counts))
    ax_bar.bar(positions, tmbs, width=0.95, color='gray', edgecolor='white')

    # The -0.5 offsets make the bars line up with the heatmap cells below.
    ax_bar.set_xlim(-0.5, len(counts) - 0.5)

    # Value label above each bar.
    for i, tmb in enumerate(tmbs):
        ax_bar.text(i, tmb + 2, f"{tmb:.1f}", ha='center', fontsize=fontsize)

    # Keep only the left spine; hide the other spines and the x ticks.
    for side, visible in (('top', False), ('right', False),
                          ('left', True), ('bottom', False)):
        ax_bar.spines[side].set_visible(visible)
    ax_bar.set_xticks([])
    ax_bar.set_xlabel('TMB')
107
+
108
+
109
def plot_heatmap(ax_main, heatmap_data, color_map, linecolor="white", square=True, show_frame=False):
    """Draw the mutation heatmap, one coloured cell per (row, column) value.

    Parameters
    ----------
    ax_main
        Matplotlib axes to draw on.
    heatmap_data
        DataFrame whose cell values are looked up in ``color_map``.
    color_map
        Mapping from cell value to colour; values not in the mapping are
        drawn white (``'#FFFFFF'``).
    linecolor
        Colour of the grid lines between cells.
    square
        Forwarded to ``seaborn.heatmap``.
    show_frame
        If true, draw a light-gray outline around every group of three
        consecutive columns.
    """
    def color_encode(val):
        return color_map.get(val, '#FFFFFF')

    # DataFrame.map was added in pandas 2.1; older versions only provide
    # applymap.  Check on the class so a column named "map" cannot interfere.
    if hasattr(type(heatmap_data), "map"):
        cell_colors = heatmap_data.map(color_encode)
    else:
        cell_colors = heatmap_data.applymap(color_encode)
    cell_colors = cell_colors.to_numpy()

    # Base heatmap: provides the axes, tick labels and grid geometry.
    sns.heatmap(
        heatmap_data.notna(),
        cmap=['white', 'grey'],  # white/grey for value absent/present
        cbar=False,
        linewidths=1,
        linecolor=linecolor,
        ax=ax_main,
        square=square
    )
    ax_main.set_yticklabels(ax_main.get_yticklabels(), rotation=0)

    # Paint every cell with its mapped colour.
    for (row, col), face in np.ndenumerate(cell_colors):
        ax_main.add_patch(Rectangle(
            (col, row), 1, 1,
            fill=True,
            facecolor=face,
            edgecolor=linecolor,
            lw=1
        ))

    # Light frame around every three columns.
    if show_frame:
        n_rows, n_cols = heatmap_data.shape[0], heatmap_data.shape[1]
        for start in range(0, n_cols, 3):
            ax_main.add_patch(Rectangle(
                (start, -0.5), 3, n_rows + 1,
                linewidth=1, edgecolor='lightgray', facecolor='none'
            ))
147
+
148
def plot_freq(ax_freq, freq_data, freq_columns, square=True, show_frame=True):
    """Draw the frequency heatmap with a two-decimal label in every cell.

    ``freq_data`` is indexed as ``freq_data[row, col]``; ``freq_columns``
    supplies the x tick labels.  ``show_frame`` is accepted but not used.
    """
    peak = freq_data.max()

    # Colour scale runs from 0 to the largest value; no colorbar is shown.
    sns.heatmap(
        freq_data,
        cmap='Blues',
        linewidths=0.5,
        ax=ax_freq,
        cbar=False,
        vmin=0,
        vmax=peak,
        alpha=0.8,
        square=square,
    )

    # Remove the ticks seaborn generated.
    ax_freq.set_xticks([])
    ax_freq.set_yticks([])

    # Cell labels: black text below 60% of the peak, white text otherwise.
    threshold = 0.6 * peak
    for row_idx, row_values in enumerate(freq_data):
        for col_idx, value in enumerate(row_values):
            if value < threshold:
                text_color = 'black'
            else:
                text_color = 'white'
            ax_freq.text(col_idx + 0.5, row_idx + 0.5, f"{value:.2f}",
                         va='center', ha='center', color=text_color)

    ax_freq.set_title('Frequency', pad=20)

    # One centred, vertical tick label per frequency column.
    tick_positions = np.arange(len(freq_columns)) + 0.5
    ax_freq.set_xticks(tick_positions)
    ax_freq.set_xticklabels(freq_columns, rotation=90)
175
+
176
def plot_legend(ax_legend, color_map):
    """Fill ``ax_legend`` with one colour swatch per ``color_map`` entry."""
    swatches = []
    for label, colour in color_map.items():
        swatches.append(Rectangle((0, 0), 1, 1, color=colour, label=label))

    ax_legend.legend(
        handles=swatches,
        title="Variant Types",
        loc='center',
        fontsize='small',
        frameon=False,
    )
    # The axes only hosts the legend, so hide its frame and ticks.
    ax_legend.axis('off')
@@ -0,0 +1,158 @@
1
+ import pandas as pd
2
+
3
class PivotTable(pd.DataFrame):
    """Gene-by-sample table carrying per-gene and per-sample metadata.

    Rows (the index) are genes and columns are samples.  Two side tables are
    kept alongside the data:

    * ``gene_metadata``   - DataFrame indexed like the rows.
    * ``sample_metadata`` - DataFrame indexed like the columns.

    A cell counts as "mutated" when its value is not equal to ``False``.
    Every method below returns a new table and leaves ``self`` (including its
    metadata) untouched.
    """

    # Attribute names pandas propagates to derived objects.  Propagation is
    # by reference, so methods here always attach their own metadata copies.
    _metadata = ["gene_metadata", "sample_metadata"]

    def __init__(self, data=None, mutations_count: pd.Series = None, *args, **kwargs):
        """Build the table and start with empty metadata frames.

        ``mutations_count`` is accepted for backward compatibility but is
        not used.  Remaining arguments go to ``pd.DataFrame``.
        """
        super().__init__(data, *args, **kwargs)
        self.gene_metadata = pd.DataFrame(index=self.index)
        self.sample_metadata = pd.DataFrame(index=self.columns)

    @property
    def _constructor(self):
        # Keep the PivotTable type through pandas operations.
        return PivotTable

    @staticmethod
    def _with_metadata(table, gene_metadata, sample_metadata):
        """Attach the given metadata frames to ``table`` and return it."""
        table.gene_metadata = gene_metadata
        table.sample_metadata = sample_metadata
        return table

    @staticmethod
    def _column_weight(column: pd.Series) -> int:
        """Read a boolean column as a binary number, first row most significant."""
        bits = "".join(column.astype(int).astype(str))
        # An empty column has no bits; int("", 2) would raise.
        return int(bits, 2) if bits else 0

    @staticmethod
    def calculate_frequency(df: pd.DataFrame) -> pd.Series:
        """Return, per row, the fraction of columns whose value is not ``False``."""
        return df.ne(False).sum(axis=1) / df.shape[1]

    def add_freq(self, groups: dict = None) -> "PivotTable":
        """Return a copy with mutation frequencies added to ``gene_metadata``.

        Parameters
        ----------
        groups
            Optional mapping ``{group_name: sub-table}``.  For every entry a
            ``"<group_name>_freq"`` column is computed from that sub-table.
            The overall frequency of this table is always stored as
            ``"freq"``.
        """
        gene_metadata = self.gene_metadata.copy()
        for name, subset in (groups or {}).items():
            gene_metadata[f"{name}_freq"] = PivotTable.calculate_frequency(subset)
        gene_metadata["freq"] = PivotTable.calculate_frequency(self)
        return self._with_metadata(self.copy(), gene_metadata, self.sample_metadata.copy())

    def sort_genes_by_freq(self, by="freq", ascending=False):
        """Return a copy with rows ordered by a ``gene_metadata`` column.

        ``by`` and ``ascending`` are forwarded to ``DataFrame.sort_values``.
        The sort is stable, so genes with equal values keep their order.
        """
        ordered = self.gene_metadata.sort_values(by=by, ascending=ascending, kind="stable")
        return self._with_metadata(
            self.loc[ordered.index],
            ordered,
            self.sample_metadata.copy(),
        )

    def sort_samples_by_mutations(self, top: int = 10):
        """Return a copy with columns ordered by their mutation pattern.

        The mutated/not-mutated flags of the first ``top`` rows of each
        column are read as a binary number (first row = most significant
        bit).  Columns are sorted by that number in descending order, and the
        number is stored in ``sample_metadata["mutations_weight"]``.
        """
        weights = self.ne(False).head(top).apply(PivotTable._column_weight, axis=0)

        sample_metadata = self.sample_metadata.copy()
        sample_metadata["mutations_weight"] = weights

        order = weights.sort_values(ascending=False, kind="stable").index
        return self._with_metadata(
            self.loc[:, order],
            self.gene_metadata.copy(),
            sample_metadata.loc[order, :],
        )

    def top(self, n_top=50):
        """Return the first ``n_top`` rows together with their gene metadata."""
        return self._with_metadata(
            self.head(n_top),
            self.gene_metadata.head(n_top),
            self.sample_metadata.copy(),
        )
70
+
71
class MAF(pd.DataFrame):
    """DataFrame of Mutation Annotation Format (MAF) records.

    GDC MAF file fields:
    https://docs.gdc.cancer.gov/Encyclopedia/pages/Mutation_Annotation_Format_TCGAv2/
    """

    # Columns joined with "|" to form each record's index label in read_maf.
    index_col = [
        "Hugo_Symbol",
        "Start_Position",
        "End_Position",
        "Reference_Allele",
        "Tumor_Seq_Allele1",
        "Tumor_Seq_Allele2"
    ]

    # Variant_Classification values kept by read_maf.  The attribute name is
    # misspelled but kept as-is because it is part of the public interface.
    vaild_variant_classfication = [
        "Frame_Shift_Del",
        "Frame_Shift_Ins",
        "In_Frame_Del",
        "In_Frame_Ins",
        "Missense_Mutation",
        "Nonsense_Mutation",
        "Silent",
        "Splice_Site",
        "Translation_Start_Site",
        "Nonstop_Mutation",
        "3'UTR",
        "3'Flank",
        "5'UTR",
        "5'Flank",
        "IGR",
        "Intron",
        "RNA",
        "Targeted_Region"
    ]

    nonsynonymous_types = [
        "Frame_Shift_Del", "Frame_Shift_Ins", "In_Frame_Del", "In_Frame_Ins",
        "Missense_Mutation", "Nonsense_Mutation", "Splice_Site",
        "Translation_Start_Site", "Nonstop_Mutation"
    ]

    @classmethod
    def read_maf(cls, maf_path, case_ID, preffix="", suffix=""):
        """Load one tab-separated MAF file as a ``MAF`` table.

        The first line of the file is skipped.  A ``case_ID`` column holding
        ``f"{preffix}{case_ID}{suffix}"`` is added, the index is set to the
        ``index_col`` values joined with ``"|"``, and only rows whose
        ``Variant_Classification`` is in ``vaild_variant_classfication`` are
        kept.

        Parameters
        ----------
        maf_path
            Path (or buffer) passed to ``pd.read_csv``.
        case_ID
            Identifier stored for every record of this file.
        preffix, suffix
            Strings put before / after ``case_ID``.
        """
        maf = cls(pd.read_csv(maf_path, skiprows=1, sep="\t"))
        maf["case_ID"] = f"{preffix}{case_ID}{suffix}"

        # Build the "|"-joined key column-wise instead of with a row-wise
        # apply; this also works when the file holds no records.
        key_parts = maf.loc[:, cls.index_col].astype(str)
        first, *rest = (key_parts[col] for col in cls.index_col)
        # to_numpy() drops the Series name so the index stays unnamed and
        # cannot clash with the "Hugo_Symbol" column.
        maf.index = first.str.cat(rest, sep="|").to_numpy()

        maf = maf.filter_maf(cls.vaild_variant_classfication)
        return cls(maf)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    @property
    def _constructor(self):
        # Make sure objects returned by pandas operations are MAF instances.
        return MAF

    def filter_maf(self, mutation_types):
        """Return the rows whose ``Variant_Classification`` is in ``mutation_types``."""
        keep = self["Variant_Classification"].isin(mutation_types)
        return self[keep]

    @staticmethod
    def merge_mutations(column):
        """Collapse the values of one group into a single label.

        Returns ``False`` when no value differs from ``False``, the value
        itself when exactly one distinct non-``False`` value is present, and
        ``"Multi_Hit"`` when there are several.
        """
        observed = column[column != False].unique()  # noqa: E712 (element-wise)
        if len(observed) == 0:
            return False
        if len(observed) == 1:
            return observed[0]
        return "Multi_Hit"

    def to_pivot_table(self) -> "PivotTable":
        """Pivot to a ``Hugo_Symbol`` x ``case_ID`` table of variant labels.

        Several records in one cell are merged with ``merge_mutations``;
        cells without records become ``False``.  The result's
        ``sample_metadata`` receives ``mutations_count`` and ``TMB``
        (``mutations_count / 40``).
        """
        pivoted = self.pivot_table(
            values="Variant_Classification",
            index="Hugo_Symbol",
            columns="case_ID",
            aggfunc=MAF.merge_mutations
        ).fillna(False)

        table = PivotTable(pivoted)
        counts = self.mutations_count
        table.sample_metadata["mutations_count"] = counts
        table.sample_metadata["TMB"] = counts / 40
        return table

    @property
    def mutations_count(self) -> pd.Series:
        """Number of records per ``case_ID``."""
        return self.groupby(self["case_ID"]).size()
@@ -0,0 +1,91 @@
1
+ Metadata-Version: 2.1
2
+ Name: pymaftools
3
+ Version: 0.1
4
+ Summary: pymaftools is a Python package for handling and analyzing Mutation Annotation Format (MAF) files. It provides utilities for data manipulation and visualization, including classes for MAF parsing and oncoplot generation.
5
+ Home-page: https://github.com/xu62u4u6/pymaftools
6
+ Author: xu62u4u6
7
+ Author-email: 199928ltyos@gmail.com
8
+ License: MIT
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+
15
+
16
+ # pymaftools
17
+
18
+ `pymaftools` is a Python package designed to handle and analyze MAF (Mutation Annotation Format) files. It provides utilities for working with mutation data, including the `MAF` and `PivotTable` classes for data manipulation, and functions for visualizing mutation data with oncoplots.
19
+
20
+ ## Features
21
+
22
+ - **MAF Class**: A utility to load, parse, and manipulate MAF files.
23
+ - **PivotTable Class**: A custom pivot table implementation for summarizing mutation frequencies and sorting genes and samples.
24
+ - **Oncoplot Visualization**: Generate oncoplot visualizations with mutation data and frequencies.
25
+
26
+ ## Installation
27
+
28
+ ### Using pip (from PyPI)
29
+ You can install the `pymaftools` package directly from PyPI using pip:
30
+
31
+ ```bash
32
+ pip install pymaftools
33
+ ```
34
+
35
+ ### Using GitHub (for the latest version)
36
+ To install directly from GitHub (if you want the latest changes):
37
+
38
+ ```bash
39
+ pip install git+https://github.com/xu62u4u6/pymaftools.git
40
+ ```
41
+
42
+
43
+ ## Usage
44
+
45
+ ### Importing the Package
46
+
47
+ ```python
48
+ from pymaftools.maf_utils import MAF, PivotTable
49
+ from pymaftools.maf_plots import create_oncoplot
50
+ ```
51
+
52
+ ### Example
53
+
54
+ ```python
55
+ # Load the MAF file
56
+ all_case_maf = MAF.read_maf("path_to_maf_file.maf", case_ID="case_1")
57
+
58
+ # Filter to keep only nonsynonymous mutations
59
+ filtered_all_case_maf = all_case_maf.filter_maf(MAF.nonsynonymous_types)
60
+
61
+ # Convert to pivot table (genes x samples table, mutation classification as values)
62
+ pivot_table = filtered_all_case_maf.to_pivot_table()
63
+
64
+ # Calculate mutation frequencies
65
+ pivot_table = pivot_table.add_freq()
66
+
67
+ # Sort the pivot table (by gene frequency and sample mutation count)
68
+ sorted_pivot_table = (pivot_table
69
+ .sort_genes_by_freq()
70
+ .sort_samples_by_mutations()
71
+ )
72
+
73
+ # Generate an oncoplot to show the top 50 genes with the highest mutation frequencies
74
+ create_oncoplot(sorted_pivot_table.top(50),
75
+ figsize=(26, 15),
76
+ ax_main_range=(0, 28),
77
+ ax_freq_range=(28, 29),
78
+ ax_legend_range=(29, 31),
79
+ mutation_counts=True)
80
+
81
+ ```
82
+
83
+ ### Requirements
84
+ Python 3.x
85
+ pandas, numpy, matplotlib, seaborn
86
+ ### License
87
+ This project is licensed under the MIT License - see the LICENSE file for details.
88
+
89
+ ### Author
90
+ xu62u4u6
91
+
@@ -0,0 +1,11 @@
1
+ LICENSE
2
+ README.md
3
+ setup.py
4
+ pymaftools/__init__.py
5
+ pymaftools/maf_plots.py
6
+ pymaftools/maf_utils.py
7
+ pymaftools.egg-info/PKG-INFO
8
+ pymaftools.egg-info/SOURCES.txt
9
+ pymaftools.egg-info/dependency_links.txt
10
+ pymaftools.egg-info/requires.txt
11
+ pymaftools.egg-info/top_level.txt
@@ -0,0 +1,4 @@
1
+ pandas
2
+ numpy
3
+ matplotlib
4
+ seaborn
@@ -0,0 +1 @@
1
+ pymaftools
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,25 @@
1
from pathlib import Path

from setuptools import setup, find_packages

# Read the README relative to this file (not the current working directory),
# with an explicit encoding, and without leaving the file handle open.
LONG_DESCRIPTION = (Path(__file__).resolve().parent / "README.md").read_text(encoding="utf-8")

setup(
    name='pymaftools',
    version='0.1',
    author="xu62u4u6",
    author_email="199928ltyos@gmail.com",
    description='pymaftools is a Python package for handling and analyzing Mutation Annotation Format (MAF) files. It provides utilities for data manipulation and visualization, including classes for MAF parsing and oncoplot generation.',
    long_description=LONG_DESCRIPTION,
    long_description_content_type='text/markdown',
    packages=find_packages(),
    install_requires=[
        'pandas',
        'numpy',
        'matplotlib',
        'seaborn',
    ],
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    license='MIT',
    url='https://github.com/xu62u4u6/pymaftools',
)