pyplogo 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyplogo-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,27 @@
1
+ Metadata-Version: 2.4
2
+ Name: pyplogo
3
+ Version: 0.1.0
4
+ Summary: A professional tool for protein secondary structure analysis and visualization. Offers a complete solution from structure extraction, multi-theme customization to high-quality publication-ready plotting.
5
+ License: MIT
6
+ Requires-Python: >=3.10
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: biopython>=1.86
9
+ Requires-Dist: matplotlib>=3.10.8
10
+ Requires-Dist: numpy>=2.2.6
11
+
12
+ # pyplogo-toolkit
13
+
14
+ PyPLogo工具包:专业的蛋白质二级结构分析可视化工具。提供从结构提取、多主题定制到高质量出版级绘图的完整解决方案。
15
+
16
+
17
+
18
+ PyPLogo Toolkit: A professional tool for protein secondary structure analysis and visualization. Offers a complete solution from structure extraction, multi-theme customization to high-quality publication-ready plotting.
19
+
20
+ <img width="1190" height="790" alt="output-1" src="https://github.com/user-attachments/assets/32ea6516-c587-44ea-9a30-b1a5a79923e1" />
21
+
22
+ <img width="2987" height="1335" alt="output-2" src="https://github.com/user-attachments/assets/bd4a6dc9-a04a-4a95-8114-5c8adfb8266b" />
23
+
24
+ <img width="1190" height="790" alt="output" src="https://github.com/user-attachments/assets/4b34c491-f050-4336-9c99-5d2c9da98381" />
25
+
26
+
27
+
@@ -0,0 +1,16 @@
1
+ # pyplogo-toolkit
2
+
3
+ PyPLogo工具包:专业的蛋白质二级结构分析可视化工具。提供从结构提取、多主题定制到高质量出版级绘图的完整解决方案。
4
+
5
+
6
+
7
+ PyPLogo Toolkit: A professional tool for protein secondary structure analysis and visualization. Offers a complete solution from structure extraction, multi-theme customization to high-quality publication-ready plotting.
8
+
9
+ <img width="1190" height="790" alt="output-1" src="https://github.com/user-attachments/assets/32ea6516-c587-44ea-9a30-b1a5a79923e1" />
10
+
11
+ <img width="2987" height="1335" alt="output-2" src="https://github.com/user-attachments/assets/bd4a6dc9-a04a-4a95-8114-5c8adfb8266b" />
12
+
13
+ <img width="1190" height="790" alt="output" src="https://github.com/user-attachments/assets/4b34c491-f050-4336-9c99-5d2c9da98381" />
14
+
15
+
16
+
@@ -0,0 +1,59 @@
1
+ """
2
+ PyPLogo - Protein Secondary Structure Visualization Tool
3
+ A Python package for extracting and visualizing protein secondary structures
4
+ from PDB/CIF files using DSSP with publication-quality output.
5
+ """
6
+
7
+ __version__ = "0.1.0"
8
+ __author__ = "Your Name"
9
+ __email__ = "your.email@example.com"
10
+
11
+ # Import main classes for easier access
12
+ from .visualizers.secondary_structure import SecondaryStructureVisualizer
13
+ from .visualizers.themes import ScientificTheme, NatureTheme, \
14
+ DesertTheme, ArcticTheme, TropicalTheme, GemstoneTheme, VintageTheme, \
15
+ SpaceTheme, SpringTheme, AutumnTheme, CoralReefTheme, AuroraTheme, \
16
+ MetalTheme, CandyTheme, OasisTheme, StarlightTheme, SakuraTheme, \
17
+ VolcanoTheme, JadeTheme, TwilightTheme, OnyxTheme, LavenderTheme, \
18
+ HoneyTheme, MintTheme, CoralTheme, AmethystTheme, LemonTheme, \
19
+ SapphireTheme, RubyTheme, EmeraldTheme, TopazTheme, MalachiteTheme, \
20
+ OpalTheme, PearlTheme
21
+
22
+ # Define public API
23
+ __all__ = [
24
+ 'SecondaryStructureVisualizer',
25
+ 'ScientificTheme',
26
+ 'NatureTheme',
27
+ 'DesertTheme',
28
+ 'ArcticTheme',
29
+ 'TropicalTheme',
30
+ 'GemstoneTheme',
31
+ 'VintageTheme',
32
+ 'SpaceTheme',
33
+ 'SpringTheme',
34
+ 'AutumnTheme',
35
+ 'CoralReefTheme',
36
+ 'AuroraTheme',
37
+ 'MetalTheme',
38
+ 'CandyTheme',
39
+ 'OasisTheme',
40
+ 'StarlightTheme',
41
+ 'SakuraTheme',
42
+ 'VolcanoTheme',
43
+ 'JadeTheme',
44
+ 'TwilightTheme',
45
+ 'OnyxTheme',
46
+ 'LavenderTheme',
47
+ 'HoneyTheme',
48
+ 'MintTheme',
49
+ 'CoralTheme',
50
+ 'AmethystTheme',
51
+ 'LemonTheme',
52
+ 'SapphireTheme',
53
+ 'RubyTheme',
54
+ 'EmeraldTheme',
55
+ 'TopazTheme',
56
+ 'MalachiteTheme',
57
+ 'OpalTheme',
58
+ 'PearlTheme'
59
+ ]
@@ -0,0 +1,8 @@
1
+ """
2
+ Data module for PyPLogo
3
+ Contains data structures for amino acid properties and characteristics
4
+ """
5
+
6
+ from .aa_properties import AA_PROPERTIES, AA_COLORS, SS_COLORS
7
+
8
+ __all__ = ['AA_PROPERTIES', 'AA_COLORS', 'SS_COLORS']
@@ -0,0 +1,46 @@
1
+ """
2
+ Amino acid properties and characteristics
3
+ """
4
+
5
# Rows of (one-letter code, full name, physico-chemical class, weight) for
# the 20 standard amino acids; expanded into AA_PROPERTIES below.
_AA_TABLE = (
    ('A', 'Alanine', 'hydrophobic', 89.1),
    ('C', 'Cysteine', 'polar', 121.2),
    ('D', 'Aspartic Acid', 'acidic', 133.1),
    ('E', 'Glutamic Acid', 'acidic', 147.1),
    ('F', 'Phenylalanine', 'hydrophobic', 165.2),
    ('G', 'Glycine', 'special', 75.1),
    ('H', 'Histidine', 'basic', 155.2),
    ('I', 'Isoleucine', 'hydrophobic', 131.2),
    ('K', 'Lysine', 'basic', 146.2),
    ('L', 'Leucine', 'hydrophobic', 131.2),
    ('M', 'Methionine', 'hydrophobic', 149.2),
    ('N', 'Asparagine', 'polar', 132.1),
    ('P', 'Proline', 'special', 115.1),
    ('Q', 'Glutamine', 'polar', 146.2),
    ('R', 'Arginine', 'basic', 174.2),
    ('S', 'Serine', 'polar', 105.1),
    ('T', 'Threonine', 'polar', 119.1),
    ('V', 'Valine', 'hydrophobic', 117.1),
    ('W', 'Tryptophan', 'hydrophobic', 204.2),
    ('Y', 'Tyrosine', 'polar', 181.2),
)

# One-letter code -> {'name', 'type', 'weight'} record.
AA_PROPERTIES = {
    code: {'name': full_name, 'type': aa_class, 'weight': weight}
    for code, full_name, aa_class, weight in _AA_TABLE
}

# Hex color assigned to each amino acid class used in AA_PROPERTIES['type'].
AA_COLORS = dict(
    hydrophobic='#FF6B6B',
    polar='#4ECDC4',
    acidic='#FFE66D',
    basic='#45B7D1',
    special='#96CEB4',
)

# Hex color assigned to each secondary structure code.
SS_COLORS = dict([
    ('H', '#FF6B6B'),  # alpha-helix - red
    ('E', '#FFE66D'),  # beta-strand - yellow
    ('C', '#45B7D1'),  # coil - blue
    ('G', '#FF8E72'),  # 3-10 helix - orange
    ('I', '#C44D58'),  # pi-helix - dark red
    ('T', '#4ECDC4'),  # turn - teal
    ('S', '#96CEB4'),  # bend - green
])
@@ -0,0 +1,8 @@
1
+ """
2
+ Extractors module for PyPLogo
3
+ Contains classes for extracting secondary structure from protein structure files
4
+ """
5
+
6
+ from .structure_based import StructureExtractor
7
+
8
+ __all__ = ['StructureExtractor']
@@ -0,0 +1,325 @@
1
+ import os
2
+ import warnings
3
+ import tempfile
4
+ import numpy as np
5
+ from Bio.PDB import PDBParser, MMCIFParser, DSSP
6
+ from Bio.PDB.PDBExceptions import PDBConstructionWarning
7
+ from Bio import Align
8
+ from Bio.Align import substitution_matrices
9
+ from dataclasses import dataclass, field
10
+ from typing import List, Tuple
11
+
12
@dataclass
class StructureData:
    """Container for the results of one structure extraction."""
    # One-letter amino acid sequence the secondary structure refers to.
    sequence: str
    # Secondary structure string, one code per residue of ``sequence``.
    secondary_structure: str
    # Path of the structure file the data was extracted from.
    source_file: str
    # Label of the extraction method ("dssp" or "dssp (matched)").
    method: str
    # Disulfide bonds as pairs of residue numbers taken from the structure file.
    disulfide_bonds: List[Tuple[int, int]] = field(default_factory=list)


class StructureExtractor:
    """Extract secondary structure and disulfide bonds from PDB/CIF files using DSSP."""

    # One-letter codes of the 20 standard amino acids kept in extracted sequences.
    _STANDARD_AA = frozenset('ACDEFGHIKLMNPQRSTVWY')

    # Three-letter residue names accepted by ``_is_amino_acid``.
    _STANDARD_RESNAMES = frozenset((
        'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY',
        'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER',
        'THR', 'TRP', 'TYR', 'VAL',
    ))

    # Two cysteines are reported as bonded when their SG atoms are closer than this.
    _DISULFIDE_CUTOFF = 3.0

    def __init__(self):
        """Set up the DSSP code mapping and point DSSP at the bundled dictionaries."""
        self.ss_mapping = {
            'H': 'H', 'G': 'G', 'I': 'I',  # Helices
            'E': 'E', 'B': 'E',            # Strands
            'T': 'T', 'S': 'S',            # Turns and bends
            ' ': 'C', '-': 'C'             # Coil
        }

        # Export the dictionary directory (../data/dssp_dicts relative to this
        # file) through the LIBCIFPP_DATA_DIR environment variable.
        base_dir = os.path.dirname(os.path.abspath(__file__))
        dict_dir = os.path.abspath(os.path.join(base_dir, '..', 'data', 'dssp_dicts'))
        os.environ['LIBCIFPP_DATA_DIR'] = dict_dir
        print(f"设置 DSSP 字典路径: {dict_dir}")

    def _run_dssp(self, model, file_path):
        """Run DSSP on ``file_path``, trying several invocations in turn.

        The ``DSSP`` class is tried first with the ``mkdssp`` executable and
        then with its default executable; if both fail,
        ``dssp_dict_from_pdb_file`` is tried the same way.

        Returns:
            A ``DSSP`` object, or the plain dictionary produced by
            ``dssp_dict_from_pdb_file``.

        Raises:
            RuntimeError: if every attempt fails; the last error is chained.
        """
        last_error = None

        # Deliberately broad: any failure of one invocation moves on to the next.
        for kwargs in ({'dssp': 'mkdssp'}, {}):
            try:
                return DSSP(model, file_path, **kwargs)
            except Exception as exc:
                last_error = exc

        try:
            from Bio.PDB.DSSP import dssp_dict_from_pdb_file
        except Exception as exc:
            raise RuntimeError(f"DSSP failed to produce an output: {str(exc)}") from exc

        for kwargs in ({'DSSP': 'mkdssp'}, {}):
            try:
                outcome = dssp_dict_from_pdb_file(file_path, **kwargs)
            except Exception as exc:
                last_error = exc
                continue
            # The helper returns a (dictionary, keys) pair; keep the dictionary.
            return outcome[0] if isinstance(outcome, tuple) else outcome

        raise RuntimeError(
            f"DSSP failed to produce an output: {str(last_error)}"
        ) from last_error

    def from_pdb(self, file_path: str, chain_id: str = 'A', input_sequence: str = None) -> StructureData:
        """Extract secondary structure and disulfide bonds from a PDB file.

        Args:
            file_path: Path to the PDB file.
            chain_id: Identifier of the chain to extract.
            input_sequence: Optional sequence; when given, the secondary
                structure is mapped onto it via ``match_sequence``.

        Returns:
            The extracted ``StructureData``; ``source_file`` is the absolute
            path of the input file.

        Raises:
            ValueError: on any failure while reading or processing the file.
        """
        repaired_path = None
        try:
            source_path = os.path.abspath(file_path)

            with open(source_path, 'r') as handle:
                first_line = handle.readline()

            # A file without a leading HEADER record is parsed from a
            # temporary copy that has one prepended.
            working_path = source_path
            if not first_line.startswith('HEADER'):
                print("检测到PDB文件缺少HEADER记录,正在修复...")
                repaired_path = self._fix_pdb_header(source_path)
                working_path = repaired_path

            with warnings.catch_warnings():
                warnings.simplefilter("ignore", PDBConstructionWarning)
                parser = PDBParser(QUIET=True)
                structure = parser.get_structure('protein', working_path)
            model = structure[0]

            dssp_result = self._run_dssp(model, working_path)
            return self._build_result(dssp_result, model, chain_id,
                                      source_path, input_sequence)

        except Exception as e:
            raise ValueError(f"Error processing PDB file: {e}") from e
        finally:
            # Remove the temporary repaired copy; failing to delete it must
            # not mask the real outcome of the extraction.
            if repaired_path is not None:
                try:
                    os.remove(repaired_path)
                except OSError:
                    pass

    def _fix_pdb_header(self, original_file):
        """Write a copy of ``original_file`` with a HEADER record prepended.

        Returns:
            Path of the temporary ``.pdb`` file; the caller is responsible
            for deleting it.
        """
        with open(original_file, 'r') as handle:
            content = handle.read()

        header = "HEADER PROTEIN 01-JAN-70 FIXED_PDB\n"

        # delete=False so the file survives the ``with`` block and can be
        # re-opened by name by the parser and by DSSP.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.pdb', delete=False) as temp_file:
            temp_file.write(header + content)

        return temp_file.name

    def from_cif(self, file_path: str, chain_id: str = 'A', input_sequence: str = None) -> StructureData:
        """Extract secondary structure and disulfide bonds from a CIF file.

        Args:
            file_path: Path to the mmCIF file.
            chain_id: Identifier of the chain to extract.
            input_sequence: Optional sequence; when given, the secondary
                structure is mapped onto it via ``match_sequence``.

        Returns:
            The extracted ``StructureData``.

        Raises:
            ValueError: on any failure while reading or processing the file.
        """
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", PDBConstructionWarning)
                parser = MMCIFParser(QUIET=True)
                structure = parser.get_structure('protein', file_path)
            model = structure[0]  # only the first model is used

            dssp_result = self._run_dssp(model, file_path)
            return self._build_result(dssp_result, model, chain_id,
                                      file_path, input_sequence)

        except Exception as e:
            raise ValueError(f"Error processing CIF file: {e}") from e

    def _build_result(self, dssp, model, chain_id, source_path, input_sequence):
        """Combine DSSP output, disulfide bonds and optional sequence matching."""
        result = self._extract_from_dssp(dssp, chain_id, source_path)
        result.disulfide_bonds = self._extract_disulfide_bonds(model, chain_id)

        if not input_sequence:
            return result

        matched_ss = self.match_sequence(
            result.sequence,
            result.secondary_structure,
            input_sequence
        )
        return StructureData(
            sequence=input_sequence,
            secondary_structure=matched_ss,
            source_file=source_path,
            method="dssp (matched)",
            disulfide_bonds=result.disulfide_bonds
        )

    def _extract_from_dssp(self, dssp, chain_id: str, file_path: str) -> StructureData:
        """Collect sequence and secondary structure of one chain from DSSP output.

        Args:
            dssp: A mapping keyed by ``(chain_id, residue_id)`` (a ``DSSP``
                object or a plain dictionary), or a ``(dictionary, keys)``
                pair.
            chain_id: Chain whose residues are collected.
            file_path: Stored as ``source_file`` of the result.

        Returns:
            ``StructureData`` with method ``"dssp"``.

        Raises:
            ValueError: if no standard residue was found for ``chain_id``.
        """
        if isinstance(dssp, tuple):
            dssp = dssp[0]

        sequence = []
        secondary_structure = []

        for key in dssp.keys():
            if key[0] != chain_id:
                continue

            record = dssp[key]
            # Records either start with a numeric index followed by the amino
            # acid and structure code, or start directly with the amino acid.
            offset = 0 if isinstance(record[0], str) else 1
            aa = record[offset]
            ss = record[offset + 1]

            # Skip anything that is not one of the 20 standard amino acids.
            if aa not in self._STANDARD_AA:
                continue

            sequence.append(aa)
            secondary_structure.append(self.ss_mapping.get(ss, 'C'))

        if not sequence:
            raise ValueError(f"No data extracted for chain {chain_id}. "
                             f"Check if chain exists and contains standard amino acids.")

        return StructureData(
            sequence=''.join(sequence),
            secondary_structure=''.join(secondary_structure),
            source_file=file_path,
            method="dssp"
        )

    def _extract_disulfide_bonds(self, model, chain_id: str) -> List[Tuple[int, int]]:
        """Find cysteine pairs of one chain whose SG atoms are within the cutoff.

        Args:
            model: Iterable of chains, each exposing ``id``.
            chain_id: Chain to inspect.

        Returns:
            List of ``(residue number, residue number)`` pairs in chain order;
            empty when the chain is not present.
        """
        chain = next((c for c in model if c.id == chain_id), None)
        if chain is None:
            return []

        # (residue number, SG atom) for every cysteine that has an SG atom.
        sulfurs = []
        for residue in chain:
            if residue.get_resname().strip() != 'CYS' or not self._is_amino_acid(residue):
                continue
            sg_atom = None
            for atom in residue:
                if atom.get_name() == 'SG':
                    sg_atom = atom
            if sg_atom is not None:
                sulfurs.append((residue.get_id()[1], sg_atom))

        disulfide_bonds = []
        for index, (first_id, first_sg) in enumerate(sulfurs):
            for second_id, second_sg in sulfurs[index + 1:]:
                # Subtracting two atoms yields their distance.
                if first_sg - second_sg < self._DISULFIDE_CUTOFF:
                    disulfide_bonds.append((first_id, second_id))

        return disulfide_bonds

    def _is_amino_acid(self, residue):
        """Return True when the residue name is one of the 20 standard amino acids."""
        try:
            return residue.get_resname().strip() in self._STANDARD_RESNAMES
        except AttributeError:
            # Objects without a usable ``get_resname`` are not residues.
            return False

    def match_sequence(self, structure_sequence, structure_ss, input_sequence):
        """Map the structure's secondary structure onto a user-supplied sequence.

        The two sequences are locally aligned with BLOSUM62; every aligned
        position of ``input_sequence`` receives the secondary structure code
        of the structure residue it is aligned to, all other positions are
        coil (``'C'``).

        Args:
            structure_sequence: Sequence extracted from the structure file.
            structure_ss: Secondary structure extracted from the structure file.
            input_sequence: Sequence supplied by the user.

        Returns:
            Secondary structure string with the length of ``input_sequence``;
            ``structure_ss`` unchanged when ``input_sequence`` is empty.
        """
        if not input_sequence:
            return structure_ss

        aligner = Align.PairwiseAligner()
        aligner.mode = 'local'
        aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
        alignments = aligner.align(structure_sequence, input_sequence)

        matched_ss = ['C'] * len(input_sequence)

        # Take the first alignment by iteration so the total number of
        # alignments never has to be computed.
        best_alignment = next(iter(alignments), None)
        if best_alignment is None:
            return ''.join(matched_ss)

        # ``aligned`` lists the gap-free (start, end) blocks of both
        # sequences; ``target`` and ``query`` are the ungapped inputs and
        # carry no alignment information.
        target_blocks, query_blocks = best_alignment.aligned
        for (t_start, t_end), (q_start, q_end) in zip(target_blocks, query_blocks):
            positions = zip(range(int(t_start), int(t_end)),
                            range(int(q_start), int(q_end)))
            for target_idx, query_idx in positions:
                if target_idx < len(structure_ss):
                    matched_ss[query_idx] = structure_ss[target_idx]

        return ''.join(matched_ss)
@@ -0,0 +1,23 @@
1
+ """
2
+ Utilities module for PyPLogo
3
+ Contains utility functions for parsing, formatting, and handling protein data
4
+ """
5
+
6
+ from .parsers import parse_fasta, validate_sequence, calculate_sequence_stats
7
+ from .formatters import (format_sequence, format_secondary_structure,
8
+ format_confidence_scores, create_sequence_chunks,
9
+ calculate_sequence_position, format_legend,
10
+ normalize_confidence_scores)
11
+
12
+ __all__ = [
13
+ 'parse_fasta',
14
+ 'validate_sequence',
15
+ 'calculate_sequence_stats',
16
+ 'format_sequence',
17
+ 'format_secondary_structure',
18
+ 'format_confidence_scores',
19
+ 'create_sequence_chunks',
20
+ 'calculate_sequence_position',
21
+ 'format_legend',
22
+ 'normalize_confidence_scores'
23
+ ]
@@ -0,0 +1,136 @@
1
+ """
2
+ Utility functions for formatting and chunking protein sequences and structures.
3
+ """
4
+
5
+ from typing import List, Tuple, Dict, Union
6
+ import numpy as np
7
+
8
def format_sequence(sequence: str, per_line: int = 50) -> List[str]:
    """
    Split an amino acid sequence into consecutive fixed-width lines.

    Args:
        sequence: Amino acid sequence
        per_line: Maximum number of residues on each line

    Returns:
        The sequence pieces in order; an empty list for an empty sequence
    """
    if not sequence:
        return []

    return [sequence[start:start + per_line]
            for start in range(0, len(sequence), per_line)]
29
+
30
def format_secondary_structure(ss_string: str, line_length: int = 50) -> List[str]:
    """
    Split a secondary structure string into consecutive fixed-width lines.

    Args:
        ss_string: Secondary structure string
        line_length: Maximum number of characters on each line

    Returns:
        The string pieces in order
    """
    lines: List[str] = []
    for start in range(0, len(ss_string), line_length):
        stop = start + line_length
        lines.append(ss_string[start:stop])
    return lines
43
+
44
def format_confidence_scores(confidence: np.ndarray,
                             line_length: int = 50) -> List[np.ndarray]:
    """
    Slice confidence scores into consecutive fixed-width pieces.

    Args:
        confidence: Confidence scores array
        line_length: Maximum number of scores in each piece

    Returns:
        The slices of ``confidence`` in order
    """
    pieces = []
    total = len(confidence)
    for start in range(0, total, line_length):
        pieces.append(confidence[start:start + line_length])
    return pieces
58
+
59
def create_sequence_chunks(sequence: str, chunk_size: int = 50) -> List[Tuple[int, int]]:
    """
    Compute the index ranges that split a sequence into chunks.

    Args:
        sequence: Amino acid sequence
        chunk_size: Size of each chunk

    Returns:
        List of (start, end) tuples, ``end`` exclusive and capped at the
        sequence length
    """
    total = len(sequence)
    spans = []
    for start in range(0, total, chunk_size):
        stop = start + chunk_size
        if stop > total:
            stop = total
        spans.append((start, stop))
    return spans
72
+
73
def calculate_sequence_position(residue_index: int,
                                chunk_size: int = 50,
                                line_index: int = 0) -> Tuple[int, int]:
    """
    Locate a residue within a line-wrapped layout.

    Args:
        residue_index: Index of residue in sequence
        chunk_size: Number of residues per line
        line_index: Line number assigned to the first line

    Returns:
        Tuple of (line_number, position_in_line)
    """
    lines_down, position_in_line = divmod(residue_index, chunk_size)
    return line_index + lines_down, position_in_line
90
+
91
def format_legend(ss_types: List[str],
                  color_map: Dict[str, str]) -> str:
    """
    Build a text legend with one "type: color" line per structure type.

    Args:
        ss_types: List of secondary structure types
        color_map: Mapping from SS type to color code

    Returns:
        Newline-joined legend; types missing from ``color_map`` are shown
        with '#000000'
    """
    return "\n".join(
        f"{ss_type}: {color_map.get(ss_type, '#000000')}"
        for ss_type in ss_types
    )
108
+
109
def normalize_confidence_scores(scores: np.ndarray) -> np.ndarray:
    """
    Scale confidence scores by their maximum value.

    Args:
        scores: Raw confidence scores (array or sequence of numbers);
            ``None`` is treated as empty. Input is flattened to one dimension.

    Returns:
        A plain list of floats, each score divided by the maximum score;
        an empty list for empty input, and all zeros when the maximum is 0.
        A list (not an array) is returned to stay compatible with what this
        function has always produced.
    """
    if scores is None:
        return []

    # Emptiness is tested through ``size``: truth-testing an array with
    # several elements raises instead of answering.
    values = np.asarray(scores, dtype=float).ravel()
    if values.size == 0:
        return []

    max_score = values.max()

    # Avoid dividing by zero when the largest score is 0.
    if max_score == 0:
        return [0.0] * values.size

    return (values / max_score).tolist()