RNApolis 0.4.17__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rnapolis/parser_v2.py ADDED
@@ -0,0 +1,202 @@
1
+ from typing import IO, Union
2
+
3
+ import pandas as pd
4
+ from mmcif.io.IoAdapterPy import IoAdapterPy
5
+
6
+
7
def parse_pdb_atoms(content: Union[str, IO[str]]) -> pd.DataFrame:
    """
    Parse PDB file content and extract ATOM and HETATM records into a pandas DataFrame.

    Parameters:
    -----------
    content : Union[str, IO[str]]
        Content of a PDB file as a string or file-like object. A file-like
        object is rewound with ``seek(0)`` before reading; lines returned as
        bytes are decoded as UTF-8.

    Returns:
    --------
    pd.DataFrame
        DataFrame containing parsed ATOM and HETATM records with columns corresponding to PDB format.
        ``attrs["format"]`` is always set to ``"PDB"``, also when no record was found
        (in that case the frame is empty but still has all the columns).
    """
    # Single source of truth for the column layout (used for the empty result).
    columns = [
        "record_type",
        "serial",
        "name",
        "altLoc",
        "resName",
        "chainID",
        "resSeq",
        "iCode",
        "x",
        "y",
        "z",
        "occupancy",
        "tempFactor",
        "element",
        "charge",
    ]

    # Handle both string content and file-like objects
    if isinstance(content, str):
        lines = content.splitlines()
    else:
        content.seek(0)  # Ensure we're at the beginning of the file
        lines = content.readlines()
        # Convert bytes to string if needed; guard against an empty file,
        # for which there is no first line to inspect.
        if lines and isinstance(lines[0], bytes):
            lines = [line.decode("utf-8") for line in lines]

    records = []
    for line in lines:
        record_type = line[:6].strip()

        # Only process ATOM and HETATM records
        if record_type not in ("ATOM", "HETATM"):
            continue

        # Parse fields according to the fixed-column PDB format specification.
        # Slicing never raises on short lines; missing fields become "".
        icode = line[26:27].strip()
        records.append(
            {
                "record_type": record_type,
                "serial": line[6:11].strip(),
                "name": line[12:16].strip(),
                "altLoc": line[16:17].strip(),
                "resName": line[17:20].strip(),
                "chainID": line[21:22].strip(),
                "resSeq": line[22:26].strip(),
                "iCode": icode if icode else None,  # Convert empty string to None
                "x": line[30:38].strip(),
                "y": line[38:46].strip(),
                "z": line[46:54].strip(),
                "occupancy": line[54:60].strip(),
                "tempFactor": line[60:66].strip(),
                "element": line[76:78].strip(),
                "charge": line[78:80].strip(),
            }
        )

    if not records:
        # Return an empty DataFrame with the correct columns, tagged with the
        # format exactly like the non-empty result.
        empty = pd.DataFrame(columns=columns)
        empty.attrs["format"] = "PDB"
        return empty

    df = pd.DataFrame(records)

    # Convert numeric columns to appropriate types (unparsable values become NaN)
    for col in ("serial", "resSeq", "x", "y", "z", "occupancy", "tempFactor"):
        df[col] = pd.to_numeric(df[col], errors="coerce")

    # Convert categorical columns
    for col in (
        "record_type",
        "name",
        "altLoc",
        "resName",
        "chainID",
        "element",
        "charge",
    ):
        df[col] = df[col].astype("category")

    # Add format attribute to the DataFrame
    df.attrs["format"] = "PDB"

    return df
110
+
111
+
112
def parse_cif_atoms(content: Union[str, IO[str]]) -> pd.DataFrame:
    """
    Parse mmCIF file content and extract atom_site records into a pandas DataFrame.

    Parameters:
    -----------
    content : Union[str, IO[str]]
        Content of a mmCIF file as a string or file-like object. A file-like
        object whose ``name`` points to an existing file is read from that path;
        any other file-like object (e.g. ``io.StringIO``) is read in full and
        passed to the reader through a temporary file.

    Returns:
    --------
    pd.DataFrame
        DataFrame containing parsed atom_site records with columns corresponding to mmCIF format.
        When the reader yields no data block or no atom_site category, a plain empty
        DataFrame (no columns, no ``format`` attribute) is returned.
    """
    import os
    import tempfile

    adapter = IoAdapterPy()

    # The adapter only reads from a path, so decide whether one already exists.
    source_path = None if isinstance(content, str) else getattr(content, "name", None)

    if isinstance(source_path, str) and os.path.isfile(source_path):
        data = adapter.readFile(source_path)
    else:
        if isinstance(content, str):
            text = content
        else:
            # In-memory stream: rewind when possible and read everything.
            if hasattr(content, "seekable") and content.seekable():
                content.seek(0)
            text = content.read()
            if isinstance(text, bytes):
                text = text.decode("utf-8")

        # Write to a file that is closed before the adapter opens it; re-opening
        # a still-open NamedTemporaryFile by name is not portable.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = os.path.join(temp_dir, "input.cif")
            with open(temp_path, "w", encoding="utf-8") as temp_file:
                temp_file.write(text)
            data = adapter.readFile(temp_path)

    # No data block at all (e.g. empty or unparsable input)
    if not data:
        return pd.DataFrame()

    # Get the atom_site category from the first data block
    category = data[0].getObj("atom_site")

    if not category:
        # Return empty DataFrame if no atom_site category found
        return pd.DataFrame()

    # Extract attribute names and data rows
    attributes = category.getAttributeList()

    # Create a list of dictionaries, one per atom
    records = []
    for row in category.getRowList():
        record = dict(zip(attributes, row))

        # Convert "?", "." or "" in insertion code to None
        if record.get("pdbx_PDB_ins_code") in ("?", ".", ""):
            record["pdbx_PDB_ins_code"] = None

        records.append(record)

    df = pd.DataFrame(records)

    # Convert numeric columns to appropriate types (unparsable values become NaN)
    numeric_columns = [
        "id",
        "auth_seq_id",
        "Cartn_x",
        "Cartn_y",
        "Cartn_z",
        "occupancy",
        "B_iso_or_equiv",
        "pdbx_formal_charge",
    ]
    for col in numeric_columns:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")

    # Convert categorical columns
    categorical_columns = [
        "group_PDB",
        "type_symbol",
        "label_atom_id",
        "label_comp_id",
        "label_asym_id",
        "auth_atom_id",
        "auth_comp_id",
        "auth_asym_id",
    ]
    for col in categorical_columns:
        if col in df.columns:
            df[col] = df[col].astype("category")

    # Add format attribute to the DataFrame
    df.attrs["format"] = "mmCIF"

    return df
@@ -0,0 +1,618 @@
1
+ import string
2
+ from functools import cached_property
3
+ from typing import List, Optional
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+ # Constants
9
+ AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT = 1.6
10
+
11
+
12
def calculate_torsion_angle(
    a1: np.ndarray, a2: np.ndarray, a3: np.ndarray, a4: np.ndarray
) -> float:
    """
    Compute the dihedral (torsion) angle defined by four points in 3D space.

    Parameters:
    -----------
    a1, a2, a3, a4 : np.ndarray
        3D coordinates of the four consecutive atoms

    Returns:
    --------
    float
        Signed torsion angle in radians, or NaN when either plane normal
        has a length below 1e-6 (i.e. three consecutive points are collinear)
    """
    # Bond vectors along the chain a1 -> a2 -> a3 -> a4
    bond_12 = a2 - a1
    bond_23 = a3 - a2
    bond_34 = a4 - a3

    # Normals of the planes (a1, a2, a3) and (a2, a3, a4)
    normal_a = np.cross(bond_12, bond_23)
    normal_b = np.cross(bond_23, bond_34)
    length_a = np.linalg.norm(normal_a)
    length_b = np.linalg.norm(normal_b)

    # Degenerate geometry: the angle is undefined
    if any(length < 1e-6 for length in (length_a, length_b)):
        return float("nan")

    unit_a = normal_a / length_a
    unit_b = normal_b / length_b

    # Third axis of the frame built from the first normal and the central bond
    central_axis = bond_23 / np.linalg.norm(bond_23)
    frame_axis = np.cross(unit_a, central_axis)

    cosine_part = np.dot(unit_a, unit_b)
    sine_part = np.dot(frame_axis, unit_b)

    return np.arctan2(sine_part, cosine_part)
57
+
58
+
59
class Structure:
    """
    A class representing a molecular structure parsed from PDB or mmCIF format.

    This class takes a DataFrame created by parser_v2 functions and provides
    methods to access and manipulate the structure data.
    """

    def __init__(self, atoms: pd.DataFrame):
        """
        Initialize a Structure object with atom data.

        Parameters:
        -----------
        atoms : pd.DataFrame
            DataFrame containing atom data, as created by parse_pdb_atoms or parse_cif_atoms.
            The input format is read from ``atoms.attrs["format"]`` and defaults
            to "unknown" when the attribute is missing.
        """
        self.atoms = atoms
        self.format = atoms.attrs.get("format", "unknown")

    @cached_property
    def residues(self) -> List["Residue"]:
        """
        Group atoms by residue and return a list of Residue objects.

        The grouping logic depends on the format of the input data:
        - For PDB: group by (chainID, resSeq, iCode), using whichever of these
          columns exist
        - For mmCIF: group by (auth_asym_id, auth_seq_id) if both are present,
          otherwise by (label_asym_id, label_seq_id); pdbx_PDB_ins_code is
          appended to the key in both cases when the column exists
        - For any other format: no grouping is done

        Returns:
        --------
        List[Residue]
            List of Residue objects, each representing a single residue
            (empty for unknown formats)
        """
        columns = self.atoms.columns

        if self.format == "PDB":
            # Keep only the key columns that exist in the DataFrame
            groupby_cols = [
                col for col in ["chainID", "resSeq", "iCode"] if col in columns
            ]
        elif self.format == "mmCIF":
            # Prefer auth_* columns if they exist, fall back to label_*
            if "auth_asym_id" in columns and "auth_seq_id" in columns:
                groupby_cols = ["auth_asym_id", "auth_seq_id"]
            else:
                groupby_cols = ["label_asym_id", "label_seq_id"]

            # Add insertion code if it exists
            if "pdbx_PDB_ins_code" in columns:
                groupby_cols.append("pdbx_PDB_ins_code")
        else:
            # For unknown formats, return an empty list
            return []

        # dropna=False keeps residues whose insertion code is missing (None/NaN)
        grouped = self.atoms.groupby(groupby_cols, dropna=False, observed=False)

        residues = []
        for _, group in grouped:
            # Work on a copy so each Residue owns its data, and carry the
            # format attribute over explicitly.
            residue_df = group.copy()
            residue_df.attrs["format"] = self.format
            residues.append(Residue(residue_df))

        return residues

    @cached_property
    def connected_residues(self) -> List[List["Residue"]]:
        """
        Find segments of connected residues in the structure.

        Residues are grouped by chain, sorted by (residue number, insertion
        code) and split wherever ``Residue.is_connected`` reports that a residue
        is not connected to its successor. Segments with a single residue are
        discarded.

        Returns:
        --------
        List[List[Residue]]
            List of segments, where each segment is a list of connected residues
        """
        # Group residues by chain
        residues_by_chain = {}
        for residue in self.residues:
            residues_by_chain.setdefault(residue.chain_id, []).append(residue)

        # Sort residues in each chain by residue number, then insertion code
        for chain_residues in residues_by_chain.values():
            chain_residues.sort(
                key=lambda r: (r.residue_number, r.insertion_code or "")
            )

        # Find connected segments in each chain
        segments = []
        for chain_residues in residues_by_chain.values():
            current_segment = []

            for residue in chain_residues:
                if not current_segment or current_segment[-1].is_connected(residue):
                    # Start a new segment or extend the current one
                    current_segment.append(residue)
                    continue

                # Chain break: keep the finished segment only if it has at
                # least 2 residues, then start over from this residue.
                if len(current_segment) > 1:
                    segments.append(current_segment)
                current_segment = [residue]

            # Add the last segment if it has at least 2 residues
            if len(current_segment) > 1:
                segments.append(current_segment)

        return segments

    @cached_property
    def torsion_angles(self) -> pd.DataFrame:
        """
        Calculate torsion angles for all connected residues in the structure.

        Returns:
        --------
        pd.DataFrame
            DataFrame with one row per residue of every connected segment and the
            columns chain_id, residue_number, insertion_code, residue_name, alpha,
            beta, gamma, delta, epsilon, zeta, chi. Angles are in radians; an angle
            that cannot be computed is left empty.
        """
        ordered_columns = [
            "chain_id",
            "residue_number",
            "insertion_code",
            "residue_name",
            "alpha",
            "beta",
            "gamma",
            "delta",
            "epsilon",
            "zeta",
            "chi",
        ]

        # Backbone torsions as (atom name, residue offset) quadruples, where the
        # offset is relative to the current residue within its segment.
        torsion_definitions = {
            "alpha": [("O3'", -1), ("P", 0), ("O5'", 0), ("C5'", 0)],
            "beta": [("P", 0), ("O5'", 0), ("C5'", 0), ("C4'", 0)],
            "gamma": [("O5'", 0), ("C5'", 0), ("C4'", 0), ("C3'", 0)],
            "delta": [("C5'", 0), ("C4'", 0), ("C3'", 0), ("O3'", 0)],
            "epsilon": [("C4'", 0), ("C3'", 0), ("O3'", 0), ("P", 1)],
            "zeta": [("C3'", 0), ("O3'", 0), ("P", 1), ("O5'", 1)],
        }

        # Chi depends on the base type:
        # Pyrimidines: O4'-C1'-N1-C2
        # Purines: O4'-C1'-N9-C4
        purine_bases = ["A", "G", "DA", "DG"]
        pyrimidine_bases = ["C", "U", "T", "DC", "DT"]

        data = []
        for segment in self.connected_residues:
            last_index = len(segment) - 1

            for i, residue in enumerate(segment):
                row = {
                    "chain_id": residue.chain_id,
                    "residue_number": residue.residue_number,
                    "insertion_code": residue.insertion_code,
                    "residue_name": residue.residue_name,
                }

                for angle_name, atoms_def in torsion_definitions.items():
                    if angle_name == "alpha" and i == 0:
                        continue  # The first residue has no predecessor for alpha

                    if angle_name in ["epsilon", "zeta"] and i == last_index:
                        continue  # The last residue has no successor for epsilon/zeta

                    # Collect the four coordinates; stop at the first atom
                    # that is out of the segment or missing from its residue.
                    atoms = []
                    for atom_name, offset in atoms_def:
                        res_idx = i + offset
                        if not 0 <= res_idx < len(segment):
                            break
                        atom = segment[res_idx].find_atom(atom_name)
                        if atom is None:
                            break
                        atoms.append(atom.coordinates)

                    # Calculate the angle only if all four atoms were found
                    if len(atoms) == 4:
                        row[angle_name] = calculate_torsion_angle(
                            atoms[0], atoms[1], atoms[2], atoms[3]
                        )
                    else:
                        row[angle_name] = None

                o4_prime = residue.find_atom("O4'")
                c1_prime = residue.find_atom("C1'")

                if o4_prime is not None and c1_prime is not None:
                    if residue.residue_name in purine_bases:
                        n9 = residue.find_atom("N9")
                        c4 = residue.find_atom("C4")
                        if n9 is not None and c4 is not None:
                            row["chi"] = calculate_torsion_angle(
                                o4_prime.coordinates,
                                c1_prime.coordinates,
                                n9.coordinates,
                                c4.coordinates,
                            )
                    elif residue.residue_name in pyrimidine_bases:
                        n1 = residue.find_atom("N1")
                        c2 = residue.find_atom("C2")
                        if n1 is not None and c2 is not None:
                            row["chi"] = calculate_torsion_angle(
                                o4_prime.coordinates,
                                c1_prime.coordinates,
                                n1.coordinates,
                                c2.coordinates,
                            )

                data.append(row)

        if not data:
            # Return empty DataFrame with correct columns
            return pd.DataFrame(columns=ordered_columns)

        df = pd.DataFrame(data)

        # Ensure all angle columns exist even if no row provided a value
        for angle in ["alpha", "beta", "gamma", "delta", "epsilon", "zeta", "chi"]:
            if angle not in df.columns:
                df[angle] = None

        # Reorder columns to ensure consistent order
        return df[ordered_columns]
360
+
361
+
362
class Residue:
    """
    A single residue of a molecular structure.

    Wraps a DataFrame holding the atoms of exactly one residue and exposes
    the residue-level properties derived from it.
    """

    def __init__(self, residue_df: pd.DataFrame):
        """
        Create a Residue from the atom rows of a single residue.

        Parameters:
        -----------
        residue_df : pd.DataFrame
            DataFrame containing atom data for a single residue; its
            ``attrs["format"]`` selects the column naming scheme
        """
        self.atoms = residue_df
        self.format = residue_df.attrs.get("format", "unknown")

    @cached_property
    def chain_id(self) -> str:
        """Chain identifier of the residue ("" for unknown formats)."""
        if self.format == "PDB":
            return self.atoms["chainID"].iloc[0]
        if self.format != "mmCIF":
            return ""
        has_auth = "auth_asym_id" in self.atoms.columns
        column = "auth_asym_id" if has_auth else "label_asym_id"
        return self.atoms[column].iloc[0]

    @cached_property
    def residue_number(self) -> int:
        """Residue sequence number (0 for unknown formats)."""
        if self.format == "PDB":
            return int(self.atoms["resSeq"].iloc[0])
        if self.format != "mmCIF":
            return 0
        has_auth = "auth_seq_id" in self.atoms.columns
        column = "auth_seq_id" if has_auth else "label_seq_id"
        return int(self.atoms[column].iloc[0])

    @cached_property
    def insertion_code(self) -> Optional[str]:
        """Insertion code of the residue, or None when absent."""
        if self.format == "PDB":
            column = "iCode"
        elif self.format == "mmCIF" and "pdbx_PDB_ins_code" in self.atoms.columns:
            column = "pdbx_PDB_ins_code"
        else:
            return None

        value = self.atoms[column].iloc[0]
        if pd.notna(value):
            return value
        return None

    @cached_property
    def residue_name(self) -> str:
        """Residue name such as 'A', 'G', 'C' or 'U' ("" for unknown formats)."""
        if self.format == "PDB":
            return self.atoms["resName"].iloc[0]
        if self.format != "mmCIF":
            return ""
        has_auth = "auth_comp_id" in self.atoms.columns
        column = "auth_comp_id" if has_auth else "label_comp_id"
        return self.atoms[column].iloc[0]

    @cached_property
    def atoms_list(self) -> List["Atom"]:
        """All atoms of this residue as Atom objects, in row order."""
        atom_count = len(self.atoms)
        return [
            Atom(self.atoms.iloc[position], self.format)
            for position in range(atom_count)
        ]

    def find_atom(self, atom_name: str) -> Optional["Atom"]:
        """
        Look up an atom of this residue by its name.

        Parameters:
        -----------
        atom_name : str
            Name of the atom to find

        Returns:
        --------
        Optional[Atom]
            The first matching Atom, or None if there is no match
        """
        # Pick the column holding atom names for the current format
        if self.format == "PDB":
            name_column = "name"
        elif self.format == "mmCIF":
            if "auth_atom_id" in self.atoms.columns:
                name_column = "auth_atom_id"
            else:
                name_column = "label_atom_id"
        else:
            return None

        matches = self.atoms[self.atoms[name_column] == atom_name]
        if len(matches) > 0:
            return Atom(matches.iloc[0], self.format)
        return None

    def is_connected(self, next_residue_candidate: "Residue") -> bool:
        """
        Tell whether this residue is bonded to the given following residue.

        The test uses the distance between the O3' atom of this residue and
        the P atom of the candidate: they count as connected when it is below
        1.5 times the average O-P covalent bond distance.

        Parameters:
        -----------
        next_residue_candidate : Residue
            The residue to check for connection

        Returns:
        --------
        bool
            True if the residues are connected, False otherwise (also when
            either atom is missing)
        """
        this_o3 = self.find_atom("O3'")
        next_p = next_residue_candidate.find_atom("P")

        if this_o3 is None or next_p is None:
            return False

        separation = np.linalg.norm(this_o3.coordinates - next_p.coordinates).item()
        return separation < 1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT

    def __str__(self) -> str:
        """Compact label: [chain.]name[/]number[^insertion_code]."""
        pieces = []

        # Chain prefix is omitted for blank or empty chain identifiers
        if not (self.chain_id.isspace() or not self.chain_id):
            pieces.append(f"{self.chain_id}.")
        pieces.append(f"{self.residue_name}")

        # A slash separates a name ending in a digit from the residue number
        if len(self.residue_name) > 0 and self.residue_name[-1] in string.digits:
            pieces.append("/")

        pieces.append(f"{self.residue_number}")

        if self.insertion_code is not None:
            pieces.append(f"^{self.insertion_code}")

        return "".join(pieces)

    def __repr__(self) -> str:
        """Debug representation including the number of atoms."""
        label = self.__str__()
        return f"Residue({label}, {len(self.atoms)} atoms)"
518
+
519
+
520
class Atom:
    """
    A single atom of a molecular structure.

    Wraps a pandas Series with the data of one atom and exposes its
    properties independently of the source format.
    """

    def __init__(self, atom_data: pd.Series, format: str):
        """
        Create an Atom from one row of atom data.

        Parameters:
        -----------
        atom_data : pd.Series
            Series containing data for a single atom
        format : str
            Format of the data ('PDB' or 'mmCIF')
        """
        self.data = atom_data
        self.format = format

    def _float_or(self, key: str, fallback: float) -> float:
        """Return ``self.data[key]`` as a float, or ``fallback`` when it is missing (NaN/None)."""
        raw = self.data[key]
        if pd.notna(raw):
            return float(raw)
        return fallback

    @cached_property
    def name(self) -> str:
        """Atom name ("" for unknown formats)."""
        if self.format == "PDB":
            return self.data["name"]
        if self.format != "mmCIF":
            return ""
        key = "auth_atom_id" if "auth_atom_id" in self.data else "label_atom_id"
        return self.data[key]

    @cached_property
    def element(self) -> str:
        """Element symbol ("" when not available)."""
        if self.format == "PDB":
            return self.data["element"]
        if self.format == "mmCIF" and "type_symbol" in self.data:
            return self.data["type_symbol"]
        return ""

    @cached_property
    def coordinates(self) -> np.ndarray:
        """Cartesian coordinates as a 3-element array (zeros for unknown formats)."""
        if self.format == "PDB":
            axes = ("x", "y", "z")
        elif self.format == "mmCIF":
            axes = ("Cartn_x", "Cartn_y", "Cartn_z")
        else:
            return np.array([0.0, 0.0, 0.0])
        return np.array([self.data[axes[0]], self.data[axes[1]], self.data[axes[2]]])

    @cached_property
    def occupancy(self) -> float:
        """Occupancy value; 1.0 when missing."""
        if self.format == "PDB":
            return self._float_or("occupancy", 1.0)
        if self.format == "mmCIF" and "occupancy" in self.data:
            return self._float_or("occupancy", 1.0)
        return 1.0

    @cached_property
    def temperature_factor(self) -> float:
        """Temperature factor (B-factor); 0.0 when missing."""
        if self.format == "PDB":
            return self._float_or("tempFactor", 0.0)
        if self.format == "mmCIF" and "B_iso_or_equiv" in self.data:
            return self._float_or("B_iso_or_equiv", 0.0)
        return 0.0

    def __str__(self) -> str:
        """Short label: name followed by the element in parentheses."""
        return f"{self.name} ({self.element})"

    def __repr__(self) -> str:
        """Debug representation with name, element and coordinates."""
        x, y, z = self.coordinates[0], self.coordinates[1], self.coordinates[2]
        return f"Atom({self.name}, {self.element}, [{x:.3f}, {y:.3f}, {z:.3f}])"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: RNApolis
3
- Version: 0.4.17
3
+ Version: 0.5.0
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -6,13 +6,15 @@ rnapolis/mmcif_pdbx_v50.dic,sha256=5QFx1ssDaehR4_DQ-tS9VQux262SiLXaqcwmwwejF5c,5
6
6
  rnapolis/molecule_filter.py,sha256=jgcpJxx_oXEBX0d30v4k_FdwRouRUPUsEtCYWgLGpD4,7310
7
7
  rnapolis/motif_extractor.py,sha256=Lfn1iEkhkP9eZD3GPEWNAfy00QO7QPCc8wM_XS1ory8,1147
8
8
  rnapolis/parser.py,sha256=3g4mtFvpiEENFcSBBtx_E_x1vJPF9BujWnts0kb9XjE,16340
9
+ rnapolis/parser_v2.py,sha256=L85dRYlh_aOcSvt2ZtRJYFhYa0bwvYgoTQi9kUSqDGQ,5803
9
10
  rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
10
11
  rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
12
+ rnapolis/tertiary_v2.py,sha256=GuTSEtbkMlYks6XA-P8pbLaT4M1cVS1T8gb8zcaGRzQ,21250
11
13
  rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
12
14
  rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
13
- RNApolis-0.4.17.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
14
- RNApolis-0.4.17.dist-info/METADATA,sha256=NXwscUxsO3lpMD3eukldViwH6JUSFlEC9ExXwirgfLM,54516
15
- RNApolis-0.4.17.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
16
- RNApolis-0.4.17.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
17
- RNApolis-0.4.17.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
18
- RNApolis-0.4.17.dist-info/RECORD,,
15
+ rnapolis-0.5.0.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
16
+ rnapolis-0.5.0.dist-info/METADATA,sha256=gq8j-Oln2H84wuzLZNvilJ5m1dPYtvm7vX2cpEunHYg,54515
17
+ rnapolis-0.5.0.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
18
+ rnapolis-0.5.0.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
19
+ rnapolis-0.5.0.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
20
+ rnapolis-0.5.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (76.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5