sapiopycommons 2025.2.25a448__tar.gz → 2025.2.25a449__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sapiopycommons might be problematic. Click here for more details.

Files changed (85)
  1. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/PKG-INFO +1 -1
  2. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/pyproject.toml +1 -1
  3. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/ai/tool_of_tools.py +36 -49
  4. sapiopycommons-2025.2.25a448/src/sapiopycommons/ai/biopython_helper.py +0 -639
  5. sapiopycommons-2025.2.25a448/src/sapiopycommons/ai/rdkit_helper.py +0 -82
  6. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/.gitignore +0 -0
  7. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/LICENSE +0 -0
  8. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/README.md +0 -0
  9. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/__init__.py +0 -0
  10. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/ai/__init__.py +0 -0
  11. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/callbacks/__init__.py +0 -0
  12. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/callbacks/callback_util.py +0 -0
  13. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/callbacks/field_builder.py +0 -0
  14. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/chem/IndigoMolecules.py +0 -0
  15. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/chem/Molecules.py +0 -0
  16. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/chem/__init__.py +0 -0
  17. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/customreport/__init__.py +0 -0
  18. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/customreport/auto_pagers.py +0 -0
  19. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/customreport/column_builder.py +0 -0
  20. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/customreport/custom_report_builder.py +0 -0
  21. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/customreport/term_builder.py +0 -0
  22. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/datatype/__init__.py +0 -0
  23. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/datatype/attachment_util.py +0 -0
  24. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/datatype/data_fields.py +0 -0
  25. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/datatype/pseudo_data_types.py +0 -0
  26. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/eln/__init__.py +0 -0
  27. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/eln/experiment_handler.py +0 -0
  28. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/eln/experiment_report_util.py +0 -0
  29. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/eln/plate_designer.py +0 -0
  30. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/files/__init__.py +0 -0
  31. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/files/complex_data_loader.py +0 -0
  32. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/files/file_bridge.py +0 -0
  33. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/files/file_bridge_handler.py +0 -0
  34. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/files/file_data_handler.py +0 -0
  35. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/files/file_util.py +0 -0
  36. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/files/file_validator.py +0 -0
  37. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/files/file_writer.py +0 -0
  38. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/flowcyto/flow_cyto.py +0 -0
  39. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/flowcyto/flowcyto_data.py +0 -0
  40. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/general/__init__.py +0 -0
  41. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/general/accession_service.py +0 -0
  42. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/general/aliases.py +0 -0
  43. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/general/audit_log.py +0 -0
  44. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/general/custom_report_util.py +0 -0
  45. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/general/directive_util.py +0 -0
  46. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/general/exceptions.py +0 -0
  47. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/general/popup_util.py +0 -0
  48. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/general/sapio_links.py +0 -0
  49. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/general/storage_util.py +0 -0
  50. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/general/time_util.py +0 -0
  51. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/multimodal/multimodal.py +0 -0
  52. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/multimodal/multimodal_data.py +0 -0
  53. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/processtracking/__init__.py +0 -0
  54. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/processtracking/custom_workflow_handler.py +0 -0
  55. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/processtracking/endpoints.py +0 -0
  56. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/recordmodel/__init__.py +0 -0
  57. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/recordmodel/record_handler.py +0 -0
  58. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/rules/__init__.py +0 -0
  59. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/rules/eln_rule_handler.py +0 -0
  60. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/rules/on_save_rule_handler.py +0 -0
  61. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/samples/aliquot.py +0 -0
  62. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/sftpconnect/__init__.py +0 -0
  63. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/sftpconnect/sftp_builder.py +0 -0
  64. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/webhook/__init__.py +0 -0
  65. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/webhook/webhook_context.py +0 -0
  66. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/webhook/webhook_handlers.py +0 -0
  67. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/src/sapiopycommons/webhook/webservice_handlers.py +0 -0
  68. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/tests/AF-A0A009IHW8-F1-model_v4.cif +0 -0
  69. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/tests/_do_not_add_init_py_here +0 -0
  70. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/tests/accession_test.py +0 -0
  71. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/tests/aliquot_test.py +0 -0
  72. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/tests/bio_reg_test.py +0 -0
  73. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/tests/chem_test.py +0 -0
  74. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/tests/chem_test_curation_queue.py +0 -0
  75. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/tests/curation_queue_test.sdf +0 -0
  76. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/tests/data_type_models.py +0 -0
  77. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/tests/flowcyto/101_DEN084Y5_15_E01_008_clean.fcs +0 -0
  78. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/tests/flowcyto/101_DEN084Y5_15_E03_009_clean.fcs +0 -0
  79. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/tests/flowcyto/101_DEN084Y5_15_E05_010_clean.fcs +0 -0
  80. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/tests/flowcyto/8_color_ICS.wsp +0 -0
  81. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/tests/flowcyto/COVID19_W_001_O.fcs +0 -0
  82. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/tests/flowcyto_test.py +0 -0
  83. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/tests/kappa.chains.fasta +0 -0
  84. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/tests/mafft_test.py +0 -0
  85. {sapiopycommons-2025.2.25a448 → sapiopycommons-2025.2.25a449}/tests/test.gb +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: sapiopycommons
3
- Version: 2025.2.25a448
3
+ Version: 2025.2.25a449
4
4
  Summary: Official Sapio Python API Utilities Package
5
5
  Project-URL: Homepage, https://github.com/sapiosciences
6
6
  Author-email: Jonathan Steck <jsteck@sapiosciences.com>, Yechen Qiao <yqiao@sapiosciences.com>
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sapiopycommons"
7
- version='2025.02.25a448'
7
+ version='2025.02.25a449'
8
8
  authors = [
9
9
  { name="Jonathan Steck", email="jsteck@sapiosciences.com" },
10
10
  { name="Yechen Qiao", email="yqiao@sapiosciences.com" },
@@ -2,7 +2,7 @@ import base64
2
2
  import io
3
3
  import math
4
4
  import re
5
- from typing import Final, Mapping, Any, cast
5
+ from typing import Final, Mapping, Any
6
6
 
7
7
  import requests
8
8
  from pandas import DataFrame
@@ -325,57 +325,48 @@ class AiHelper:
325
325
  if not json_list:
326
326
  return None
327
327
 
328
- def init_string_field(k: str, v: Any, n: str) -> VeloxStringFieldDefinition:
328
+ def update_string_field(f: AbstractVeloxFieldDefinition, v: Any) -> None:
329
329
  """
330
- Initialize a string field.
330
+ Update the max length of the string field and whether it is a link-out field depending on the length and
331
+ form of the given value.
331
332
 
332
- :param k: The JSON key that the field value is being pulled from. Doubles as the display name.
333
- :param v: A particular value of the field.
334
- :param n: The unique name of the field.
333
+ :param f: The definition of the string field.
334
+ :param v: A field value that will be present for this field.
335
335
  """
336
- link_out: dict[str, str] = {}
337
- if isinstance(v, str):
338
- if v.startswith("https://") or v.startswith("http://"):
339
- link_out["Link"] = "[[LINK_OUT]]"
340
- return fb.string_field(n, display_name=k, link_out=link_out)
341
-
342
- def update_field_length(k: str, v: Any, lengths: dict[str, int]) -> None:
343
- """
344
- Update the max length of a string field.
345
-
346
- :param k: The JSON key that the field value is being pulled from.
347
- :param v: The field value.
348
- :param lengths: The dictionary of field lengths.
349
- """
350
- lengths[k] = max(lengths.get(k, 100), len(str(v)) if v is not None else 0)
336
+ if not isinstance(f, VeloxStringFieldDefinition) or v is None:
337
+ return
338
+ sv = str(v)
339
+ f.max_length = max(f.max_length, len(sv))
340
+ if not f.link_out and sv.startswith("http://") or sv.startswith("https://"):
341
+ link_out, link_out_url = FieldBuilder._convert_link_out({"Link": "[[LINK_OUT]]"})
342
+ f.link_out = link_out
343
+ f.link_out_url = link_out_url
351
344
 
352
345
  # Determine which fields in the JSON can be used to create field definitions.
353
346
  fb = FieldBuilder()
354
347
  json_key_to_field_def: dict[str, AbstractVeloxFieldDefinition] = {}
355
- json_key_to_field_name: dict[str, str] = {}
356
- json_key_to_string_length: dict[str, int] = {}
357
348
  numeric_string_fields: set[str] = set()
358
349
  for values in json_list:
359
350
  for key, value in values.items():
360
- # The field name is the JSON key name, but with spaces and dashes replaced by underscores and with a leading
361
- # underscore added if the field name starts with a number.
362
- if key not in json_key_to_field_name:
363
- field_name: str = key.strip()
364
- if " " in field_name:
365
- field_name = field_name.replace(" ", "_")
366
- if "-" in field_name:
367
- field_name = field_name.replace("-", "_")
368
- if field_name[0].isnumeric():
369
- field_name = "_" + field_name
370
- json_key_to_field_name[key] = field_name
371
- else:
372
- field_name = json_key_to_field_name[key]
351
+ # Skip null values, since we can't know what type they're meant to represent.
352
+ if value is None:
353
+ continue
354
+
355
+ # The field name is the JSON key name, but with spaces and dashes replaced by underscores and with a
356
+ # leading underscore added if the field name starts with a number.
357
+ field_name: str = key.strip()
358
+ if " " in field_name:
359
+ field_name = field_name.replace(" ", "_")
360
+ if "-" in field_name:
361
+ field_name = field_name.replace("-", "_")
362
+ if field_name[0].isnumeric():
363
+ field_name = "_" + field_name
373
364
 
374
365
  # If this is the first time this key is being encountered, create a field for it.
375
366
  if key not in json_key_to_field_def:
376
367
  if isinstance(value, str):
377
- json_key_to_field_def[key] = init_string_field(key, value, field_name)
378
- update_field_length(key, value, json_key_to_string_length)
368
+ json_key_to_field_def[key] = fb.string_field(field_name, display_name=key)
369
+ update_string_field(json_key_to_field_def[key], value)
379
370
  elif isinstance(value, bool):
380
371
  json_key_to_field_def[key] = fb.boolean_field(field_name, display_name=key)
381
372
  elif isinstance(value, (int, float)):
@@ -388,25 +379,21 @@ class AiHelper:
388
379
  # Strings can be anything, so we don't need to check the value type.
389
380
  if field_type == FieldType.STRING:
390
381
  # We still need to make sure the lengths are fine.
391
- update_field_length(key, value, json_key_to_string_length)
382
+ update_string_field(json_key_to_field_def[key], value)
392
383
  continue
393
384
  # Boolean values can only be booleans.
394
385
  if field_type == FieldType.BOOLEAN and isinstance(value, bool):
395
386
  continue
396
387
  # Integers and floats both fit in DOUBLE fields, but floats can't be NaN or infinity.
397
- if field_type == FieldType.DOUBLE and not isinstance(value, bool):
398
- if isinstance(value, int):
388
+ if field_type == FieldType.DOUBLE:
389
+ # Booleans count as ints for isinstance, so make sure that true integers continue but bools don't.
390
+ if isinstance(value, int) and not isinstance(value, bool):
399
391
  continue
400
392
  if isinstance(value, float) and not math.isnan(value) and not math.isinf(value):
401
393
  continue
402
394
  numeric_string_fields.add(key)
403
- json_key_to_field_def[key] = init_string_field(key, value, field_name)
404
- update_field_length(key, value, json_key_to_string_length)
405
-
406
- # Update the max length of each string field.
407
- for key, value in json_key_to_string_length.items():
408
- field = cast(VeloxStringFieldDefinition, json_key_to_field_def[key])
409
- field.max_length = value
395
+ json_key_to_field_def[key] = fb.string_field(field_name, display_name=key)
396
+ update_string_field(json_key_to_field_def[key], value)
410
397
 
411
398
  # Sort the JSON list if requested.
412
399
  if sort_field and sort_direction != SortDirection.NONE:
@@ -428,7 +415,7 @@ class AiHelper:
428
415
  field_map: dict[str, Any] = {}
429
416
  for key, field in json_key_to_field_def.items():
430
417
  val: Any = json_dict.get(key)
431
- if key in numeric_string_fields and val is not None and not isinstance(val, str):
418
+ if key in numeric_string_fields and val is not None and isinstance(val, (int, float)):
432
419
  val: str = f"{val:.3f}"
433
420
  field_map[field.data_field_name] = val
434
421
  field_maps.append(field_map)
@@ -1,639 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from io import StringIO
4
- from typing import TypeAlias, Iterator
5
-
6
- from Bio import Phylo, SeqIO
7
- from Bio.Align import substitution_matrices, Alignment
8
- from Bio.Align.substitution_matrices import Array
9
- from Bio.Blast import Records, parse
10
- from Bio.KEGG import REST
11
- from Bio.PDB.Atom import Atom
12
- from Bio.PDB.Chain import Chain
13
- from Bio.PDB.MMCIF2Dict import MMCIF2Dict
14
- from Bio.PDB.MMCIFParser import MMCIFParser
15
- from Bio.PDB.Model import Model
16
- from Bio.PDB.NeighborSearch import NeighborSearch
17
- from Bio.PDB.PDBIO import PDBIO
18
- from Bio.PDB.PDBParser import PDBParser
19
- from Bio.PDB.Residue import Residue
20
- from Bio.PDB.Structure import Structure
21
- from Bio.PDB.Superimposer import Superimposer
22
- from Bio.Phylo.BaseTree import Tree
23
- from Bio.Phylo.TreeConstruction import DistanceTreeConstructor, DistanceMatrix
24
- from Bio.Seq import Seq
25
- from Bio.SeqRecord import SeqRecord
26
- from Bio.motifs import Motif
27
- from Bio.motifs.matrix import PositionSpecificScoringMatrix
28
- from sapiopylib.rest.User import SapioUser
29
-
30
- SeqAlias: TypeAlias = Seq | str
31
- TreeAlias: TypeAlias = Tree | str
32
-
33
-
34
- class BioPythonAliasUtil:
35
- @staticmethod
36
- def to_sequence(sequence: SeqAlias) -> Seq:
37
- """
38
- Converts the input to a Bio.Seq.Seq object if necessary
39
-
40
- :param sequence: The sequence, either as a Bio.Seq.Seq object or as a string.
41
- :return: The sequence as a Bio.Seq.Seq object.
42
- """
43
- if isinstance(sequence, str):
44
- return Seq(sequence)
45
- return sequence
46
-
47
- @staticmethod
48
- def to_tree(tree: TreeAlias) -> Tree:
49
- """
50
- Converts the input to a Bio.Phylo.BaseTree.Tree object if necessary.
51
-
52
- :param tree: The tree, either as a Bio.Phylo.BaseTree.Tree object or as a Newick string.
53
- :return: The tree as a Bio.Phylo.BaseTree.Tree object
54
- """
55
- if isinstance(tree, str):
56
- with StringIO(tree) as tree_io:
57
- return Phylo.read(tree_io, "newick")
58
- return tree
59
-
60
-
61
- class BioPythonHelper:
62
- """
63
- A class designed for simplifying and better documenting the behavior of commonly used BioPython functions.
64
- """
65
- user: SapioUser
66
- exp_id: int
67
- tab_prefix: str | None
68
-
69
- def __init__(self, user: SapioUser, exp_id: int, tab_prefix: str | None = None):
70
- """
71
- :param user: The user to make requests from.
72
- :param exp_id: The ID of the experiment that the user is in.
73
- :param tab_prefix: The prefix of the tab for displaying results of functions in. Defaults to None.
74
- """
75
- self.user = user
76
- self.exp_id = exp_id
77
- self.tab_prefix = tab_prefix
78
-
79
- @staticmethod
80
- def _parse_pdb_structure(pdb_id: str, file_format: str, file_contents: str | None = None) -> Structure:
81
- """
82
- Helper function to parse PDB structures, handling file input and format selection.clade_
83
-
84
- :param pdb_id: PDB ID of the structure. Used only if file_contents is None.
85
- :param file_format: File format ("pdb", "mmcif", "mmtf", "binarycif").
86
- :param file_contents: PDB/mmCIF file contents as a string. If provided, takes precedence over pdb_id.
87
- """
88
- if file_contents:
89
- if file_format == "pdb":
90
- parser = PDBParser()
91
- with StringIO(file_contents) as pdb_io:
92
- structure = parser.get_structure("input_structure", pdb_io)
93
- elif file_format == "mmcif":
94
- parser = MMCIFParser()
95
- with StringIO(file_contents) as mmcif_io:
96
- structure = parser.get_structure("input_structure", mmcif_io)
97
- else:
98
- raise ValueError("Invalid file format when providing file_contents")
99
- return structure
100
-
101
- from Bio.PDB import PDBList
102
- pdbl = PDBList()
103
- pdb_id = pdb_id.strip().upper()
104
- if '|' in pdb_id:
105
- pdb_id = pdb_id.split('|')[0]
106
-
107
- if file_format == "pdb":
108
- parser = PDBParser()
109
- file_path = pdbl.retrieve_pdb_file(pdb_id, file_format="pdb", overwrite=True)
110
- return parser.get_structure(pdb_id, file_path)
111
- elif file_format == "mmcif":
112
- parser = MMCIFParser()
113
- file_path = pdbl.retrieve_pdb_file(pdb_id, file_format="mmcif", overwrite=True)
114
- return parser.get_structure(pdb_id, file_path)
115
- elif file_format == "mmtf":
116
- from Bio.PDB.mmtf import MMTFParser
117
- parser = MMTFParser()
118
- return parser.get_structure_from_url(pdb_id)
119
- elif file_format == "binarycif":
120
- from Bio.PDB.binary_cif import BinaryCIFParser
121
- parser = BinaryCIFParser()
122
- file_path = pdbl.retrieve_pdb_file(pdb_id, file_format="bcif", overwrite=True) # Corrected file_format
123
- return parser.get_structure(pdb_id, file_path)
124
- else:
125
- raise ValueError("Invalid file format.")
126
-
127
- @staticmethod
128
- def load_matrix(matrix_name: str) -> Array:
129
- """
130
- Loads a substitution matrix from the Bio.Align.substitution_matrices module.
131
-
132
- :param matrix_name: The name of the matrix to load (e.g., "BLOSUM62").
133
- :return: An Array object representing the substitution matrix.
134
- """
135
- return substitution_matrices.load(matrix_name)
136
-
137
- @staticmethod
138
- def blast_run(blast_output: str) -> Records:
139
- """
140
- Parses BLAST output (in plain text format) and returns a Bio.Blast.Records object.
141
-
142
- :param blast_output: BLAST output in plain text format, as a string.
143
- :return: A Bio.Blast.Records iterator, yielding Bio.Blast.Record objects.
144
- """
145
- with StringIO(blast_output) as blast_io:
146
- blast_records: Records = parse(blast_io)
147
- return blast_records
148
-
149
- @staticmethod
150
- def kegg_get(argument: str | list[str]) -> str:
151
- """
152
- Retrieves KEGG entries in flat text format using KEGG REST API.
153
-
154
- :param argument: KEGG database entry identifier(s) or command arguments (e.g., "eco:b0002", ["eco:b0002", "eco:b0003"]).
155
- :return: A string containing the raw text data from KEGG.
156
- """
157
- with REST.kegg_get(argument) as handle:
158
- data: str = handle.read()
159
- return data
160
-
161
- @staticmethod
162
- def kegg_list(database: str, arguments: str | None = None) -> str:
163
- """
164
- Retrieves a list of entries from a KEGG database using KEGG REST API.
165
-
166
- :param database: KEGG database name (e.g., "pathway", "enzyme", "compound").
167
- :param arguments: Optional additional arguments for the list command (e.g., "hsa" for human pathways).
168
- Defaults to None.
169
- :return: Raw text list of entries from KEGG, as a single string.
170
- """
171
- with REST.kegg_list(database, arguments) as handle:
172
- data: str = handle.read()
173
- return data
174
-
175
- @staticmethod
176
- def kegg_find(database: str, query: str, arguments: str | None = None) -> str:
177
- """
178
- Finds entries in a KEGG database based on a text query using KEGG REST API.
179
-
180
- :param database: KEGG database name.
181
- :param query: Search term or query.
182
- :param arguments: Optional additional arguments for the find command. Defaults to None.
183
- :return: Raw text list of entries from KEGG matching the query.
184
- """
185
- with REST.kegg_find(database, query, arguments) as handle:
186
- data: str = handle.read()
187
- return data
188
-
189
- @staticmethod
190
- def kegg_conv(database1: str, database2: str) -> str:
191
- """
192
- Converts identifiers between two KEGG databases using KEGG REST API.
193
-
194
- :param database1: Source KEGG database name or identifier list.
195
- :param database2: Target KEGG database name.
196
- :return: Conversion table in raw text format from KEGG.
197
- """
198
- with REST.kegg_conv(database1, database2) as handle:
199
- data: str = handle.read()
200
- return data
201
-
202
- @staticmethod
203
- def pdb_parse(pdb_id: str, file_format: str = "pdb", file_contents: str | None = None) -> Structure:
204
- """
205
- Parses a PDB, mmCIF, MMTF, or BinaryCIF file and returns a Bio.PDB.Structure object.
206
-
207
- :param pdb_id: PDB ID of the structure. Used only if file_contents is None.
208
- :param file_format: File format ("pdb", "mmcif", "mmtf", or "binarycif"). Defaults to "pdb".
209
- :param file_contents: String containing PDB/mmCIF file contents. If provided, takes precedence over pdb_id.
210
- Defaults to None.
211
- :return: A Bio.PDB.Structure object representing the parsed structure.
212
- :raises ValueError: if an invalid file_format is provided.
213
- """
214
- return BioPythonHelper._parse_pdb_structure(pdb_id, file_format, file_contents)
215
-
216
- @staticmethod
217
- def structure_to_pdb_str(structure: Structure, output_format: str = "pdb") -> str:
218
- """
219
- Converts a Bio.PDB.Structure object to a PDB-formatted string.
220
-
221
- :param structure: The Bio.PDB.Structure object to convert.
222
- :param output_format: The desired output format ("pdb" or "mmcif"). Defaults to "pdb".
223
- :return: A PDB-formatted string.
224
- :raises ValueError: if an invalid file_format is provided.
225
- """
226
- io = PDBIO()
227
- io.set_structure(structure)
228
- with StringIO() as out_str:
229
- if output_format == "pdb":
230
- io.save(out_str)
231
- elif output_format == "mmcif":
232
- # For outputting a string, create a stringIO object
233
- io = PDBIO(is_pqr=True)
234
- io.set_structure(structure)
235
- io.save(out_str)
236
- else:
237
- raise ValueError("Invalid output format.")
238
- pdb_string = out_str.read()
239
- return pdb_string
240
-
241
- @staticmethod
242
- def mmcif_parse(pdb_id: str, file_contents: str | None = None) -> dict[str, list[str]]:
243
- """
244
- Parses an mmCIF file and returns a dictionary representation.
245
-
246
- :param pdb_id: PDB ID of the structure (used only if file_contents is None).
247
- :param file_contents: mmCIF file contents as a string. If provided, takes precedence over pdb_id.
248
- Defaults to None.
249
- :return: A dictionary mapping mmCIF tags to lists of their values.
250
- """
251
- if file_contents:
252
- with StringIO(file_contents) as mmcif_io:
253
- return MMCIF2Dict(mmcif_io)
254
-
255
- # Download and parse using MMCIF2Dict directly
256
- from Bio.PDB import PDBList
257
- pdbl = PDBList()
258
- file_path = pdbl.retrieve_pdb_file(pdb_id, file_format="mmcif", overwrite=True)
259
-
260
- return MMCIF2Dict(file_path)
261
-
262
- @staticmethod
263
- def atom_neighbor_search(pdb_id: str, file_format: str, file_contents: str | None = None,
264
- center: tuple[float, float, float] = (0.0, 0.0, 0.0),
265
- radius: float = 1.0) -> list[Atom]:
266
- """
267
- Finds atom neighbors within a specified radius of a center point in a PDB structure.
268
-
269
- :param pdb_id: PDB ID of the structure. Used only if file_contents is None.
270
- :param file_format: File format ("pdb" or "mmcif").
271
- :param file_contents: PDB file contents as a string. If provided, takes precedence over pdb_id.
272
- Defaults to None.
273
- :param center: Coordinates of the center point (x, y, z) as a tuple. Defaults to (0.0, 0.0, 0.0).
274
- :param radius: Search radius in Angstroms. Defaults to 1.0.
275
- :return: A list of Bio.PDB.Atom objects within the radius.
276
- :raises ValueError: if an invalid file_format is provided.
277
- """
278
- structure = BioPythonHelper._parse_pdb_structure(pdb_id, file_format, file_contents)
279
- atom_list = list(structure.get_atoms())
280
- ns = NeighborSearch(atom_list)
281
- neighbors = ns.search(center, radius, level='A')
282
- return neighbors
283
-
284
- @staticmethod
285
- def residue_neighbor_search(pdb_id: str, file_format: str, file_contents: str | None = None,
286
- center: tuple[float, float, float] = (0.0, 0.0, 0.0),
287
- radius: float = 1.0) -> list[Residue]:
288
- """
289
- Finds residue neighbors within a specified radius of a center point in a PDB structure.
290
-
291
- :param pdb_id: PDB ID of the structure. Used only if file_contents is None.
292
- :param file_format: File format ("pdb" or "mmcif").
293
- :param file_contents: PDB file contents as a string. If provided, takes precedence over pdb_id. Defaults to None.
294
- :param center: Coordinates of the center point (x, y, z) as a tuple. Defaults to (0.0, 0.0, 0.0).
295
- :param radius: Search radius in Angstroms. Defaults to 1.0.
296
- :return: A list of Bio.PDB.Residue objects within the radius.
297
- :raises ValueError: if an invalid file_format is provided.
298
- """
299
- structure = BioPythonHelper._parse_pdb_structure(pdb_id, file_format, file_contents)
300
- atom_list = list(structure.get_atoms())
301
- ns = NeighborSearch(atom_list)
302
- neighbors = ns.search(center, radius, level='R')
303
- return neighbors
304
-
305
- @staticmethod
306
- def chain_neighbor_search(pdb_id: str, file_format: str, file_contents: str | None = None,
307
- center: tuple[float, float, float] = (0.0, 0.0, 0.0),
308
- radius: float = 1.0) -> list[Chain]:
309
- """
310
- Finds chain neighbors within a specified radius of a center point in a PDB structure.
311
-
312
- :param pdb_id: PDB ID of the structure. Used only if file_contents is None.
313
- :param file_format: File format ("pdb" or "mmcif").
314
- :param file_contents: PDB file contents as a string. If provided, takes precedence over pdb_id. Defaults to None.
315
- :param center: Coordinates of the center point (x, y, z) as a tuple. Defaults to (0.0, 0.0, 0.0).
316
- :param radius: Search radius in Angstroms. Defaults to 1.0.
317
- :return: A list of Bio.PDB.Chain objects within the radius.
318
- :raises ValueError: if an invalid file_format is provided.
319
- """
320
- structure = BioPythonHelper._parse_pdb_structure(pdb_id, file_format, file_contents)
321
- atom_list = list(structure.get_atoms())
322
- ns = NeighborSearch(atom_list)
323
- neighbors = ns.search(center, radius, level='C')
324
- return neighbors
325
-
326
@staticmethod
def model_neighbor_search(pdb_id: str, file_format: str, file_contents: str | None = None,
                          center: tuple[float, float, float] = (0.0, 0.0, 0.0),
                          radius: float = 1.0) -> list[Model]:
    """
    Search a PDB structure for the models that lie within a given radius of a point.

    :param pdb_id: PDB ID of the structure. Used only if file_contents is None.
    :param file_format: File format ("pdb" or "mmcif").
    :param file_contents: PDB file contents as a string. If provided, takes precedence over pdb_id.
        Defaults to None.
    :param center: The (x, y, z) coordinates of the point to search around. Defaults to (0.0, 0.0, 0.0).
    :param radius: Search radius in Angstroms. Defaults to 1.0.
    :return: A list of Bio.PDB.Model objects within the radius.
    :raises ValueError: if an invalid file_format is provided.
    """
    parsed = BioPythonHelper._parse_pdb_structure(pdb_id, file_format, file_contents)
    # The search index is built over every atom of the structure; level 'M' asks for
    # the results to be reported as models instead of as individual atoms.
    searcher = NeighborSearch(list(parsed.get_atoms()))
    return searcher.search(center, radius, level='M')
346
-
347
@staticmethod
def superimpose(fixed_pdb_id: str, moving_pdb_id: str, fixed_file_format: str, moving_file_format: str,
                fixed_file_contents: str | None = None, moving_file_contents: str | None = None) \
        -> tuple[Superimposer, Structure]:
    """
    Superimpose a moving structure onto a fixed structure, pairing their atoms in iteration order.

    :param fixed_pdb_id: PDB ID of the fixed structure. Used only if fixed_file_contents is None.
    :param moving_pdb_id: PDB ID of the moving structure. Used only if moving_file_contents is None.
    :param fixed_file_format: File format of the fixed structure ("pdb" or "mmcif").
    :param moving_file_format: File format of the moving structure ("pdb" or "mmcif").
    :param fixed_file_contents: Fixed PDB/mmCIF file contents. If provided, takes precedence over fixed_pdb_id.
        Defaults to None.
    :param moving_file_contents: Moving PDB/mmCIF file contents. If provided, takes precedence over
        moving_pdb_id. Defaults to None.
    :return: A tuple containing:
        - The Bio.PDB.Superimposer object, which contains rotation/translation information.
        - The Bio.PDB.Structure object of the moving structure after the transformation has been applied.
    :raises ValueError: if the fixed and moving structures have different numbers of atoms.
    :raises ValueError: if an invalid file_format is provided.
    """
    reference = BioPythonHelper._parse_pdb_structure(fixed_pdb_id, fixed_file_format, fixed_file_contents)
    mobile = BioPythonHelper._parse_pdb_structure(moving_pdb_id, moving_file_format, moving_file_contents)

    aligner = Superimposer()
    reference_atoms = list(reference.get_atoms())
    mobile_atoms = list(mobile.get_atoms())

    # The two atom lists are matched position by position, so they must be the same length.
    if len(reference_atoms) != len(mobile_atoms):
        raise ValueError("Fixed and moving structures must have the same number of atoms.")

    aligner.set_atoms(reference_atoms, mobile_atoms)
    # apply() moves the atoms in place, which is why the moving structure itself is returned.
    aligner.apply(mobile_atoms)
    return aligner, mobile
382
-
383
@staticmethod
def distance_tree(sequences: dict[str, str], method: str = "nj", distance_model: str = "blosum62") -> Tree:
    """
    Constructs a UPGMA or Neighbor Joining tree from a set of aligned sequences.

    :param sequences: Dictionary of sequences, where keys are sequence IDs and values are sequences (strings).
        The sequences are combined into a multiple sequence alignment, so they are expected to already be
        aligned (i.e. all of the same length).
    :param method: Tree construction method ('upgma' or 'nj'). Defaults to 'nj'
    :param distance_model: The distance model to use for the distance matrix. Defaults to 'blosum62'
    :return: A Bio.Phylo.BaseTree.Tree object representing the constructed tree.
    :raises ValueError: if method is neither 'upgma' nor 'nj'.
    """
    # Imported lazily, in the same way the original implementation imported DistanceCalculator.
    from Bio.Align import MultipleSeqAlignment
    from Bio.Phylo.TreeConstruction import DistanceCalculator

    # Reject an unknown method up front rather than after the distance matrix has been computed.
    if method not in ("upgma", "nj"):
        raise ValueError("Invalid tree construction method. Choose 'upgma' or 'nj'.")

    # Create SeqRecord objects
    seq_records = [SeqRecord(Seq(seq), id=seq_id) for seq_id, seq in sequences.items()]

    # Calculate Distance Matrix. get_distance() only accepts alignment objects, so the records are
    # wrapped in a MultipleSeqAlignment instead of being passed as a plain list.
    alignment = MultipleSeqAlignment(seq_records)
    calculator = DistanceCalculator(distance_model)  # distance model such as 'blosum62'
    dm: DistanceMatrix = calculator.get_distance(alignment)

    # Construct Tree
    constructor = DistanceTreeConstructor()
    tree: Tree = constructor.upgma(dm) if method == "upgma" else constructor.nj(dm)
    return tree
410
-
411
@staticmethod
def newick_to_tree(newick_string: str) -> Tree:
    """
    Parse a Newick formatted string into a tree object.

    :param newick_string: The newick string to be converted.
    :return: The tree object read from the string.
    """
    # Phylo.read works on file handles, so the string is exposed through an in-memory text buffer.
    handle = StringIO(newick_string)
    try:
        parsed: Tree = Phylo.read(handle, "newick")
    finally:
        handle.close()
    return parsed
422
-
423
@staticmethod
def tree_to_newick(tree: Tree) -> str:
    """
    Converts a tree object to a newick string.

    :param tree: The tree to be converted, in the Bio.Phylo.BaseTree.Tree format.
    :return: The newick string representing the tree, exactly as written by Bio.Phylo.write.
    """
    with StringIO() as tree_io:
        Phylo.write(tree, tree_io, "newick")
        # After writing, the stream position sits at the end of the buffer, so read() would return an
        # empty string. getvalue() returns the full contents regardless of the current position.
        return tree_io.getvalue()
434
-
435
@staticmethod
def clade_get_terminals(tree: TreeAlias) -> list[str]:
    """
    List the names of the terminal nodes of a phylogenetic tree.

    :param tree: Tree object or Newick formatted tree string.
    :return: List of terminal clade names, in the order the tree reports its terminals.
    """
    parsed = BioPythonAliasUtil.to_tree(tree)
    names = []
    for leaf in parsed.get_terminals():
        names.append(leaf.name)
    return names
445
-
446
@staticmethod
def clade_get_nonterminals(tree: TreeAlias) -> list[str]:
    """
    List the names of the non-terminal nodes of a phylogenetic tree.

    :param tree: Tree object or Newick formatted tree string.
    :return: List of non-terminal clade names, in the order the tree reports its non-terminals.
    """
    parsed = BioPythonAliasUtil.to_tree(tree)
    names = []
    for inner in parsed.get_nonterminals():
        names.append(inner.name)
    return names
456
-
457
@staticmethod
def clade_common_ancestor_by_targets(tree: TreeAlias, target1: str, target2: str) -> str:
    """
    Finds the common ancestor of two target clades in a tree.

    :param tree: Tree object or Newick formatted tree string.
    :param target1: Target clade name.
    :param target2: Second target clade name.
    :return: Name of the common ancestor clade, or "Unnamed" if the ancestor has no name.
    """
    tree = BioPythonAliasUtil.to_tree(tree)
    ancestor = tree.common_ancestor(target1, target2)
    # The fallback has to look at the ancestor's name rather than the ancestor itself: internal
    # clades are frequently unnamed, and testing only the clade object let a None name leak out
    # of a function that is declared to return a string.
    if ancestor is None or not ancestor.name:
        return "Unnamed"
    return ancestor.name
469
-
470
@staticmethod
def clade_common_ancestor_by_taxa(tree: TreeAlias, taxa: list[str]) -> str:
    """
    Finds the common ancestor of a list of taxa.

    :param tree: Tree object or Newick formatted tree string.
    :param taxa: List of taxa.
    :return: Name of the common ancestor clade, or "Unnamed" if the ancestor has no name.
    """
    tree = BioPythonAliasUtil.to_tree(tree)
    ancestor = tree.common_ancestor(*taxa)
    # The fallback has to look at the ancestor's name rather than the ancestor itself: internal
    # clades are frequently unnamed, and testing only the clade object let a None name leak out
    # of a function that is declared to return a string.
    if ancestor is None or not ancestor.name:
        return "Unnamed"
    return ancestor.name
481
-
482
@staticmethod
def clade_distance(tree: TreeAlias, target1: str, target2: str) -> float:
    """
    Compute the distance between two clades of a phylogenetic tree.

    :param tree: Tree object or Newick formatted tree string.
    :param target1: Target clade name.
    :param target2: Second target clade name.
    :return: Distance between the two clades (float).
    :raises ValueError: If either target is missing or empty.
    """
    parsed = BioPythonAliasUtil.to_tree(tree)
    # Both targets are required; a missing one is rejected before asking the tree for a distance.
    if not target1 or not target2:
        raise ValueError("Must Provide Two Targets")
    return parsed.distance(target1, target2)
497
-
498
@staticmethod
def clade_total_branch_length(tree: TreeAlias) -> float:
    """
    Compute the total branch length of a phylogenetic tree.

    :param tree: Tree object or Newick formatted tree string.
    :return: Total branch length of the tree (float).
    """
    parsed = BioPythonAliasUtil.to_tree(tree)
    total = parsed.total_branch_length()
    return total
508
-
509
@staticmethod
def clade_depths(tree: TreeAlias, unit_branch_lengths: bool = False) -> dict[str, float]:
    """
    Compute the depth of every clade in a phylogenetic tree, keyed by clade name.

    :param tree: Tree object or Newick formatted tree string.
    :param unit_branch_lengths: If True, calculate depths using unit branch lengths. Defaults to False.
    :return: Dictionary mapping clade names to depths (float). Clades without a name are keyed by
        their string representation instead; if several clades end up with the same key, the one
        encountered last wins.
    """
    parsed = BioPythonAliasUtil.to_tree(tree)
    by_label = {}
    for node, depth in parsed.depths(unit_branch_lengths=unit_branch_lengths).items():
        label = node.name if node.name else str(node)
        by_label[label] = depth
    return by_label
521
-
522
@staticmethod
def motif_analysis(sequences: list[SeqAlias], alphabet: str = "ACGT") -> Motif:
    """
    Build a sequence motif from the given sequences.

    :param sequences: A list of DNA sequences, either in the form of strings or of Bio.Seq.Seq objects.
    :param alphabet: The alphabet used in the DNA sequences. Defaults to ACGT.
    :return: The Bio.motifs.Motif object built from an alignment of the given sequences.
    """
    # Normalise every entry to a sequence object first so that strings and Seq objects can be mixed.
    normalised = []
    for entry in sequences:
        normalised.append(BioPythonAliasUtil.to_sequence(entry))
    aligned = Alignment(normalised)
    return Motif(alphabet=alphabet, alignment=aligned)
533
-
534
@staticmethod
def pssm_search(pssm: PositionSpecificScoringMatrix, sequence: SeqAlias,
                threshold: float = 0.0, both_strands: bool = True) -> list[tuple[int, float]]:
    """
    Search a sequence for hits against a position specific scoring matrix.

    :param pssm: The position specific scoring matrix to run the search on.
    :param sequence: The sequence to search in, either as a string or already wrapped as a Bio.Seq object.
    :param threshold: The threshold above which the score must be for a hit to be returned as a match.
        Defaults to 0.0.
    :param both_strands: Whether both strands of the DNA sequence should be searched for hits.
        Defaults to True.
    :return: A list of (position, score) tuples, one per hit in the sequence. The position is an integer and
        the score is a float. Negative positions correspond to positions on the other strand of the DNA.
    """
    target = BioPythonAliasUtil.to_sequence(sequence)
    hits = pssm.search(target, threshold=threshold, both=both_strands)
    # The search result is consumed into a list so that the caller gets every hit at once.
    return list(hits)
550
-
551
@staticmethod
def read_sequence(file_path: str, seq_format: str) -> SeqRecord:
    """
    Read exactly one sequence record from a file via Bio.SeqIO.read.

    :param file_path: Path to the sequence file.
    :param seq_format: Format of the sequence file (e.g., "fasta", "genbank").
    :return: The single SeqRecord object contained in the file.
    :raises ValueError: if the file contains more than one record.
    """
    record = SeqIO.read(file_path, seq_format)
    return record
562
-
563
@staticmethod
def parse_sequences(file_path: str, seq_format: str) -> Iterator[SeqRecord]:
    """
    Parse any number of sequence records from a file via Bio.SeqIO.parse.

    :param file_path: Path to the sequence file.
    :param seq_format: Format of the sequence file (e.g., "fasta", "genbank").
    :return: An iterator yielding SeqRecord objects.
    """
    records = SeqIO.parse(file_path, seq_format)
    return records
573
-
574
@staticmethod
def write_sequences(sequences: list[SeqRecord], file_path: str, seq_format: str) -> int:
    """
    Write SeqRecord objects to a file via Bio.SeqIO.write.

    :param sequences: List of SeqRecord objects to write.
    :param file_path: Output file path.
    :param seq_format: Output sequence format (e.g., "fasta", "genbank").
    :return: The number of records written.
    """
    written = SeqIO.write(sequences, file_path, seq_format)
    return written
585
-
586
@staticmethod
def convert_sequence_format(input_file: str, input_format: str, output_file: str, output_format: str) -> int:
    """
    Convert a sequence file from one format to another via Bio.SeqIO.convert.

    :param input_file: Path to the input sequence file.
    :param input_format: Format of the input file (e.g., "genbank").
    :param output_file: Path to the output sequence file.
    :param output_format: Desired format of the output file (e.g., "fasta").
    :return: The number of records converted.
    """
    converted = SeqIO.convert(input_file, input_format, output_file, output_format)
    return converted
598
-
599
@staticmethod
def reverse_complement(sequence: SeqAlias) -> Seq:
    """
    Compute the reverse complement of a DNA sequence.

    :param sequence: The DNA sequence (string or Seq object).
    :return: The reverse complement as a Seq object.
    """
    as_seq = BioPythonAliasUtil.to_sequence(sequence)
    return as_seq.reverse_complement()
608
-
609
@staticmethod
def transcribe(dna_sequence: SeqAlias) -> Seq:
    """
    Transcribe a DNA sequence into RNA.

    :param dna_sequence: The DNA sequence (string or Seq object).
    :return: The transcribed RNA sequence as a Seq object.
    """
    as_seq = BioPythonAliasUtil.to_sequence(dna_sequence)
    return as_seq.transcribe()
618
-
619
@staticmethod
def back_transcribe(rna_sequence: SeqAlias) -> Seq:
    """
    Back-transcribe an RNA sequence into DNA.

    :param rna_sequence: The RNA sequence (string or Seq object).
    :return: The back-transcribed DNA sequence as a Seq object.
    """
    as_seq = BioPythonAliasUtil.to_sequence(rna_sequence)
    return as_seq.back_transcribe()
628
-
629
@staticmethod
def translate(sequence: SeqAlias, table: str | int = "Standard", to_stop: bool = False) -> Seq:
    """
    Translate a nucleotide sequence into a protein sequence.

    :param sequence: The nucleotide sequence (string or Seq object).
    :param table: The genetic code table to use (string or integer). Defaults to "Standard".
    :param to_stop: If True, translation stops at the first in-frame stop codon. Defaults to False.
    :return: The translated protein sequence as a Seq object.
    """
    as_seq = BioPythonAliasUtil.to_sequence(sequence)
    protein = as_seq.translate(table=table, to_stop=to_stop)
    return protein
@@ -1,82 +0,0 @@
1
- from typing import Any
2
-
3
- from rdkit import Chem
4
- from rdkit.Chem import QED, Mol
5
- from rdkit.Chem.Crippen import MolLogP
6
- from rdkit.Chem.Descriptors import MolWt
7
- from rdkit.Chem.Lipinski import NumHDonors, NumHAcceptors, NumRotatableBonds
8
- from sapiopylib.rest.User import SapioUser
9
-
10
-
11
class RdKitHelper:
    """
    A class designed for simplifying and better documenting the behavior of commonly used RDKit functions.
    """
    # The user to make requests from.
    user: SapioUser
    # The ID of the experiment that the user is in.
    exp_id: int
    # The prefix of the tab for displaying results of functions in. None when no prefix was provided.
    tab_prefix: str | None

    def __init__(self, user: SapioUser, exp_id: int, tab_prefix: str | None = None):
        """
        :param user: The user to make requests from.
        :param exp_id: The ID of the experiment that the user is in.
        :param tab_prefix: The prefix of the tab for displaying results of functions in.
        """
        self.user = user
        self.exp_id = exp_id
        self.tab_prefix = tab_prefix

    @staticmethod
    def filter_drug_like_compounds(compounds: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """
        Filter the compounds based on Lipinski's Rule of Five and QED score to prioritize drug-like molecules.

        A compound is kept when its molecular weight is at most 500, its LogP is at most 5, it has at most
        5 hydrogen bond donors and at most 10 hydrogen bond acceptors, and its QED score is at least 0.5.
        The number of rotatable bonds is reported for each kept compound but is not used as a filter.

        Compounds with a missing or empty SMILES, or whose SMILES cannot be parsed, are skipped. Any error
        raised while processing a compound is printed and that compound is skipped, so that one bad entry
        does not abort the whole batch.

        :param compounds: A list of dictionaries, where each dictionary represents a compound with the following
            expected fields:
            - "smiles" (str): SMILES representation of the compound.
            - "record_id" (Any): Unique identifier for the compound.
            - "name" (str): Name of the compound.
        :return: A list of dictionaries representing drug-like compounds with the following fields:
            - "smiles" (str): SMILES representation of the compound.
            - "record_id" (Any): Unique identifier for the compound.
            - "name" (str): Name of the compound.
            - "mw" (float): Molecular weight of the compound.
            - "logp" (float): LogP (lipophilicity) value.
            - "hbd" (int): Number of hydrogen bond donors.
            - "hba" (int): Number of hydrogen bond acceptors.
            - "num_rotatable_bonds" (int): Number of rotatable bonds.
            - "qed_score" (float): QED (Quantitative Estimation of Drug-likeness) score.
        """
        drug_like_compounds: list[dict[str, Any]] = []

        for compound in compounds:
            smiles: str = compound.get("smiles", "")
            # An empty SMILES would parse into an empty molecule rather than fail, so reject it explicitly.
            if not smiles:
                continue
            try:
                # MolFromSmiles sanitizes the molecule by default and returns None for unparsable input,
                # so no separate sanitization pass is needed afterwards.
                mol: Mol = Chem.MolFromSmiles(smiles)
                if mol is None:
                    continue

                mw = MolWt(mol)
                logp = MolLogP(mol)
                hbd = NumHDonors(mol)
                hba = NumHAcceptors(mol)
                num_rotatable_bonds = NumRotatableBonds(mol)
                # QED.qed computes the QED properties itself when none are passed in, so there is no
                # need to compute them separately beforehand.
                qed_score = QED.qed(mol)

                if mw <= 500 and logp <= 5 and hbd <= 5 and hba <= 10 and qed_score >= 0.5:
                    drug_like_compounds.append({
                        "smiles": smiles,
                        "record_id": compound["record_id"],
                        "name": compound["name"],
                        "mw": mw,
                        "logp": logp,
                        "hbd": hbd,
                        "hba": hba,
                        "num_rotatable_bonds": num_rotatable_bonds,
                        "qed_score": qed_score
                    })
            except Exception as e:
                # Deliberately broad: this is a best-effort filter, and a failure on one compound
                # (including a missing "record_id" or "name" key) must not stop the others.
                print(f"Error processing SMILES: {smiles} - {e}")

        return drug_like_compounds