sapiopycommons 2025.2.20a444__tar.gz → 2025.2.21a447__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sapiopycommons might be problematic. Click here for more details.

Files changed (85) hide show
  1. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/PKG-INFO +1 -1
  2. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/pyproject.toml +1 -1
  3. sapiopycommons-2025.2.21a447/src/sapiopycommons/ai/biopython_helper.py +639 -0
  4. sapiopycommons-2025.2.21a447/src/sapiopycommons/ai/rdkit_helper.py +82 -0
  5. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/ai/tool_of_tools.py +86 -40
  6. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/.gitignore +0 -0
  7. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/LICENSE +0 -0
  8. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/README.md +0 -0
  9. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/__init__.py +0 -0
  10. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/ai/__init__.py +0 -0
  11. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/callbacks/__init__.py +0 -0
  12. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/callbacks/callback_util.py +0 -0
  13. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/callbacks/field_builder.py +0 -0
  14. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/chem/IndigoMolecules.py +0 -0
  15. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/chem/Molecules.py +0 -0
  16. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/chem/__init__.py +0 -0
  17. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/customreport/__init__.py +0 -0
  18. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/customreport/auto_pagers.py +0 -0
  19. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/customreport/column_builder.py +0 -0
  20. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/customreport/custom_report_builder.py +0 -0
  21. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/customreport/term_builder.py +0 -0
  22. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/datatype/__init__.py +0 -0
  23. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/datatype/attachment_util.py +0 -0
  24. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/datatype/data_fields.py +0 -0
  25. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/datatype/pseudo_data_types.py +0 -0
  26. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/eln/__init__.py +0 -0
  27. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/eln/experiment_handler.py +0 -0
  28. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/eln/experiment_report_util.py +0 -0
  29. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/eln/plate_designer.py +0 -0
  30. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/files/__init__.py +0 -0
  31. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/files/complex_data_loader.py +0 -0
  32. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/files/file_bridge.py +0 -0
  33. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/files/file_bridge_handler.py +0 -0
  34. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/files/file_data_handler.py +0 -0
  35. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/files/file_util.py +0 -0
  36. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/files/file_validator.py +0 -0
  37. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/files/file_writer.py +0 -0
  38. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/flowcyto/flow_cyto.py +0 -0
  39. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/flowcyto/flowcyto_data.py +0 -0
  40. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/general/__init__.py +0 -0
  41. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/general/accession_service.py +0 -0
  42. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/general/aliases.py +0 -0
  43. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/general/audit_log.py +0 -0
  44. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/general/custom_report_util.py +0 -0
  45. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/general/directive_util.py +0 -0
  46. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/general/exceptions.py +0 -0
  47. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/general/popup_util.py +0 -0
  48. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/general/sapio_links.py +0 -0
  49. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/general/storage_util.py +0 -0
  50. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/general/time_util.py +0 -0
  51. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/multimodal/multimodal.py +0 -0
  52. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/multimodal/multimodal_data.py +0 -0
  53. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/processtracking/__init__.py +0 -0
  54. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/processtracking/custom_workflow_handler.py +0 -0
  55. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/processtracking/endpoints.py +0 -0
  56. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/recordmodel/__init__.py +0 -0
  57. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/recordmodel/record_handler.py +0 -0
  58. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/rules/__init__.py +0 -0
  59. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/rules/eln_rule_handler.py +0 -0
  60. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/rules/on_save_rule_handler.py +0 -0
  61. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/samples/aliquot.py +0 -0
  62. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/sftpconnect/__init__.py +0 -0
  63. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/sftpconnect/sftp_builder.py +0 -0
  64. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/webhook/__init__.py +0 -0
  65. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/webhook/webhook_context.py +0 -0
  66. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/webhook/webhook_handlers.py +0 -0
  67. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/src/sapiopycommons/webhook/webservice_handlers.py +0 -0
  68. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/tests/AF-A0A009IHW8-F1-model_v4.cif +0 -0
  69. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/tests/_do_not_add_init_py_here +0 -0
  70. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/tests/accession_test.py +0 -0
  71. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/tests/aliquot_test.py +0 -0
  72. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/tests/bio_reg_test.py +0 -0
  73. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/tests/chem_test.py +0 -0
  74. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/tests/chem_test_curation_queue.py +0 -0
  75. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/tests/curation_queue_test.sdf +0 -0
  76. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/tests/data_type_models.py +0 -0
  77. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/tests/flowcyto/101_DEN084Y5_15_E01_008_clean.fcs +0 -0
  78. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/tests/flowcyto/101_DEN084Y5_15_E03_009_clean.fcs +0 -0
  79. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/tests/flowcyto/101_DEN084Y5_15_E05_010_clean.fcs +0 -0
  80. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/tests/flowcyto/8_color_ICS.wsp +0 -0
  81. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/tests/flowcyto/COVID19_W_001_O.fcs +0 -0
  82. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/tests/flowcyto_test.py +0 -0
  83. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/tests/kappa.chains.fasta +0 -0
  84. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/tests/mafft_test.py +0 -0
  85. {sapiopycommons-2025.2.20a444 → sapiopycommons-2025.2.21a447}/tests/test.gb +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: sapiopycommons
3
- Version: 2025.2.20a444
3
+ Version: 2025.2.21a447
4
4
  Summary: Official Sapio Python API Utilities Package
5
5
  Project-URL: Homepage, https://github.com/sapiosciences
6
6
  Author-email: Jonathan Steck <jsteck@sapiosciences.com>, Yechen Qiao <yqiao@sapiosciences.com>
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sapiopycommons"
7
- version='2025.02.20a444'
7
+ version='2025.02.21a447'
8
8
  authors = [
9
9
  { name="Jonathan Steck", email="jsteck@sapiosciences.com" },
10
10
  { name="Yechen Qiao", email="yqiao@sapiosciences.com" },
@@ -0,0 +1,639 @@
1
+ from __future__ import annotations
2
+
3
+ from io import StringIO
4
+ from typing import TypeAlias, Iterator
5
+
6
+ from Bio import Phylo, SeqIO
7
+ from Bio.Align import substitution_matrices, Alignment
8
+ from Bio.Align.substitution_matrices import Array
9
+ from Bio.Blast import Records, parse
10
+ from Bio.KEGG import REST
11
+ from Bio.PDB.Atom import Atom
12
+ from Bio.PDB.Chain import Chain
13
+ from Bio.PDB.MMCIF2Dict import MMCIF2Dict
14
+ from Bio.PDB.MMCIFParser import MMCIFParser
15
+ from Bio.PDB.Model import Model
16
+ from Bio.PDB.NeighborSearch import NeighborSearch
17
+ from Bio.PDB.PDBIO import PDBIO
18
+ from Bio.PDB.PDBParser import PDBParser
19
+ from Bio.PDB.Residue import Residue
20
+ from Bio.PDB.Structure import Structure
21
+ from Bio.PDB.Superimposer import Superimposer
22
+ from Bio.Phylo.BaseTree import Tree
23
+ from Bio.Phylo.TreeConstruction import DistanceTreeConstructor, DistanceMatrix
24
+ from Bio.Seq import Seq
25
+ from Bio.SeqRecord import SeqRecord
26
+ from Bio.motifs import Motif
27
+ from Bio.motifs.matrix import PositionSpecificScoringMatrix
28
+ from sapiopylib.rest.User import SapioUser
29
+
30
+ SeqAlias: TypeAlias = Seq | str
31
+ TreeAlias: TypeAlias = Tree | str
32
+
33
+
34
class BioPythonAliasUtil:
    """
    Normalization helpers for the alias types accepted by this module, so that callers may pass either a
    BioPython object or its plain-string form.
    """

    @staticmethod
    def to_sequence(sequence: SeqAlias) -> Seq:
        """
        Normalize a sequence alias into a Bio.Seq.Seq object.

        :param sequence: Either a Bio.Seq.Seq object or a plain string of sequence characters.
        :return: A new Bio.Seq.Seq wrapping the string, or the input itself if it was not a string.
        """
        return Seq(sequence) if isinstance(sequence, str) else sequence

    @staticmethod
    def to_tree(tree: TreeAlias) -> Tree:
        """
        Normalize a tree alias into a Bio.Phylo.BaseTree.Tree object.

        :param tree: Either a Bio.Phylo.BaseTree.Tree object or a Newick-formatted string.
        :return: The tree parsed from the Newick string, or the input itself if it was not a string.
        """
        # Anything that is not a string is assumed to already be a tree and is passed through untouched.
        if not isinstance(tree, str):
            return tree
        with StringIO(tree) as newick_io:
            return Phylo.read(newick_io, "newick")
59
+
60
+
61
+ class BioPythonHelper:
62
+ """
63
+ A class designed for simplifying and better documenting the behavior of commonly used BioPython functions.
64
+ """
65
+ user: SapioUser
66
+ exp_id: int
67
+ tab_prefix: str | None
68
+
69
+ def __init__(self, user: SapioUser, exp_id: int, tab_prefix: str | None = None):
70
+ """
71
+ :param user: The user to make requests from.
72
+ :param exp_id: The ID of the experiment that the user is in.
73
+ :param tab_prefix: The prefix of the tab for displaying results of functions in. Defaults to None.
74
+ """
75
+ self.user = user
76
+ self.exp_id = exp_id
77
+ self.tab_prefix = tab_prefix
78
+
79
+ @staticmethod
80
+ def _parse_pdb_structure(pdb_id: str, file_format: str, file_contents: str | None = None) -> Structure:
81
+ """
82
+ Helper function to parse PDB structures, handling file input and format selection.clade_
83
+
84
+ :param pdb_id: PDB ID of the structure. Used only if file_contents is None.
85
+ :param file_format: File format ("pdb", "mmcif", "mmtf", "binarycif").
86
+ :param file_contents: PDB/mmCIF file contents as a string. If provided, takes precedence over pdb_id.
87
+ """
88
+ if file_contents:
89
+ if file_format == "pdb":
90
+ parser = PDBParser()
91
+ with StringIO(file_contents) as pdb_io:
92
+ structure = parser.get_structure("input_structure", pdb_io)
93
+ elif file_format == "mmcif":
94
+ parser = MMCIFParser()
95
+ with StringIO(file_contents) as mmcif_io:
96
+ structure = parser.get_structure("input_structure", mmcif_io)
97
+ else:
98
+ raise ValueError("Invalid file format when providing file_contents")
99
+ return structure
100
+
101
+ from Bio.PDB import PDBList
102
+ pdbl = PDBList()
103
+ pdb_id = pdb_id.strip().upper()
104
+ if '|' in pdb_id:
105
+ pdb_id = pdb_id.split('|')[0]
106
+
107
+ if file_format == "pdb":
108
+ parser = PDBParser()
109
+ file_path = pdbl.retrieve_pdb_file(pdb_id, file_format="pdb", overwrite=True)
110
+ return parser.get_structure(pdb_id, file_path)
111
+ elif file_format == "mmcif":
112
+ parser = MMCIFParser()
113
+ file_path = pdbl.retrieve_pdb_file(pdb_id, file_format="mmcif", overwrite=True)
114
+ return parser.get_structure(pdb_id, file_path)
115
+ elif file_format == "mmtf":
116
+ from Bio.PDB.mmtf import MMTFParser
117
+ parser = MMTFParser()
118
+ return parser.get_structure_from_url(pdb_id)
119
+ elif file_format == "binarycif":
120
+ from Bio.PDB.binary_cif import BinaryCIFParser
121
+ parser = BinaryCIFParser()
122
+ file_path = pdbl.retrieve_pdb_file(pdb_id, file_format="bcif", overwrite=True) # Corrected file_format
123
+ return parser.get_structure(pdb_id, file_path)
124
+ else:
125
+ raise ValueError("Invalid file format.")
126
+
127
@staticmethod
def load_matrix(matrix_name: str) -> Array:
    """
    Look up one of the substitution matrices bundled with Bio.Align.substitution_matrices.

    :param matrix_name: Name of the matrix to load (e.g., "BLOSUM62").
    :return: The substitution matrix as an Array object.
    """
    matrix: Array = substitution_matrices.load(matrix_name)
    return matrix
136
+
137
@staticmethod
def blast_run(blast_output: str) -> Records:
    """
    Parses BLAST output and returns a Bio.Blast.Records object.

    NOTE(review): Bio.Blast.parse is documented for BLAST XML output read from a binary stream; confirm that
    callers pass XML rather than the plain-text report format.

    :param blast_output: BLAST output as a string (bytes are also accepted and used as-is).
    :return: A Bio.Blast.Records iterator, yielding Bio.Blast.Record objects.
    """
    from io import BytesIO

    raw: bytes = blast_output.encode("utf-8") if isinstance(blast_output, str) else blast_output
    # The Records object reads from its stream lazily, so the stream must stay open after this function
    # returns. An in-memory BytesIO holds no OS resources and is released with the Records object, so it is
    # deliberately not wrapped in a `with` block.
    return parse(BytesIO(raw))
148
+
149
@staticmethod
def kegg_get(argument: str | list[str]) -> str:
    """
    Fetch one or more KEGG entries as flat text through the KEGG REST API.

    :param argument: KEGG database entry identifier(s) or command arguments
        (e.g., "eco:b0002", ["eco:b0002", "eco:b0003"]).
    :return: The raw text returned by KEGG.
    """
    with REST.kegg_get(argument) as response:
        return response.read()
160
+
161
@staticmethod
def kegg_list(database: str, arguments: str | None = None) -> str:
    """
    List the entries of a KEGG database through the KEGG REST API.

    :param database: KEGG database name (e.g., "pathway", "enzyme", "compound").
    :param arguments: Optional additional arguments for the list command (e.g., "hsa" for human pathways).
        Defaults to None.
    :return: The raw text list of entries returned by KEGG, as a single string.
    """
    with REST.kegg_list(database, arguments) as response:
        return response.read()
174
+
175
@staticmethod
def kegg_find(database: str, query: str, arguments: str | None = None) -> str:
    """
    Search a KEGG database for entries matching a text query through the KEGG REST API.

    :param database: KEGG database name.
    :param query: Search term or query.
    :param arguments: Optional additional arguments for the find command. Defaults to None.
    :return: The raw text list of matching entries returned by KEGG.
    """
    with REST.kegg_find(database, query, arguments) as response:
        return response.read()
188
+
189
@staticmethod
def kegg_conv(database1: str, database2: str) -> str:
    """
    Convert identifiers between two KEGG databases through the KEGG REST API.

    :param database1: Source KEGG database name or identifier list.
    :param database2: Target KEGG database name.
    :return: The conversion table returned by KEGG, as raw text.
    """
    with REST.kegg_conv(database1, database2) as response:
        return response.read()
201
+
202
+ @staticmethod
203
+ def pdb_parse(pdb_id: str, file_format: str = "pdb", file_contents: str | None = None) -> Structure:
204
+ """
205
+ Parses a PDB, mmCIF, MMTF, or BinaryCIF file and returns a Bio.PDB.Structure object.
206
+
207
+ :param pdb_id: PDB ID of the structure. Used only if file_contents is None.
208
+ :param file_format: File format ("pdb", "mmcif", "mmtf", or "binarycif"). Defaults to "pdb".
209
+ :param file_contents: String containing PDB/mmCIF file contents. If provided, takes precedence over pdb_id.
210
+ Defaults to None.
211
+ :return: A Bio.PDB.Structure object representing the parsed structure.
212
+ :raises ValueError: if an invalid file_format is provided.
213
+ """
214
+ return BioPythonHelper._parse_pdb_structure(pdb_id, file_format, file_contents)
215
+
216
@staticmethod
def structure_to_pdb_str(structure: Structure, output_format: str = "pdb") -> str:
    """
    Serializes a Bio.PDB.Structure object to a PDB- or mmCIF-formatted string.

    :param structure: The Bio.PDB.Structure object to convert.
    :param output_format: The desired output format ("pdb" or "mmcif"). Defaults to "pdb".
    :return: The structure as text in the requested format.
    :raises ValueError: if an invalid output_format is provided.
    """
    if output_format == "pdb":
        writer = PDBIO()
    elif output_format == "mmcif":
        # PDBIO cannot emit mmCIF (is_pqr=True produces PQR, a PDB variant); MMCIFIO is the mmCIF writer.
        from Bio.PDB.mmcifio import MMCIFIO
        writer = MMCIFIO()
    else:
        raise ValueError("Invalid output format.")

    writer.set_structure(structure)
    with StringIO() as buffer:
        writer.save(buffer)
        # getvalue() returns everything written so far; read() would start at the current (end) position
        # and always yield an empty string.
        return buffer.getvalue()
240
+
241
@staticmethod
def mmcif_parse(pdb_id: str, file_contents: str | None = None) -> dict[str, list[str]]:
    """
    Read an mmCIF file into its dictionary representation.

    :param pdb_id: PDB ID of the structure (used only if file_contents is None or empty).
    :param file_contents: mmCIF file contents as a string. If provided, takes precedence over pdb_id.
        Defaults to None.
    :return: A dictionary mapping mmCIF tags to lists of their values.
    """
    if not file_contents:
        # Nothing supplied in memory: download the mmCIF file for the entry and parse it from disk.
        from Bio.PDB import PDBList
        cif_path = PDBList().retrieve_pdb_file(pdb_id, file_format="mmcif", overwrite=True)
        return MMCIF2Dict(cif_path)

    with StringIO(file_contents) as contents_io:
        return MMCIF2Dict(contents_io)
261
+
262
@staticmethod
def atom_neighbor_search(pdb_id: str, file_format: str, file_contents: str | None = None,
                         center: tuple[float, float, float] = (0.0, 0.0, 0.0),
                         radius: float = 1.0) -> list[Atom]:
    """
    Finds atom neighbors within a specified radius of a center point in a PDB structure.

    :param pdb_id: PDB ID of the structure. Used only if file_contents is None.
    :param file_format: File format ("pdb" or "mmcif").
    :param file_contents: PDB file contents as a string. If provided, takes precedence over pdb_id.
        Defaults to None.
    :param center: Coordinates of the center point (x, y, z). Any sequence of three numbers (tuple, list,
        or NumPy array) is accepted. Defaults to (0.0, 0.0, 0.0).
    :param radius: Search radius in Angstroms. Defaults to 1.0.
    :return: A list of Bio.PDB.Atom objects within the radius.
    :raises ValueError: if an invalid file_format is provided.
    """
    import numpy as np

    structure = BioPythonHelper._parse_pdb_structure(pdb_id, file_format, file_contents)
    searcher = NeighborSearch(list(structure.get_atoms()))
    # NeighborSearch.search expects the center as a float64 NumPy array; a plain tuple is rejected by the
    # underlying KD-tree, so convert it here.
    center_point = np.asarray(center, dtype=np.float64)
    return searcher.search(center_point, radius, level='A')
283
+
284
@staticmethod
def residue_neighbor_search(pdb_id: str, file_format: str, file_contents: str | None = None,
                            center: tuple[float, float, float] = (0.0, 0.0, 0.0),
                            radius: float = 1.0) -> list[Residue]:
    """
    Finds residue neighbors within a specified radius of a center point in a PDB structure.

    :param pdb_id: PDB ID of the structure. Used only if file_contents is None.
    :param file_format: File format ("pdb" or "mmcif").
    :param file_contents: PDB file contents as a string. If provided, takes precedence over pdb_id.
        Defaults to None.
    :param center: Coordinates of the center point (x, y, z). Any sequence of three numbers (tuple, list,
        or NumPy array) is accepted. Defaults to (0.0, 0.0, 0.0).
    :param radius: Search radius in Angstroms. Defaults to 1.0.
    :return: A list of Bio.PDB.Residue objects within the radius.
    :raises ValueError: if an invalid file_format is provided.
    """
    import numpy as np

    structure = BioPythonHelper._parse_pdb_structure(pdb_id, file_format, file_contents)
    searcher = NeighborSearch(list(structure.get_atoms()))
    # NeighborSearch.search expects the center as a float64 NumPy array; a plain tuple is rejected by the
    # underlying KD-tree, so convert it here.
    center_point = np.asarray(center, dtype=np.float64)
    return searcher.search(center_point, radius, level='R')
304
+
305
@staticmethod
def chain_neighbor_search(pdb_id: str, file_format: str, file_contents: str | None = None,
                          center: tuple[float, float, float] = (0.0, 0.0, 0.0),
                          radius: float = 1.0) -> list[Chain]:
    """
    Finds chain neighbors within a specified radius of a center point in a PDB structure.

    :param pdb_id: PDB ID of the structure. Used only if file_contents is None.
    :param file_format: File format ("pdb" or "mmcif").
    :param file_contents: PDB file contents as a string. If provided, takes precedence over pdb_id.
        Defaults to None.
    :param center: Coordinates of the center point (x, y, z). Any sequence of three numbers (tuple, list,
        or NumPy array) is accepted. Defaults to (0.0, 0.0, 0.0).
    :param radius: Search radius in Angstroms. Defaults to 1.0.
    :return: A list of Bio.PDB.Chain objects within the radius.
    :raises ValueError: if an invalid file_format is provided.
    """
    import numpy as np

    structure = BioPythonHelper._parse_pdb_structure(pdb_id, file_format, file_contents)
    searcher = NeighborSearch(list(structure.get_atoms()))
    # NeighborSearch.search expects the center as a float64 NumPy array; a plain tuple is rejected by the
    # underlying KD-tree, so convert it here.
    center_point = np.asarray(center, dtype=np.float64)
    return searcher.search(center_point, radius, level='C')
325
+
326
@staticmethod
def model_neighbor_search(pdb_id: str, file_format: str, file_contents: str | None = None,
                          center: tuple[float, float, float] = (0.0, 0.0, 0.0),
                          radius: float = 1.0) -> list[Model]:
    """
    Finds model neighbors within a specified radius of a center point in a PDB structure.

    :param pdb_id: PDB ID of the structure. Used only if file_contents is None.
    :param file_format: File format ("pdb" or "mmcif").
    :param file_contents: PDB file contents as a string. If provided, takes precedence over pdb_id.
        Defaults to None.
    :param center: Coordinates of the center point (x, y, z). Any sequence of three numbers (tuple, list,
        or NumPy array) is accepted. Defaults to (0.0, 0.0, 0.0).
    :param radius: Search radius in Angstroms. Defaults to 1.0.
    :return: A list of Bio.PDB.Model objects within the radius.
    :raises ValueError: if an invalid file_format is provided.
    """
    import numpy as np

    structure = BioPythonHelper._parse_pdb_structure(pdb_id, file_format, file_contents)
    searcher = NeighborSearch(list(structure.get_atoms()))
    # NeighborSearch.search expects the center as a float64 NumPy array; a plain tuple is rejected by the
    # underlying KD-tree, so convert it here.
    center_point = np.asarray(center, dtype=np.float64)
    return searcher.search(center_point, radius, level='M')
346
+
347
@staticmethod
def superimpose(fixed_pdb_id: str, moving_pdb_id: str, fixed_file_format: str, moving_file_format: str,
                fixed_file_contents: str | None = None, moving_file_contents: str | None = None) \
        -> tuple[Superimposer, Structure]:
    """
    Superimposes a moving structure onto a fixed structure using all atoms of both, pairing them in
    iteration order.

    :param fixed_pdb_id: PDB ID of the fixed structure. Used only if fixed_file_contents is None.
    :param moving_pdb_id: PDB ID of the moving structure. Used only if moving_file_contents is None.
    :param fixed_file_format: File format of the fixed structure ("pdb" or "mmcif").
    :param moving_file_format: File format of the moving structure ("pdb" or "mmcif").
    :param fixed_file_contents: Fixed PDB/mmCIF file contents. If provided, takes precedence over
        fixed_pdb_id. Defaults to None.
    :param moving_file_contents: Moving PDB/mmCIF file contents. If provided, takes precedence over
        moving_pdb_id. Defaults to None.
    :return: A tuple containing:
        - The Bio.PDB.Superimposer object, which contains rotation/translation information.
        - The Bio.PDB.Structure object of the moving structure, whose atoms have been transformed in place.
    :raises ValueError: if the fixed and moving structures have different numbers of atoms.
    :raises ValueError: if an invalid file_format is provided.
    """
    reference = BioPythonHelper._parse_pdb_structure(fixed_pdb_id, fixed_file_format, fixed_file_contents)
    mobile = BioPythonHelper._parse_pdb_structure(moving_pdb_id, moving_file_format, moving_file_contents)

    reference_atoms = list(reference.get_atoms())
    mobile_atoms = list(mobile.get_atoms())
    if len(reference_atoms) != len(mobile_atoms):
        raise ValueError("Fixed and moving structures must have the same number of atoms.")

    aligner = Superimposer()
    # set_atoms computes the transformation; apply then moves the mobile atoms onto the reference.
    aligner.set_atoms(reference_atoms, mobile_atoms)
    aligner.apply(mobile_atoms)
    return aligner, mobile
382
+
383
+ @staticmethod
384
+ def distance_tree(sequences: dict[str, str], method: str = "nj", distance_model: str = "blosum62") -> Tree:
385
+ """
386
+ Constructs a UPGMA or Neighbor Joining tree from a set of sequences.
387
+
388
+ :param sequences: Dictionary of sequences, where keys are sequence IDs and values are sequences (strings).
389
+ :param method: Tree construction method ('upgma' or 'nj'). Defaults to 'nj'
390
+ :param distance_model: The distance model to use for the distance matrix. Defaults to 'blosum62'
391
+ :return: A Bio.Phylo.BaseTree.Tree object representing the constructed tree.
392
+ """
393
+ # Create SeqRecord objects
394
+ seq_records = [SeqRecord(Seq(seq), id=seq_id) for seq_id, seq in sequences.items()]
395
+
396
+ # Calculate Distance Matrix
397
+ from Bio.Phylo.TreeConstruction import DistanceCalculator
398
+ calculator = DistanceCalculator(distance_model) # distance model such as 'blosum62'
399
+ dm: DistanceMatrix = calculator.get_distance(seq_records)
400
+
401
+ # Construct Tree
402
+ constructor = DistanceTreeConstructor()
403
+ if method == "upgma":
404
+ tree: Tree = constructor.upgma(dm)
405
+ elif method == "nj":
406
+ tree: Tree = constructor.nj(dm)
407
+ else:
408
+ raise ValueError("Invalid tree construction method. Choose 'upgma' or 'nj'.")
409
+ return tree
410
+
411
@staticmethod
def newick_to_tree(newick_string: str) -> Tree:
    """
    Parse a Newick-formatted string into a tree object.

    :param newick_string: The Newick string to parse.
    :return: The parsed Bio.Phylo.BaseTree.Tree object.
    """
    with StringIO(newick_string) as newick_io:
        return Phylo.read(newick_io, "newick")
422
+
423
@staticmethod
def tree_to_newick(tree: Tree) -> str:
    """
    Converts a tree object to a newick string.

    :param tree: The tree to be converted, in the Bio.Phylo.BaseTree.Tree format.
    :return: The newick string representing the tree, exactly as written by Bio.Phylo.write.
    """
    with StringIO() as buffer:
        Phylo.write(tree, buffer, "newick")
        # getvalue() returns the full buffer; read() would start at the write position (the end) and
        # always return an empty string.
        return buffer.getvalue()
434
+
435
@staticmethod
def clade_get_terminals(tree: TreeAlias) -> list[str]:
    """
    Collect the names of the terminal (leaf) clades of a phylogenetic tree.

    :param tree: Tree object or Newick formatted tree string.
    :return: The name of every terminal clade, in the order the tree yields them.
    """
    parsed = BioPythonAliasUtil.to_tree(tree)
    terminal_names = []
    for leaf in parsed.get_terminals():
        terminal_names.append(leaf.name)
    return terminal_names
445
+
446
@staticmethod
def clade_get_nonterminals(tree: TreeAlias) -> list[str]:
    """
    Collect the names of the non-terminal (internal) clades of a phylogenetic tree.

    :param tree: Tree object or Newick formatted tree string.
    :return: The name attribute of every non-terminal clade, in the order the tree yields them.
    """
    parsed = BioPythonAliasUtil.to_tree(tree)
    internal_names = []
    for node in parsed.get_nonterminals():
        internal_names.append(node.name)
    return internal_names
456
+
457
@staticmethod
def clade_common_ancestor_by_targets(tree: TreeAlias, target1: str, target2: str) -> str:
    """
    Finds the common ancestor of two target clades in a tree.

    :param tree: Tree object or Newick formatted tree string.
    :param target1: Target clade name.
    :param target2: Second target clade name.
    :return: Name of the common ancestor clade, or "Unnamed" if that clade has no name.
    """
    tree = BioPythonAliasUtil.to_tree(tree)
    ancestor = tree.common_ancestor(target1, target2)
    # Test the name rather than the clade: clade objects are always truthy, and internal nodes are often
    # unnamed, so checking the clade itself would return None instead of the fallback.
    name = ancestor.name if ancestor is not None else None
    return name or "Unnamed"
469
+
470
@staticmethod
def clade_common_ancestor_by_taxa(tree: TreeAlias, taxa: list[str]) -> str:
    """
    Finds the common ancestor of a list of taxa.

    :param tree: Tree object or Newick formatted tree string.
    :param taxa: List of taxa.
    :return: Name of the common ancestor clade, or "Unnamed" if that clade has no name.
    """
    tree = BioPythonAliasUtil.to_tree(tree)
    ancestor = tree.common_ancestor(*taxa)
    # Test the name rather than the clade: clade objects are always truthy, and internal nodes are often
    # unnamed, so checking the clade itself would return None instead of the fallback.
    name = ancestor.name if ancestor is not None else None
    return name or "Unnamed"
481
+
482
@staticmethod
def clade_distance(tree: TreeAlias, target1: str, target2: str) -> float:
    """
    Measure the distance between two clades of a phylogenetic tree.

    :param tree: Tree object or Newick formatted tree string.
    :param target1: Target clade name.
    :param target2: Second target clade name.
    :return: Distance between the two clades (float).
    :raises ValueError: If either target is missing or empty.
    """
    parsed = BioPythonAliasUtil.to_tree(tree)
    # Both targets are required; an empty or None value for either one is rejected.
    if not target1 or not target2:
        raise ValueError("Must Provide Two Targets")
    return parsed.distance(target1, target2)
497
+
498
+ @staticmethod
499
def clade_total_branch_length(tree: TreeAlias) -> float:
    """
    Sums up the branch lengths of a phylogenetic tree.

    :param tree: Tree object or Newick formatted tree string.
    :return: The value reported by the tree's total_branch_length().
    """
    parsed_tree = BioPythonAliasUtil.to_tree(tree)
    total_length = parsed_tree.total_branch_length()
    return total_length
508
+
509
+ @staticmethod
510
def clade_depths(tree: TreeAlias, unit_branch_lengths: bool = False) -> dict[str, float]:
    """
    Reports the depth of each clade in a phylogenetic tree, keyed by clade name.

    :param tree: Tree object or Newick formatted tree string.
    :param unit_branch_lengths: Forwarded to the tree's depths() call. If True, depths are calculated using
        unit branch lengths. Defaults to False.
    :return: Dictionary mapping clade names to depths. A clade without a name is keyed by str(clade) instead.
        NOTE(review): clades that end up with the same key overwrite one another, with the last one winning;
        confirm whether unnamed clades can collide this way.
    """
    parsed_tree = BioPythonAliasUtil.to_tree(tree)
    named_depths = {}
    for clade, depth in parsed_tree.depths(unit_branch_lengths=unit_branch_lengths).items():
        label = clade.name if clade.name else str(clade)
        named_depths[label] = depth
    return named_depths
521
+
522
+ @staticmethod
523
def motif_analysis(sequences: list[SeqAlias], alphabet: str = "ACGT") -> Motif:
    """
    Builds a sequence motif from the given sequences.

    :param sequences: A list of DNA sequences, either in the form of strings or of Bio.Seq.Seq objects.
    :param alphabet: The alphabet used in the DNA sequences. Defaults to ACGT.
    :return: A Motif object constructed from the given alphabet and an Alignment of the converted sequences.
    """
    # Normalize every input to a sequence object before wrapping them in an alignment.
    converted = [BioPythonAliasUtil.to_sequence(seq) for seq in sequences]
    return Motif(alphabet=alphabet, alignment=Alignment(converted))
533
+
534
+ @staticmethod
535
def pssm_search(pssm: PositionSpecificScoringMatrix, sequence: SeqAlias,
                threshold: float = 0.0, both_strands: bool = True) -> list[tuple[int, float]]:
    """
    Searches a sequence for hits against a position specific scoring matrix.

    :param pssm: The position specific scoring matrix to run the search on.
    :param sequence: The sequence to search in, either as a string or already wrapped as a Bio.Seq object.
    :param threshold: The threshold above which the Position Weight Matrix score must be for a hit to be
        returned as a match. Defaults to 0.0.
    :param both_strands: Whether both strands of the DNA sequence should be searched for hits.
        Defaults to True.
    :return: A list of tuples for each hit in the sequence. Each tuple is a (position, score) pair. Negative
        positions correspond to positions on the other strand of DNA.
    """
    target = BioPythonAliasUtil.to_sequence(sequence)
    hits = pssm.search(target, threshold=threshold, both=both_strands)
    # Materialize the search results so that callers receive a concrete list.
    return list(hits)
550
+
551
+ @staticmethod
552
def read_sequence(file_path: str, seq_format: str) -> SeqRecord:
    """
    Loads exactly one sequence record from a file via Bio.SeqIO.read.

    :param file_path: Path to the sequence file.
    :param seq_format: Format of the sequence file (e.g., "fasta", "genbank").
    :return: A single SeqRecord object.
    :raises: ValueError if the file contains more than one record
    """
    record = SeqIO.read(file_path, seq_format)
    return record
562
+
563
+ @staticmethod
564
def parse_sequences(file_path: str, seq_format: str) -> Iterator[SeqRecord]:
    """
    Iterates over the sequence records of a file via Bio.SeqIO.parse.

    :param file_path: Path to the sequence file.
    :param seq_format: Format of the sequence file (e.g., "fasta", "genbank").
    :return: An iterator yielding SeqRecord objects.
    """
    records = SeqIO.parse(file_path, seq_format)
    return records
573
+
574
+ @staticmethod
575
def write_sequences(sequences: list[SeqRecord], file_path: str, seq_format: str) -> int:
    """
    Saves a list of SeqRecord objects to a file via Bio.SeqIO.write.

    :param sequences: List of SeqRecord objects to write.
    :param file_path: Output file path.
    :param seq_format: Output sequence format (e.g., "fasta", "genbank").
    :return: The number of records written.
    """
    written_count = SeqIO.write(sequences, file_path, seq_format)
    return written_count
585
+
586
+ @staticmethod
587
def convert_sequence_format(input_file: str, input_format: str, output_file: str, output_format: str) -> int:
    """
    Rewrites a sequence file in a different format via Bio.SeqIO.convert.

    :param input_file: Path to the input sequence file.
    :param input_format: Format of the input file (e.g., "genbank").
    :param output_file: Path to the output sequence file.
    :param output_format: Desired format of the output file (e.g., "fasta").
    :return: The number of records converted.
    """
    converted_count = SeqIO.convert(input_file, input_format, output_file, output_format)
    return converted_count
598
+
599
+ @staticmethod
600
def reverse_complement(sequence: SeqAlias) -> Seq:
    """
    Produces the reverse complement of a DNA sequence.

    :param sequence: The DNA sequence (string or Seq object).
    :return: The reverse complement as a Seq object.
    """
    dna = BioPythonAliasUtil.to_sequence(sequence)
    return dna.reverse_complement()
608
+
609
+ @staticmethod
610
def transcribe(dna_sequence: SeqAlias) -> Seq:
    """
    Converts a DNA sequence into its RNA transcript.

    :param dna_sequence: The DNA sequence (string or Seq object).
    :return: The transcribed RNA sequence as a Seq object.
    """
    dna = BioPythonAliasUtil.to_sequence(dna_sequence)
    return dna.transcribe()
618
+
619
+ @staticmethod
620
def back_transcribe(rna_sequence: SeqAlias) -> Seq:
    """
    Converts an RNA sequence back into DNA.

    :param rna_sequence: The RNA sequence (string or Seq object).
    :return: The back-transcribed DNA sequence as a Seq object.
    """
    rna = BioPythonAliasUtil.to_sequence(rna_sequence)
    return rna.back_transcribe()
628
+
629
+ @staticmethod
630
def translate(sequence: SeqAlias, table: str | int = "Standard", to_stop: bool = False) -> Seq:
    """
    Converts a nucleotide sequence into a protein sequence.

    :param sequence: The nucleotide sequence (string or Seq object).
    :param table: The genetic code table to use (string or integer). Defaults to "Standard".
    :param to_stop: If True, translation stops at the first in-frame stop codon. Defaults to False.
    :return: The translated protein sequence as a Seq object.
    """
    nucleotides = BioPythonAliasUtil.to_sequence(sequence)
    protein = nucleotides.translate(table=table, to_stop=to_stop)
    return protein
@@ -0,0 +1,82 @@
1
+ from typing import Any
2
+
3
+ from rdkit import Chem
4
+ from rdkit.Chem import QED, Mol
5
+ from rdkit.Chem.Crippen import MolLogP
6
+ from rdkit.Chem.Descriptors import MolWt
7
+ from rdkit.Chem.Lipinski import NumHDonors, NumHAcceptors, NumRotatableBonds
8
+ from sapiopylib.rest.User import SapioUser
9
+
10
+
11
class RdKitHelper:
    """
    A class designed for simplifying and better documenting the behavior of commonly used RDKit functions.
    """
    # The user to make requests from.
    user: SapioUser
    # The ID of the experiment that the user is in.
    exp_id: int
    # The prefix of the tab for displaying results of functions in. None when no prefix was provided.
    tab_prefix: str | None

    def __init__(self, user: SapioUser, exp_id: int, tab_prefix: str | None = None):
        """
        :param user: The user to make requests from.
        :param exp_id: The ID of the experiment that the user is in.
        :param tab_prefix: The prefix of the tab for displaying results of functions in.
        """
        self.user = user
        self.exp_id = exp_id
        self.tab_prefix = tab_prefix

    @staticmethod
    def filter_drug_like_compounds(compounds: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """
        Filter the compounds based on Lipinski's Rule of Five and QED score to prioritize drug-like molecules.

        A compound is kept when its molecular weight is at most 500, its LogP is at most 5, it has at most 5
        hydrogen bond donors and at most 10 hydrogen bond acceptors, and its QED score is at least 0.5. The
        number of rotatable bonds is reported but is not used for filtering.

        Compounds whose SMILES cannot be parsed into a molecule are skipped silently. Any exception raised
        while processing a compound (including a missing "record_id" or "name" key) is printed and the
        compound is skipped.

        :param compounds: A list of dictionaries, where each dictionary represents a compound with the following
            expected fields:
            - "smiles" (str): SMILES representation of the compound.
            - "record_id" (Any): Unique identifier for the compound.
            - "name" (str): Name of the compound.
        :return: A list of dictionaries representing drug-like compounds with the following fields:
            - "smiles" (str): SMILES representation of the compound.
            - "record_id" (Any): Unique identifier for the compound.
            - "name" (str): Name of the compound.
            - "mw" (float): Molecular weight of the compound.
            - "logp" (float): LogP (lipophilicity) value.
            - "hbd" (int): Number of hydrogen bond donors.
            - "hba" (int): Number of hydrogen bond acceptors.
            - "num_rotatable_bonds" (int): Number of rotatable bonds.
            - "qed_score" (float): QED (Quantitative Estimation of Drug-likeness) score.
        """
        drug_like_compounds: list[dict[str, Any]] = []

        for compound in compounds:
            smiles: str = compound.get("smiles", "")
            try:
                mol: Mol = Chem.MolFromSmiles(smiles)
                if mol is None:
                    # The SMILES could not be parsed into a molecule, so there is nothing to score.
                    continue
                Chem.SanitizeMol(mol)

                mw = MolWt(mol)
                logp = MolLogP(mol)
                hbd = NumHDonors(mol)
                hba = NumHAcceptors(mol)
                num_rotatable_bonds = NumRotatableBonds(mol)
                # The QED properties are not computed separately beforehand; that result was never used.
                qed_score = QED.qed(mol)

                if not (mw <= 500 and logp <= 5 and hbd <= 5 and hba <= 10 and qed_score >= 0.5):
                    continue

                drug_like_compounds.append({
                    "smiles": smiles,
                    "record_id": compound["record_id"],
                    "name": compound["name"],
                    "mw": mw,
                    "logp": logp,
                    "hbd": hbd,
                    "hba": hba,
                    "num_rotatable_bonds": num_rotatable_bonds,
                    "qed_score": qed_score,
                })
            except Exception as e:
                # Best effort: one bad compound must not abort the filtering of the rest.
                print(f"Error processing SMILES: {smiles} - {e}")

        return drug_like_compounds
@@ -190,19 +190,22 @@ class AiHelper:
190
190
  # Contextual info.
191
191
  user: SapioUser
192
192
  exp_id: int
193
+ timeout: int
193
194
 
194
195
  # Managers.
195
196
  dr_man: DataRecordManager
196
197
  eln_man: ElnManager
197
198
  dt_man: DataTypeManager
198
199
 
199
- def __init__(self, user: SapioUser, exp_id: int):
200
+ def __init__(self, user: SapioUser, exp_id: int, timeout: int = 120):
200
201
  """
201
202
  :param user: The user to send the requests from.
202
203
  :param exp_id: The ID of the experiment to create the entries in.
204
+ :param timeout: The timeout in seconds to use for requests.
203
205
  """
204
206
  self.user = user
205
207
  self.exp_id = exp_id
208
+ self.timeout = timeout
206
209
 
207
210
  self.dr_man = DataRecordManager(self.user)
208
211
  self.eln_man = ElnManager(self.user)
@@ -218,7 +221,7 @@ class AiHelper:
218
221
  :return: The Response object returned by the endpoint.
219
222
  """
220
223
  headers = create_tot_headers(self.user.url, self.user.username, self.user.password, self.exp_id, tab_prefix)
221
- response = requests.post(url, json=payload, headers=headers)
224
+ response = requests.post(url, json=payload, headers=headers, timeout=self.timeout)
222
225
  response.raise_for_status()
223
226
  return response
224
227
 
@@ -232,7 +235,7 @@ class AiHelper:
232
235
  :return: The Response object returned by the endpoint.
233
236
  """
234
237
  headers = create_tot_headers(self.user.url, self.user.username, self.user.password, self.exp_id, tab_prefix)
235
- response = requests.get(url, params=params, headers=headers)
238
+ response = requests.get(url, params=params, headers=headers, timeout=self.timeout)
236
239
  response.raise_for_status()
237
240
  return response
238
241
 
@@ -322,42 +325,87 @@ class AiHelper:
322
325
  if not json_list:
323
326
  return None
324
327
 
328
def init_string_field(k: str, v: Any, n: str) -> VeloxStringFieldDefinition:
    """
    Initialize a string field.

    :param k: The JSON key that the field value is being pulled from. Doubles as the display name.
    :param v: A particular value of the field. Only string values are inspected for a URL prefix.
    :param n: The unique name of the field.
    :return: The string field definition created by the field builder. It is given a link out when the
        value is a string that starts with an HTTP(S) scheme.
    """
    link_out: dict[str, str] = {}
    # The value is not guaranteed to be a string: this is also called to rebuild a field as a string field
    # after a value that doesn't fit its original type was found, and such a value has no startswith method.
    if isinstance(v, str) and v.startswith(("https://", "http://")):
        link_out["Link"] = "[[LINK_OUT]]"
    return fb.string_field(n, display_name=k, link_out=link_out)
340
+
341
def update_field_length(k: str, v: Any, lengths: dict[str, int]) -> None:
    """
    Grow the recorded max length of a string field so that it fits the given value.

    :param k: The JSON key that the field value is being pulled from.
    :param v: The field value. Its length is measured on its string form; None counts as length zero.
    :param lengths: The dictionary of field lengths, updated in place. A key that has not been seen before
        starts at a length of 100.
    """
    value_length = 0 if v is None else len(str(v))
    current_length = lengths.get(k, 100)
    lengths[k] = current_length if current_length >= value_length else value_length
350
+
325
351
  # Determine which fields in the JSON can be used to create field definitions.
326
352
  fb = FieldBuilder()
327
- fields: list[AbstractVeloxFieldDefinition] = []
328
- fields_by_json_key: dict[str, AbstractVeloxFieldDefinition] = {}
329
- string_field_lengths: dict[str, int] = {}
330
- for key, value in json_list[0].items():
331
- # The field name is the JSON key name, but with spaces and dashes replaced by underscores and with a leading
332
- # underscore added if the field name starts with a number.
333
- field_name: str = key.strip()
334
- if " " in field_name:
335
- field_name = field_name.replace(" ", "_")
336
- if "-" in field_name:
337
- field_name = field_name.replace("-", "_")
338
- if field_name[0].isnumeric():
339
- field_name = "_" + field_name
340
-
341
- if isinstance(value, str):
342
- field = fb.string_field(field_name, display_name=key)
343
- fields.append(field)
344
- fields_by_json_key[key] = field
345
- string_field_lengths[key] = 100
346
- elif isinstance(value, (int, float)):
347
- field = fb.double_field(field_name, display_name=key, precision=3)
348
- fields.append(field)
349
- fields_by_json_key[key] = field
350
-
351
- # Determine the max length of each string field.
353
+ json_key_to_field_def: dict[str, AbstractVeloxFieldDefinition] = {}
354
+ json_key_to_field_name: dict[str, str] = {}
355
+ json_key_to_string_length: dict[str, int] = {}
356
+ numeric_string_fields: set[str] = set()
352
357
  for values in json_list:
353
- for key in string_field_lengths:
354
- length: int = len(values.get(key)) if values.get(key) else 0
355
- string_field_lengths[key] = max(string_field_lengths[key], length)
358
+ for key, value in values.items():
359
+ # The field name is the JSON key name, but with spaces and dashes replaced by underscores and with a leading
360
+ # underscore added if the field name starts with a number.
361
+ if key not in json_key_to_field_name:
362
+ field_name: str = key.strip()
363
+ if " " in field_name:
364
+ field_name = field_name.replace(" ", "_")
365
+ if "-" in field_name:
366
+ field_name = field_name.replace("-", "_")
367
+ if field_name[0].isnumeric():
368
+ field_name = "_" + field_name
369
+ json_key_to_field_name[key] = field_name
370
+ else:
371
+ field_name = json_key_to_field_name[key]
372
+
373
+ # If this is the first time this key is being encountered, create a field for it.
374
+ if key not in json_key_to_field_def:
375
+ if isinstance(value, str):
376
+ json_key_to_field_def[key] = init_string_field(key, value, field_name)
377
+ update_field_length(key, value, json_key_to_string_length)
378
+ elif isinstance(value, bool):
379
+ json_key_to_field_def[key] = fb.boolean_field(field_name, display_name=key)
380
+ elif isinstance(value, (int, float)):
381
+ json_key_to_field_def[key] = fb.double_field(field_name, display_name=key, precision=3)
382
+ # All other values in the JSON get skipped.
383
+ continue
384
+
385
+ # The field definition already exists, but it may not be a valid field type for this value.
386
+ field_type: FieldType = json_key_to_field_def[key].data_field_type
387
+ # Strings can be anything, so we don't need to check the value type.
388
+ if field_type == FieldType.STRING:
389
+ # We still need to make sure the lengths are fine.
390
+ update_field_length(key, value, json_key_to_string_length)
391
+ continue
392
+ # Boolean values can only be booleans.
393
+ if field_type == FieldType.BOOLEAN and isinstance(value, bool):
394
+ continue
395
+ # Integers and floats both fit in DOUBLE fields, but floats can't be NaN or infinity.
396
+ if field_type == FieldType.DOUBLE and not isinstance(value, bool):
397
+ if isinstance(value, int):
398
+ continue
399
+ if isinstance(value, float) and not math.isnan(value) and not math.isinf(value):
400
+ continue
401
+ numeric_string_fields.add(key)
402
+ json_key_to_field_def[key] = init_string_field(key, value, field_name)
403
+ update_field_length(key, value, json_key_to_string_length)
356
404
 
357
405
  # Update the max length of each string field.
358
- for key in string_field_lengths:
359
- field = cast(VeloxStringFieldDefinition, fields_by_json_key[key])
360
- field.max_length = string_field_lengths[key]
406
+ for key, value in json_key_to_string_length.items():
407
+ field = cast(VeloxStringFieldDefinition, json_key_to_field_def[key])
408
+ field.max_length = value
361
409
 
362
410
  # Sort the JSON list if requested.
363
411
  if sort_field and sort_direction != SortDirection.NONE:
@@ -377,12 +425,10 @@ class AiHelper:
377
425
  field_maps: list[dict[str, Any]] = []
378
426
  for json_dict in json_list:
379
427
  field_map: dict[str, Any] = {}
380
- for key, field in fields_by_json_key.items():
381
- # Watch out for NaN values or other special values in numeric columns.
428
+ for key, field in json_key_to_field_def.items():
382
429
  val: Any = json_dict.get(key)
383
- if (field.data_field_type == FieldType.DOUBLE
384
- and (not isinstance(val, (int, float))) or (isinstance(val, float) and math.isnan(val))):
385
- val = None
430
+ if key in numeric_string_fields and val is not None and not isinstance(val, str):
431
+ val: str = f"{val:.3f}"
386
432
  field_map[field.data_field_name] = val
387
433
  field_maps.append(field_map)
388
434
 
@@ -391,7 +437,7 @@ class AiHelper:
391
437
  ElnBaseDataType.EXPERIMENT_DETAIL.data_type_name,
392
438
  self.tab_next_entry_order(tab),
393
439
  notebook_experiment_tab_id=tab.tab_id,
394
- field_definition_list=fields)
440
+ field_definition_list=[y for x, y in json_key_to_field_def.items()])
395
441
  entry = self.eln_man.add_experiment_entry(self.exp_id, detail_entry)
396
442
  records: list[DataRecord] = self.dr_man.add_data_records_with_data(entry.data_type_name, field_maps)
397
443