gemmi-protools 0.1.17__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of gemmi-protools might be problematic. Click here for more details.

@@ -1,19 +1,24 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gemmi_protools
3
- Version: 0.1.17
3
+ Version: 1.0.1
4
4
  Summary: An Enhanced tool to process PDB structures based on Gemmi
5
5
  Author: Luo Jiejian
6
6
  Author-email: Luo Jiejian <luojiejian12@mails.ucas.ac.cn>
7
7
  License-Expression: MIT
8
- Requires-Python: >=3.10
8
+ Requires-Python: <3.13,>=3.12
9
9
  Description-Content-Type: text/markdown
10
10
  License-File: LICENSE
11
- Requires-Dist: gemmi>=0.7.0
12
- Requires-Dist: pandas>=2.2.3
13
- Requires-Dist: typeguard>=4.1.2
11
+ Requires-Dist: gemmi==0.7.3
12
+ Requires-Dist: biopython==1.85
13
+ Requires-Dist: dockq==2.1.3
14
+ Requires-Dist: pandas
15
+ Requires-Dist: typeguard
14
16
  Requires-Dist: numpy
15
- Requires-Dist: biopython>=1.84
16
- Requires-Dist: scipy>=1.14.1
17
+ Requires-Dist: scipy
18
+ Requires-Dist: trimesh
19
+ Requires-Dist: joblib
20
+ Requires-Dist: rtree
21
+ Requires-Dist: freesasa==2.2.1
17
22
  Dynamic: author
18
23
  Dynamic: license-file
19
24
 
@@ -21,10 +26,8 @@ Dynamic: license-file
21
26
 
22
27
  # Install
23
28
  ```commandline
24
- conda create -n gp python=3.10
25
- conda install -n gp anarci hmmer -c bioconda
26
- conda install -n gp dockq -c conda-forge
27
- conda activate gp
29
+
30
+ conda install python=3.12.9 anarci hmmer dockq trimesh rtree -c bioconda -c conda-forge
28
31
  pip install gemmi_protools
29
32
  ```
30
33
 
@@ -33,6 +36,11 @@ pip install gemmi_protools
33
36
  ## read structures
34
37
  ```commandline
35
38
  from gemmi_protools import StructureParser
39
+
40
+ # load structure
36
41
  st=StructureParser()
37
- st.load_from_file("your.pdb")
42
+ st.load_from_file("7mmo.cif")
43
+
44
+ # get chain IDs
45
+ print(st.chain_ids)
38
46
  ```
@@ -0,0 +1,19 @@
1
+ gemmi_protools/__init__.py,sha256=_q8gXGIxrg2-3N6gxA3aa-_DRWDt3HFCLHyIh2XNHz0,157
2
+ gemmi_protools/data/MHC/MHC_combined.hmm,sha256=w0_vzPiEWne_d_kYmqR0OiSsCOpQioItKy3Zq-JMsH4,159451
3
+ gemmi_protools/data/MHC/MHC_combined.hmm.h3f,sha256=QGG4l-v76RYtysJ5rybnz5v6VgJg2RjoQQHUVWL5jmg,45522
4
+ gemmi_protools/data/MHC/MHC_combined.hmm.h3i,sha256=yn-700hoBSJB39Tj8Ia8UhSZWpYiCZFNcbnYAFNjReI,300
5
+ gemmi_protools/data/MHC/MHC_combined.hmm.h3m,sha256=CvNMCsobQiX-wL7iB4CreNcbpnElE31NmmjmMR0iYVI,66174
6
+ gemmi_protools/data/MHC/MHC_combined.hmm.h3p,sha256=-mK278pRedG3-KL-DtuVAQy7La9DgXg5FcP89D6X3Ck,78325
7
+ gemmi_protools/io/__init__.py,sha256=F6e1xNT_7lZAWQgNIneH06o2qtWYrHNr_xPUPTwwx5E,29
8
+ gemmi_protools/io/convert.py,sha256=A1i1vPgxG1LqMSUvWtegLl9LipgUQbfmKeGJ_f00UYo,3781
9
+ gemmi_protools/io/reader.py,sha256=joQr_glerss3QcfIJGr0O6lw8Mc4N1-pVobMHqY1zi0,33255
10
+ gemmi_protools/tools/__init__.py,sha256=F6e1xNT_7lZAWQgNIneH06o2qtWYrHNr_xPUPTwwx5E,29
11
+ gemmi_protools/tools/align.py,sha256=oKHvpeDa62zEjLkPmuyBM6avYDl3HFeJVHeRX62I2f4,7085
12
+ gemmi_protools/tools/dockq.py,sha256=baCuO5-GZCwrlS59T5UIXogpM44OIFIfXqksqRBAb0A,4428
13
+ gemmi_protools/tools/mesh.py,sha256=73MuJYwS_ACJI15OsrooAAhB1Ti4fM8CJSBqFOBR7LU,6537
14
+ gemmi_protools/tools/pdb_annot.py,sha256=EzgcntlERR04TfN0dIhf_GM9UCXEvUaH60Xohmbx_do,8253
15
+ gemmi_protools-1.0.1.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
16
+ gemmi_protools-1.0.1.dist-info/METADATA,sha256=cdKO7zuEv4ZwwCrcBZcprXmDtLB4AbEFDKH887JRcTI,1034
17
+ gemmi_protools-1.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
18
+ gemmi_protools-1.0.1.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
19
+ gemmi_protools-1.0.1.dist-info/RECORD,,
@@ -1,173 +0,0 @@
1
- """
2
- @Author: Luo Jiejian
3
- """
4
-
5
- import pathlib
6
- from typing import Union, Dict, Any
7
-
8
- import gemmi
9
- import pandas as pd
10
- from typeguard import typechecked
11
-
12
- from gemmi_protools.io.struct_info import Entity
13
-
14
-
15
- @typechecked
16
- def _is_cif(path: Union[str, pathlib.Path]) -> bool:
17
- if isinstance(path, str):
18
- path = pathlib.Path(path)
19
- if path.suffixes:
20
- if path.suffixes[-1] == ".cif":
21
- return True
22
- elif "".join(path.suffixes[-2:]) == ".cif.gz":
23
- return True
24
- else:
25
- return False
26
- else:
27
- return False
28
-
29
-
30
- @typechecked
31
- def _value_mapper_from_block(block: gemmi.cif.Block, category: str, column1: str, column2: str,
32
- expand_column1: bool = False) -> Dict[str, Any]:
33
- """
34
- mapper from column1 to column2
35
- :param block:
36
- :param category:
37
- :param column1:
38
- :param column2:
39
- :param expand_column1: bool, if True, values joint by comma in column1 with be split
40
- :return:
41
- Only return a mapper when both column1 and column2 in category
42
- """
43
- loop = block.find_mmcif_category(category)
44
- tags = list(loop.tags)
45
-
46
- results = dict()
47
- if column1 in tags:
48
- values1 = loop.column(tags.index(column1))
49
- v1 = [values1.str(i) for i in range(len(values1))]
50
-
51
- if column2 in tags:
52
- values2 = loop.column(tags.index(column2))
53
- v2 = [values2.str(i) for i in range(len(values2))]
54
- else:
55
- v2 = ["?"] * len(v1)
56
-
57
- outputs = dict(zip(v1, v2))
58
-
59
- if expand_column1:
60
- outputs_ex = dict()
61
- for key, val in outputs.items():
62
- tmp = key.split(",")
63
- for sk in tmp:
64
- nk = sk.strip()
65
- if nk:
66
- outputs_ex[nk] = val
67
- results = outputs_ex
68
- else:
69
- results = outputs
70
- return results
71
-
72
-
73
- @typechecked
74
- def _get_cif_resolution(block: gemmi.cif.Block) -> float:
75
- resolution = 0.0
76
- for key in ["_reflns.d_resolution_high",
77
- "_refine.ls_d_res_high",
78
- "_refine_hist.d_res_high",
79
- "_em_3d_reconstruction.resolution",
80
- ]:
81
- v = block.find_value(key)
82
- try:
83
- vf = float(v)
84
- except (TypeError, ValueError):
85
- continue
86
- else:
87
- resolution = vf
88
- break
89
- return resolution
90
-
91
-
92
- @typechecked
93
- def _cif_entity_info(block: gemmi.cif.Block) -> Entity:
94
- entity2description = _value_mapper_from_block(block, category="_entity.",
95
- column1="_entity.id",
96
- column2="_entity.pdbx_description")
97
-
98
- polymer2entity = _value_mapper_from_block(block, category="_entity_poly.",
99
- column1="_entity_poly.pdbx_strand_id",
100
- column2="_entity_poly.entity_id",
101
- expand_column1=True)
102
- entity2species = _value_mapper_from_block(block, category="_entity_src_gen.",
103
- column1="_entity_src_gen.entity_id",
104
- column2="_entity_src_gen.pdbx_gene_src_scientific_name")
105
-
106
- entity2species.update(_value_mapper_from_block(block, category="_pdbx_entity_src_syn.",
107
- column1="_pdbx_entity_src_syn.entity_id",
108
- column2="_pdbx_entity_src_syn.organism_scientific")
109
- )
110
- entity2species.update(_value_mapper_from_block(block, category="_entity_src_nat.",
111
- column1="_entity_src_nat.entity_id",
112
- column2="_entity_src_nat.pdbx_organism_scientific")
113
- )
114
- entity2taxid = _value_mapper_from_block(block, category="_entity_src_gen.",
115
- column1="_entity_src_gen.entity_id",
116
- column2="_entity_src_gen.pdbx_gene_src_ncbi_taxonomy_id")
117
- entity2taxid.update(_value_mapper_from_block(block, category="_pdbx_entity_src_syn.",
118
- column1="_pdbx_entity_src_syn.entity_id",
119
- column2="_pdbx_entity_src_syn.ncbi_taxonomy_id")
120
- )
121
- entity2taxid.update(_value_mapper_from_block(block, category="_entity_src_nat.",
122
- column1="_entity_src_nat.entity_id",
123
- column2="_entity_src_nat.pdbx_ncbi_taxonomy_id")
124
- )
125
-
126
- vals = dict(eid2desc=entity2description,
127
- eid2specie=entity2species,
128
- eid2taxid=entity2taxid,
129
- polymer2eid=polymer2entity
130
- )
131
- return Entity(**vals)
132
-
133
-
134
- @typechecked
135
- def _cif_block_for_output(structure: gemmi.Structure, entity: Entity) -> gemmi.cif.Block:
136
- block = structure.make_mmcif_block()
137
-
138
- reflns = block.find_mmcif_category(category="_reflns.")
139
- resolution = "%.2f" % structure.resolution
140
- reflns.erase()
141
- reflns_loop = block.init_loop(prefix="_reflns.", tags=["d_resolution_high", "d_resolution_low"])
142
- reflns_loop.add_row([resolution, resolution])
143
-
144
- ta = block.find_mmcif_category(category="_entity.")
145
- da = pd.DataFrame(list(ta), columns=list(ta.tags))
146
- if "_entity.id" in da.columns:
147
- da["_entity.pdbx_description"] = da["_entity.id"].apply(
148
- lambda i: entity["eid2desc"].get(i, "?").strip() or "?")
149
-
150
- rows = []
151
- for ar in da.to_numpy().tolist():
152
- rows.append([gemmi.cif.quote(i) for i in ar])
153
-
154
- if "_entity.pdbx_description" not in list(ta.tags):
155
- ta.loop.add_columns(["_entity.pdbx_description"], "?")
156
-
157
- ta = block.find_mmcif_category(category="_entity.")
158
- for _ in range(len(ta)):
159
- ta.remove_row(0)
160
- for row in rows:
161
- ta.append_row(row)
162
-
163
- loop = block.init_loop("_entity_src_gen.", ["entity_id",
164
- "pdbx_gene_src_scientific_name",
165
- "pdbx_gene_src_ncbi_taxonomy_id"])
166
-
167
- for k in entity["eid2specie"].keys():
168
- loop.add_row([gemmi.cif.quote(k),
169
- gemmi.cif.quote(entity["eid2specie"].get(k, "?")),
170
- gemmi.cif.quote(entity["eid2taxid"].get(k, "?"))]
171
- )
172
- block.move_item(-1, 16)
173
- return block
@@ -1,387 +0,0 @@
1
- #!/usr/bin/env python
2
- # Copyright 2004 Kristian Rother.
3
- # Revisions copyright 2004 Thomas Hamelryck.
4
- # Revisions copyright 2024 James Krieger.
5
- #
6
- # This file is part of the Biopython distribution and governed by your
7
- # choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
8
- # Please see the LICENSE file that should have been included as part of this
9
- # package.
10
-
11
- """Parse header of PDB files into a python dictionary.
12
-
13
- Emerged from the Columba database project www.columba-db.de, original author
14
- Kristian Rother.
15
-
16
- Modify _parse_pdb_header_list
17
- Don't perform lower() to chain id
18
- By Luo Jiejian
19
- """
20
-
21
- import re
22
- from collections import defaultdict
23
-
24
- from Bio import File
25
-
26
-
27
- def _get_biomoltrans(inl):
28
- # REMARK 350
29
- # REMARK 350 COORDINATES FOR A COMPLETE MULTIMER REPRESENTING THE KNOWN
30
- # REMARK 350 BIOLOGICALLY SIGNIFICANT OLIGOMERIZATION STATE OF THE
31
- # REMARK 350 MOLECULE CAN BE GENERATED BY APPLYING BIOMT TRANSFORMATIONS
32
- # REMARK 350 GIVEN BELOW. BOTH NON-CRYSTALLOGRAPHIC AND
33
- # REMARK 350 CRYSTALLOGRAPHIC OPERATIONS ARE GIVEN.
34
- # REMARK 350
35
- # REMARK 350 BIOMOLECULE: 1
36
- # REMARK 350 AUTHOR DETERMINED BIOLOGICAL UNIT: MONOMERIC
37
- # REMARK 350 APPLY THE FOLLOWING TO CHAINS: A
38
- # REMARK 350 BIOMT1 1 1.000000 0.000000 0.000000 0.00000
39
- # REMARK 350 BIOMT2 1 0.000000 1.000000 0.000000 0.00000
40
- # REMARK 350 BIOMT3 1 0.000000 0.000000 1.000000 0.00000
41
- biomolecule = defaultdict(list)
42
- for line in inl:
43
- if line.startswith("REMARK 350"):
44
- if line[11:23] == "BIOMOLECULE:":
45
- currentBiomolecule = line.split()[-1]
46
- applyToChains = []
47
- elif (
48
- line[11:41] == "APPLY THE FOLLOWING TO CHAINS:"
49
- or line[30:41] == "AND CHAINS:"
50
- ):
51
- applyToChains.extend(
52
- line[41:].replace(" ", "").strip().strip(",").split(",")
53
- )
54
- elif line[13:18] == "BIOMT":
55
- biomt = biomolecule[currentBiomolecule]
56
- if line[13:19] == "BIOMT1":
57
- if applyToChains == []:
58
- applyToChains = biomt[0]
59
- biomt.append(applyToChains)
60
- elif line[13:19]:
61
- applyToChains = []
62
- biomt.append(line[23:])
63
- return dict(biomolecule)
64
-
65
-
66
- def _get_journal(inl):
67
- # JRNL AUTH L.CHEN,M.DOI,F.S.MATHEWS,A.Y.CHISTOSERDOV, 2BBK 7
68
- journal = ""
69
- for line in inl:
70
- if re.search(r"\AJRNL", line):
71
- journal += line[19:72].lower()
72
- journal = re.sub(r"\s\s+", " ", journal)
73
- return journal
74
-
75
-
76
- def _get_references(inl):
77
- # REMARK 1 REFERENCE 1 1CSE 11
78
- # REMARK 1 AUTH W.BODE,E.PAPAMOKOS,D.MUSIL 1CSE 12
79
- references = []
80
- actref = ""
81
- for line in inl:
82
- if re.search(r"\AREMARK 1", line):
83
- if re.search(r"\AREMARK 1 REFERENCE", line):
84
- if actref != "":
85
- actref = re.sub(r"\s\s+", " ", actref)
86
- if actref != " ":
87
- references.append(actref)
88
- actref = ""
89
- else:
90
- actref += line[19:72].lower()
91
-
92
- if actref != "":
93
- actref = re.sub(r"\s\s+", " ", actref)
94
- if actref != " ":
95
- references.append(actref)
96
- return references
97
-
98
-
99
- # bring dates to format: 1909-01-08
100
- def _format_date(pdb_date):
101
- """Convert dates from DD-Mon-YY to YYYY-MM-DD format (PRIVATE)."""
102
- date = ""
103
- year = int(pdb_date[7:])
104
- if year < 50:
105
- century = 2000
106
- else:
107
- century = 1900
108
- date = str(century + year) + "-"
109
- all_months = [
110
- "xxx",
111
- "Jan",
112
- "Feb",
113
- "Mar",
114
- "Apr",
115
- "May",
116
- "Jun",
117
- "Jul",
118
- "Aug",
119
- "Sep",
120
- "Oct",
121
- "Nov",
122
- "Dec",
123
- ]
124
- month = str(all_months.index(pdb_date[3:6]))
125
- if len(month) == 1:
126
- month = "0" + month
127
- date = date + month + "-" + pdb_date[:2]
128
- return date
129
-
130
-
131
- def _chop_end_codes(line):
132
- """Chops lines ending with ' 1CSA 14' and the like (PRIVATE)."""
133
- return re.sub(r"\s\s\s\s+[\w]{4}.\s+\d*\Z", "", line)
134
-
135
-
136
- def _chop_end_misc(line):
137
- """Chops lines ending with ' 14-JUL-97 1CSA' and the like (PRIVATE)."""
138
- return re.sub(r"\s+\d\d-\w\w\w-\d\d\s+[1-9][0-9A-Z]{3}\s*\Z", "", line)
139
-
140
-
141
- def _nice_case(line):
142
- """Make A Lowercase String With Capitals (PRIVATE)."""
143
- line_lower = line.lower()
144
- s = ""
145
- i = 0
146
- nextCap = 1
147
- while i < len(line_lower):
148
- c = line_lower[i]
149
- if c >= "a" and c <= "z" and nextCap:
150
- c = c.upper()
151
- nextCap = 0
152
- elif c in " .,;:\t-_":
153
- nextCap = 1
154
- s += c
155
- i += 1
156
- return s
157
-
158
-
159
- def parse_pdb_header(infile):
160
- """Return the header lines of a pdb file as a dictionary.
161
-
162
- Dictionary keys are: head, deposition_date, release_date, structure_method,
163
- resolution, structure_reference, journal_reference, author and
164
- compound.
165
- """
166
- header = []
167
- with File.as_handle(infile) as f:
168
- for line in f:
169
- record_type = line[0:6]
170
- if record_type in ("ATOM ", "HETATM", "MODEL "):
171
- break
172
- header.append(line)
173
- return _parse_pdb_header_list(header)
174
-
175
-
176
- def _parse_remark_465(line):
177
- """Parse missing residue remarks.
178
-
179
- Returns a dictionary describing the missing residue.
180
- The specification for REMARK 465 at
181
- http://www.wwpdb.org/documentation/file-format-content/format33/remarks2.html#REMARK%20465
182
- only gives templates, but does not say they have to be followed.
183
- So we assume that not all pdb-files with a REMARK 465 can be understood.
184
-
185
- Returns a dictionary with the following keys:
186
- "model", "res_name", "chain", "ssseq", "insertion"
187
- """
188
- if line:
189
- # Note that line has been stripped.
190
- assert line[0] != " " and line[-1] not in "\n ", "line has to be stripped"
191
- pattern = re.compile(
192
- r"""
193
- (\d+\s[\sA-Z][\sA-Z][A-Z] | # Either model number + residue name
194
- [A-Z]{1,3}) # Or only residue name with 1 (RNA) to 3 letters
195
- \s ([A-Za-z0-9]) # A single character chain
196
- \s+(-?\d+[A-Za-z]?)$ # Residue number: A digit followed by an optional
197
- # insertion code (Hetero-flags make no sense in
198
- # context with missing res)
199
- """,
200
- re.VERBOSE,
201
- )
202
- match = pattern.match(line)
203
- if match is None:
204
- return None
205
- residue = {}
206
- if " " in match.group(1):
207
- model, residue["res_name"] = match.group(1).split()
208
- residue["model"] = int(model)
209
- else:
210
- residue["model"] = None
211
- residue["res_name"] = match.group(1)
212
- residue["chain"] = match.group(2)
213
- try:
214
- residue["ssseq"] = int(match.group(3))
215
- except ValueError:
216
- residue["insertion"] = match.group(3)[-1]
217
- residue["ssseq"] = int(match.group(3)[:-1])
218
- else:
219
- residue["insertion"] = None
220
- return residue
221
-
222
-
223
- def _parse_pdb_header_list(header):
224
- # database fields
225
- pdbh_dict = {
226
- "name": "",
227
- "head": "",
228
- "idcode": "",
229
- "deposition_date": "1909-01-08",
230
- "release_date": "1909-01-08",
231
- "structure_method": "unknown",
232
- "resolution": None,
233
- "structure_reference": "unknown",
234
- "journal_reference": "unknown",
235
- "author": "",
236
- "compound": {"1": {"misc": ""}},
237
- "source": {"1": {"misc": ""}},
238
- "has_missing_residues": False,
239
- "missing_residues": [],
240
- "biomoltrans": [],
241
- }
242
-
243
- pdbh_dict["structure_reference"] = _get_references(header)
244
- pdbh_dict["journal_reference"] = _get_journal(header)
245
- pdbh_dict["biomoltrans"] = _get_biomoltrans(header)
246
- comp_molid = "1"
247
- last_comp_key = "misc"
248
- last_src_key = "misc"
249
-
250
- for hh in header:
251
- h = re.sub(r"[\s\n\r]*\Z", "", hh) # chop linebreaks off
252
- # key=re.sub("\s.+\s*","",h)
253
- key = h[:6].strip()
254
- # tail=re.sub("\A\w+\s+\d*\s*","",h)
255
- tail = h[10:].strip()
256
- # print("%s:%s" % (key, tail)
257
-
258
- # From here, all the keys from the header are being parsed
259
- if key == "TITLE":
260
- name = _chop_end_codes(tail).lower()
261
- pdbh_dict["name"] = " ".join([pdbh_dict["name"], name]).strip()
262
- elif key == "HEADER":
263
- rr = re.search(r"\d\d-\w\w\w-\d\d", tail)
264
- if rr is not None:
265
- pdbh_dict["deposition_date"] = _format_date(_nice_case(rr.group()))
266
- rr = re.search(r"\s+([1-9][0-9A-Z]{3})\s*\Z", tail)
267
- if rr is not None:
268
- pdbh_dict["idcode"] = rr.group(1)
269
- head = _chop_end_misc(tail).lower()
270
- pdbh_dict["head"] = head
271
- elif key == "COMPND":
272
- # LJJ
273
- # tt = re.sub(r"\;\s*\Z", "", _chop_end_codes(tail)).lower()
274
- tt = re.sub(r"\;\s*\Z", "", _chop_end_codes(tail))
275
- # look for E.C. numbers in COMPND lines
276
- rec = re.search(r"\d+\.\d+\.\d+\.\d+", tt)
277
- if rec:
278
- pdbh_dict["compound"][comp_molid]["ec_number"] = rec.group()
279
- tt = re.sub(r"\((e\.c\.)*\d+\.\d+\.\d+\.\d+\)", "", tt)
280
- tok = tt.split(":")
281
- if len(tok) >= 2:
282
- # lower ckey LJJ
283
- ckey = tok[0].lower()
284
- # ckey = tok[0]
285
- cval = re.sub(r"\A\s*", "", tok[1])
286
- if ckey == "mol_id":
287
- # mol_id, keep original, usually digital string
288
- pdbh_dict["compound"][cval] = {"misc": ""}
289
- comp_molid = cval
290
- last_comp_key = "misc"
291
- else:
292
- # add two lines, lower all except chain value LJJ
293
- if ckey != "chain":
294
- cval = cval.lower()
295
-
296
- pdbh_dict["compound"][comp_molid][ckey] = cval
297
- last_comp_key = ckey
298
- else:
299
- # pdbh_dict["compound"][comp_molid][last_comp_key] += tok[0] + " "
300
- # concat and lower LJJ
301
- pdbh_dict["compound"][comp_molid][last_comp_key] += tok[0].lower() + " "
302
- elif key == "SOURCE":
303
- tt = re.sub(r"\;\s*\Z", "", _chop_end_codes(tail)).lower()
304
- tok = tt.split(":")
305
- # print(tok)
306
- if len(tok) >= 2:
307
- ckey = tok[0]
308
- cval = re.sub(r"\A\s*", "", tok[1])
309
- if ckey == "mol_id":
310
- pdbh_dict["source"][cval] = {"misc": ""}
311
- comp_molid = cval
312
- last_src_key = "misc"
313
- else:
314
- pdbh_dict["source"][comp_molid][ckey] = cval
315
- last_src_key = ckey
316
- else:
317
- pdbh_dict["source"][comp_molid][last_src_key] += tok[0] + " "
318
- elif key == "KEYWDS":
319
- kwd = _chop_end_codes(tail).lower()
320
- if "keywords" in pdbh_dict:
321
- pdbh_dict["keywords"] += " " + kwd
322
- else:
323
- pdbh_dict["keywords"] = kwd
324
- elif key == "EXPDTA":
325
- expd = _chop_end_codes(tail)
326
- # chop junk at end of lines for some structures
327
- expd = re.sub(r"\s\s\s\s\s\s\s.*\Z", "", expd)
328
- # if re.search('\Anmr',expd,re.IGNORECASE): expd='nmr'
329
- # if re.search('x-ray diffraction',expd,re.IGNORECASE): expd='x-ray diffraction'
330
- pdbh_dict["structure_method"] = expd.lower()
331
- elif key == "CAVEAT":
332
- # make Annotation entries out of these!!!
333
- pass
334
- elif key == "REVDAT":
335
- rr = re.search(r"\d\d-\w\w\w-\d\d", tail)
336
- if rr is not None:
337
- pdbh_dict["release_date"] = _format_date(_nice_case(rr.group()))
338
- elif key == "JRNL":
339
- # print("%s:%s" % (key, tail))
340
- if "journal" in pdbh_dict:
341
- pdbh_dict["journal"] += tail
342
- else:
343
- pdbh_dict["journal"] = tail
344
- elif key == "AUTHOR":
345
- auth = _nice_case(_chop_end_codes(tail))
346
- if "author" in pdbh_dict:
347
- pdbh_dict["author"] += auth
348
- else:
349
- pdbh_dict["author"] = auth
350
- elif key == "REMARK":
351
- if re.search("REMARK 2 RESOLUTION.", hh):
352
- r = _chop_end_codes(re.sub("REMARK 2 RESOLUTION.", "", hh))
353
- r = re.sub(r"\s+ANGSTROM.*", "", r)
354
- try:
355
- pdbh_dict["resolution"] = float(r)
356
- except ValueError:
357
- # print('nonstandard resolution %r' % r)
358
- pdbh_dict["resolution"] = None
359
- elif hh.startswith("REMARK 465"):
360
- if tail:
361
- pdbh_dict["has_missing_residues"] = True
362
- missing_res_info = _parse_remark_465(tail)
363
- if missing_res_info:
364
- pdbh_dict["missing_residues"].append(missing_res_info)
365
- elif hh.startswith("REMARK 99 ASTRAL"):
366
- if tail:
367
- remark_99_keyval = tail.replace("ASTRAL ", "").split(": ")
368
- if (
369
- isinstance(remark_99_keyval, list)
370
- and len(remark_99_keyval) == 2
371
- ):
372
- if "astral" not in pdbh_dict:
373
- pdbh_dict["astral"] = {
374
- remark_99_keyval[0]: remark_99_keyval[1]
375
- }
376
- else:
377
- pdbh_dict["astral"][remark_99_keyval[0]] = remark_99_keyval[
378
- 1
379
- ]
380
- else:
381
- # print(key)
382
- pass
383
- if pdbh_dict["structure_method"] == "unknown":
384
- res = pdbh_dict["resolution"]
385
- if res is not None and res > 0.0:
386
- pdbh_dict["structure_method"] = "x-ray diffraction"
387
- return pdbh_dict