bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,1682 @@
1
+ #
2
+ # bio/db/pdb/pdb.rb - PDB database class for PDB file format
3
+ #
4
+ # Copyright (C) 2003,2004 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
5
+ # Copyright (C) 2004 Alex Gutteridge <alexg@ebi.ac.uk>
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ # $Id: pdb.rb,v 1.6 2005/12/18 17:37:14 ngoto Exp $
22
+ #
23
+
24
+ # *** CAUTION ***
25
+ # This is a pre-alpha version. The specification may change frequently.
26
+ #
27
+
28
+ require 'bio/db/pdb'
29
+ require 'bio/data/aa'
30
+
31
+ module Bio
32
+
33
+ # This is the main PDB class. It takes care of parsing and annotations,
34
+ # and is the entry point to the coordinate data held in models.
35
+ class PDB #< DB
36
+
37
+ include Utils
38
+ include AtomFinder
39
+ include ResidueFinder
40
+ include ChainFinder
41
+ include ModelFinder
42
+ include Enumerable
43
+
44
+ DELIMITER = RS = nil # 1 file 1 entry
45
+
46
+ #Modules required by the field definitions
47
# Converters for the column types named in the record field definitions.
#
# Every converter answers +new(str)+, where +str+ is the slice of a record
# line covered by one field, and returns the converted value.  +str+ may be
# +nil+ when the line is shorter than the field's start column; all
# converters accept that and behave as if the field were blank.
module DataType

  # Marker used as the "type" of a continuation-number field.  It is nil on
  # purpose: a field definition whose type is nil is treated as the
  # continuation column range rather than as data.
  Pdb_Continuation = nil

  # Integer field (blank or missing -> 0, as String#to_i / nil.to_i do).
  module Pdb_Integer
    def self.new(str)
      str.to_i
    end
  end

  # Semicolon separated list -> Array of Strings.
  module Pdb_SList
    def self.new(str)
      str.to_s.strip.split(/\;\s*/)
    end
  end

  # Comma separated list -> Array of Strings.
  module Pdb_List
    def self.new(str)
      str.to_s.strip.split(/\,\s*/)
    end
  end

  # "key: value; key: value" list -> Array of [key, value] pairs.
  # Each item is split on the first colon only.
  module Pdb_Specification_list
    def self.new(str)
      a = str.to_s.strip.split(/\;\s*/)
      a.collect! { |x| x.split(/\:\s*/, 2) }
      a
    end
  end

  # String with trailing whitespace removed.
  module Pdb_String
    def self.new(str)
      str.to_s.gsub(/\s+\z/, '')
    end

    # Creates a new converter module whose +new+ strips trailing
    # whitespace and then pads/truncates the result to exactly +nn+
    # characters (left justified).
    def self.[](nn)
      m = Module.new
      # The width lives in an instance variable of the anonymous module,
      # so each sized converter keeps its own value.
      m.instance_variable_set(:@nn, nn)
      def m.new(str)
        str.to_s.gsub(/\s+\z/, '').ljust(@nn)[0, @nn]
      end
      m
    end
  end #module Pdb_String

  # String kept verbatim (leading and trailing blanks are significant).
  module Pdb_LString
    # Creates a new converter module whose +new+ pads/truncates the raw
    # field to exactly +nn+ characters (left justified).
    def self.[](nn)
      m = Module.new
      m.instance_variable_set(:@nn, nn)
      def m.new(str)
        str.to_s.ljust(@nn)[0, @nn]
      end
      m
    end

    # Returns a copy of the raw field ('' when the field is missing).
    def self.new(str)
      String.new(str.to_s)
    end
  end

  # Floating point field.
  module Pdb_Real
    # Creates a converter module tagged with the format string +fmt+
    # (e.g. '8.3').  The format is only recorded; conversion is a plain
    # to_f.
    def self.[](fmt)
      m = Module.new
      m.instance_variable_set(:@format, fmt)
      def m.new(str)
        str.to_f
      end
      m
    end

    def self.new(str)
      str.to_f
    end
  end

  # Right-justified string: leading whitespace is removed.
  module Pdb_StringRJ
    def self.new(str)
      str.to_s.gsub(/\A\s+/, '')
    end
  end

  # Aliases: these field types share the behaviour of Pdb_String ...
  Pdb_Date         = Pdb_String
  Pdb_IDcode       = Pdb_String
  Pdb_Residue_name = Pdb_String
  Pdb_SymOP        = Pdb_String
  Pdb_Atom         = Pdb_String
  Pdb_AChar        = Pdb_String
  # ... and this one the behaviour of Pdb_LString.
  Pdb_Character    = Pdb_LString

  # Methods named like the constants above, so that a class which extends
  # this module can write Pdb_LString(6) instead of Pdb_LString[6].
  module ConstLikeMethod
    def Pdb_LString(nn)
      Pdb_LString[nn]
    end

    def Pdb_String(nn)
      Pdb_String[nn]
    end

    def Pdb_Real(fmt)
      Pdb_Real[fmt]
    end
  end #module ConstLikeMethod
end #module DataType
156
+
157
+ class Record < Struct
158
+ include DataType
159
+ extend DataType::ConstLikeMethod
160
+
161
# Digests a list of field definitions of the form
#   [ first_column, last_column, type, symbol ]
# (columns are 1-based and inclusive).
#
# Returns a three element array:
# * a hash mapping each symbol to [ type, [ range, ... ] ] where the
#   ranges are 0-based and a symbol declared several times collects
#   all of its ranges,
# * the symbols in order of first appearance,
# * the 0-based range of the continuation field (a definition whose
#   type is nil/false), or false if there is none.
def self.parse_field_definitions(ary)
  fields_by_symbol = {}
  ordered_symbols = []
  continuation = false

  ary.each do |definition|
    first_col, last_col, type, name = definition
    span = (first_col - 1)..(last_col - 1)

    # A definition without a type (Pdb_Continuation is nil) only marks
    # where the continuation number lives.
    if !type then
      continuation = span
      next
    end

    # Definitions whose name is not a Symbol are ignored.
    next unless name.is_a?(Symbol)

    known = fields_by_symbol[name]
    if known then
      # Same symbol seen again: just remember the extra range.
      known[1] << span
    else
      fields_by_symbol[name] = [ type, [ span ] ]
      ordered_symbols << name
    end
  end

  [ fields_by_symbol, ordered_symbols, continuation ]
end
194
+ private_class_method :parse_field_definitions
195
+
196
# Creates a new record class from field definitions
#   [ first_column, last_column, type, symbol ], ...
# The class is an anonymous subclass of a Struct (built from the field
# symbols) and stores the definitions, the symbol table and the
# continuation range in class-level instance variables.
#
# Every field reader is overridden so that the record line is parsed
# lazily (via do_parse) the first time any field is read.
def self.def_rec(*ary)
  symbolhash, symbolary, cont = parse_field_definitions(ary)

  # Subclass the generated Struct so that the readers defined below can
  # reach the Struct-generated readers through super.
  klass = Class.new(self.new(*symbolary))
  klass.module_eval {
    @definition = ary
    @symbols = symbolhash
    @cont = cont
  }
  klass.module_eval {
    symbolary.each do |x|
      # super() with explicit (empty) arguments: a bare `super' inside a
      # define_method block raises RuntimeError on Ruby 1.9 and later.
      define_method(x) { do_parse; super() }
    end
  }
  klass
end #def self.def_rec
212
+
213
# Creates an anonymous subclass of the record class +klass+ and copies
# the class-level field definition, symbol table and continuation range
# of +klass+ onto it (class-level instance variables are not inherited
# by subclasses, hence the explicit copy).
def self.new_inherit(klass)
  derived = Class.new(klass)
  [ :@definition, :@symbols, :@cont ].each do |ivar|
    derived.instance_variable_set(ivar, klass.instance_variable_get(ivar))
  end
  derived
end
222
+
223
# Creates a new record class like def_rec does, but for records that are
# parsed eagerly: initialize_from_string parses the line immediately
# instead of waiting for the first field access, and the field readers
# are the plain Struct readers.
#
# Raises RuntimeError if the definitions contain a continuation field.
def self.new_direct(*ary)
  symbolhash, symbolary, cont = parse_field_definitions(ary)
  if cont
    raise 'continuation not allowed. please use def_rec instead'
  end

  klass = Class.new(self.new(*symbolary))
  klass.module_eval {
    @definition = ary
    @symbols = symbolhash
    @cont = cont
  }
  klass.module_eval {
    define_method(:initialize_from_string) { |str|
      # Arguments are passed explicitly: a bare `super' inside a
      # define_method block raises RuntimeError on Ruby 1.9 and later.
      r = super(str)
      do_parse
      r
    }
  }
  klass
end #def self.new_direct
244
+
245
# Returns the symbol table of this record class: a hash mapping each
# field symbol to [ type, [ range, ... ] ].
def self.symbols
  return @symbols
end
249
+
250
# Returns the 0-based column range of the continuation number if this
# record class declares a continuation field, otherwise a false value.
def self.continue?
  return @cont
end
253
+
254
+ # Returns true if this record has a field type which allows
255
+ # continuations.
256
# Instance-level shortcut for the class-level continue?: the result is
# the continuation column range of the record class, or a false value.
def continue?
  record_class = self.class
  record_class.continue?
end
259
+
260
+ # yields the symbol(k), type(x[0]) and array of ranges
261
+ # of each symbol.
262
# Iterates over the symbol table of the record class, yielding
# (symbol, type, ranges) for every field.
def each_symbol
  self.class.symbols.each do |name, (type, ranges)|
    yield name, type, ranges
  end
end
267
+
268
+ # Returns the original string(s) for this record (usually just @str, but
269
+ # continuation data from other lines is appended when present).
270
# Always returns an Array: the first element is the record's own line,
# followed by any continuation lines collected by add_continuation.
def original_data
  return [ @str ] unless defined?(@cont_data)
  [ @str, *@cont_data ]
end
277
+
278
+ # initialize from the string
279
# Binds this record to the raw line +str+ without parsing any field.
# Only the record name is extracted here; the fields are filled in later
# by do_parse.  Returns self so that calls can be chained.
def initialize_from_string(str)
  @str = str
  @record_name = fetch_record_name(str)
  # Field values are not valid until do_parse has run.
  @parsed = false
  return self
end
285
+
286
+ #Called when we need to access the data, takes the string
287
+ #and the array of FieldDefs and parses it out
288
# Fills the Struct members from the raw record line and from any
# continuation lines.  Does nothing if the record was parsed before.
# Returns self.
#
# * A field declared with one column range gets the converted value.
# * A field declared with several column ranges gets an Array of the
#   converted values of its non-blank ranges.
# * For every continuation line, a non-blank single-range field is
#   appended (concat) to the value already stored unless it is equal to
#   it, and non-blank multi-range values are pushed onto the Array.
#
# NOTE(review): the concat step assumes the stored value responds to
# concat (String or Array); confirm for continued records that carry
# other field types.
def do_parse
  return self if @parsed

  # Pass 1: the record's own line.
  primary = @str
  each_symbol do |key, klass, ranges|
    if ranges.size <= 1 then
      self[key] = klass.new(primary[ranges.first])
    else
      values = []
      ranges.each do |r|
        chunk = primary[r]
        values << klass.new(chunk) unless chunk.to_s.strip.empty?
      end
      self[key] = values
    end
  end

  # Pass 2: continuation lines, in the order they were added.
  if defined?(@cont_data) then
    @cont_data.each do |line|
      each_symbol do |key, klass, ranges|
        if ranges.size <= 1 then
          chunk = line[ranges.first]
          next if chunk.to_s.strip.empty?
          v = klass.new(chunk)
          # Fields repeated verbatim on the continuation line are not
          # appended a second time.
          self[key].concat(v) if self[key] != v
        else
          collected = self[key]
          ranges.each do |r|
            chunk = line[r]
            collected << klass.new(chunk) unless chunk.to_s.strip.empty?
          end
        end
      end
    end
  end

  @parsed = true
  self
end
332
+
333
# Returns the record name of the line +str+: its first six characters
# with surrounding whitespace removed.
def fetch_record_name(str)
  str[0, 6].strip
end
336
+ private :fetch_record_name
337
+
338
# Class-level twin of the instance method of the same name: returns the
# first six characters of +str+ with surrounding whitespace removed.
def self.fetch_record_name(str)
  str[0, 6].strip
end
341
+ private_class_method :fetch_record_name
342
+
343
+ # If the given str can be a continuation of the current record,
344
+ # returns the order number of the continuation, read from the columns of
345
+ # the Pdb_Continuation field definition.
346
+ # Otherwise, returns -1.
347
# Reads the continuation number from +str+ using the continuation column
# range of this record class (String#to_i, so blank columns give 0).
# Returns -1 when the record class has no continuation field.
def fetch_cont(str)
  cont_range = continue?
  return -1 unless cont_range
  str[cont_range].to_i
end
350
+ private :fetch_cont
351
+
352
# Returns the record name taken from the parsed line, falling back to the
# last component of the class name when no line has been bound yet.
def record_name
  @record_name || self.class.to_s.split(/\:\:/)[-1]
end
# record_type is the old name of this method; kept for compatibility.
alias record_type record_name
357
+
358
+ # Adds continuation data to the record from str if str is
359
+ # really the continuation of current record.
360
+ # Returns self (= not nil) if str is the continuation.
361
+ # Otherwise, returns false.
362
# Tries to attach the line +str+ to this record as a continuation line.
#
# The line is accepted only if all of the following hold, checked in
# this order:
# * this record class declares a continuation field,
# * +str+ carries the same record name as this record,
# * the record class resolved for +str+ is this record's class,
# * the continuation number read from +str+ is 2 or greater.
#
# Returns self when the line was stored, false otherwise.
def add_continuation(str)
  acceptable = self.continue? &&
               fetch_record_name(str) == @record_name &&
               self.class.get_record_class(str) == self.class &&
               fetch_cont(str) >= 2
  return false unless acceptable

  # The list of continuation lines is created on first use.
  @cont_data = [] unless defined?(@cont_data)
  @cont_data << str
  self
end
377
+
378
+ # creates definition hash from current classes constants
379
# Builds a lookup table from the constants of this class: every constant
# whose name consists of an upper-case letter followed by one or more
# upper-case letters or digits (HEADER, ORIGX1, ...) is stored under its
# name as a String.  The constant Default, when it is a true value,
# becomes the default value of the hash.
#
# Raises NameError if no constant Default can be resolved.
def self.create_definition_hash
  hash = {}
  constants.each do |x|
    # Module#constants returns Strings on Ruby 1.8 but Symbols on 1.9 and
    # later; normalise so that the keys are Strings on every version.
    name = x.to_s
    hash[name] = const_get(name) if /\A[A-Z][A-Z0-9]+\z/ =~ name
  end
  if x = const_get(:Default) then
    hash.default = x
  end
  hash
end
389
+
390
# Plain delegation to the inherited inspect.  The record is deliberately
# NOT parsed first (see the disabled call below), so fields that have
# not been read yet are shown as they are currently stored.
def inspect
  #do_parse
  super()
end
394
+
395
+ # definitions
396
+ # contains all the rules for parsing each field
397
+ # based on format V 2.2, 16-DEC-1996
398
+ #
399
+ # http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html
400
+ # http://www.rcsb.org/pdb/docs/format/pdbguide2.2/Contents_Guide_21.html
401
+ #
402
+ # Details of following data are taken from these documents.
403
+
404
+ # [ 1..6, :Record_name, nil ],
405
+
406
+ # XXXXXX =
407
+ # new([ start, end, type of data, symbol to access ], ...)
408
+
409
# Single-line record: no Pdb_Continuation field is declared.
HEADER =
  def_rec([ 11, 50, Pdb_String, :classification ], #Pdb_String(40)
          [ 51, 59, Pdb_Date, :depDate ],
          [ 63, 66, Pdb_IDcode, :idCode ]
          )

# Columns 9-10 hold the continuation number.  :rIdCode is declared for
# eight column ranges, so it is parsed into an Array of the non-blank
# values.
OBSLTE =
  def_rec([ 9, 10, Pdb_Continuation, nil ],
          [ 12, 20, Pdb_Date, :repDate ],
          [ 22, 25, Pdb_IDcode, :idCode ],
          [ 32, 35, Pdb_IDcode, :rIdCode ],
          [ 37, 40, Pdb_IDcode, :rIdCode ],
          [ 42, 45, Pdb_IDcode, :rIdCode ],
          [ 47, 50, Pdb_IDcode, :rIdCode ],
          [ 52, 55, Pdb_IDcode, :rIdCode ],
          [ 57, 60, Pdb_IDcode, :rIdCode ],
          [ 62, 65, Pdb_IDcode, :rIdCode ],
          [ 67, 70, Pdb_IDcode, :rIdCode ]
          )

# Continued record: text of continuation lines is appended to :title.
TITLE =
  def_rec([ 9, 10, Pdb_Continuation, nil ],
          [ 11, 70, Pdb_String, :title ]
          )

# NOTE(review): the symbol is spelled :idcode here but :idCode in the
# other records.
CAVEAT =
  def_rec([ 9, 10, Pdb_Continuation, nil ],
          [ 12, 15, Pdb_IDcode, :idcode ],
          [ 20, 70, Pdb_String, :comment ]
          )

# COMPND and SOURCE are parsed into Arrays of [ key, value ] pairs
# (Pdb_Specification_list).
COMPND =
  def_rec([ 9, 10, Pdb_Continuation, nil ],
          [ 11, 70, Pdb_Specification_list, :compound ]
          )

SOURCE =
  def_rec([ 9, 10, Pdb_Continuation, nil ],
          [ 11, 70, Pdb_Specification_list, :srcName ]
          )

# Comma separated list (Pdb_List).
KEYWDS =
  def_rec([ 9, 10, Pdb_Continuation, nil ],
          [ 11, 70, Pdb_List, :keywds ]
          )

# Semicolon separated list (Pdb_SList).
EXPDTA =
  def_rec([ 9, 10, Pdb_Continuation, nil ],
          [ 11, 70, Pdb_SList, :technique ]
          )

# Comma separated list (Pdb_List).
AUTHOR =
  def_rec([ 9, 10, Pdb_Continuation, nil ],
          [ 11, 70, Pdb_List, :authorList ]
          )

# The continuation number is in columns 11-12 for this record.
# :record is declared for four column ranges and is parsed into an Array.
REVDAT =
  def_rec([ 8, 10, Pdb_Integer, :modNum ],
          [ 11, 12, Pdb_Continuation, nil ],
          [ 14, 22, Pdb_Date, :modDate ],
          [ 24, 28, Pdb_String, :modId ], # Pdb_String(5)
          [ 32, 32, Pdb_Integer, :modType ],
          [ 40, 45, Pdb_LString(6), :record ],
          [ 47, 52, Pdb_LString(6), :record ],
          [ 54, 59, Pdb_LString(6), :record ],
          [ 61, 66, Pdb_LString(6), :record ]
          )

# Same column layout as OBSLTE; :sIdCode is parsed into an Array.
SPRSDE =
  def_rec([ 9, 10, Pdb_Continuation, nil ],
          [ 12, 20, Pdb_Date, :sprsdeDate ],
          [ 22, 25, Pdb_IDcode, :idCode ],
          [ 32, 35, Pdb_IDcode, :sIdCode ],
          [ 37, 40, Pdb_IDcode, :sIdCode ],
          [ 42, 45, Pdb_IDcode, :sIdCode ],
          [ 47, 50, Pdb_IDcode, :sIdCode ],
          [ 52, 55, Pdb_IDcode, :sIdCode ],
          [ 57, 60, Pdb_IDcode, :sIdCode ],
          [ 62, 65, Pdb_IDcode, :sIdCode ],
          [ 67, 70, Pdb_IDcode, :sIdCode ]
          )

# 'JRNL' is defined below
# (nil here only reserves the constant at this position)
JRNL = nil

# 'REMARK' is defined below
# (nil here only reserves the constant at this position)
REMARK = nil
496
+
497
# Single-line record (no continuation field).
DBREF =
  def_rec([ 8, 11, Pdb_IDcode, :idCode ],
          [ 13, 13, Pdb_Character, :chainID ],
          [ 15, 18, Pdb_Integer, :seqBegin ],
          [ 19, 19, Pdb_AChar, :insertBegin ],
          [ 21, 24, Pdb_Integer, :seqEnd ],
          [ 25, 25, Pdb_AChar, :insertEnd ],
          [ 27, 32, Pdb_String, :database ], #Pdb_LString
          [ 34, 41, Pdb_String, :dbAccession ], #Pdb_LString
          [ 43, 54, Pdb_String, :dbIdCode ], #Pdb_LString
          [ 56, 60, Pdb_Integer, :dbseqBegin ],
          [ 61, 61, Pdb_AChar, :idbnsBeg ],
          [ 63, 67, Pdb_Integer, :dbseqEnd ],
          [ 68, 68, Pdb_AChar, :dbinsEnd ]
          )

# :conflict uses the unsized Pdb_LString, i.e. the raw column text is
# copied without stripping or padding.
SEQADV =
  def_rec([ 8, 11, Pdb_IDcode, :idCode ],
          [ 13, 15, Pdb_Residue_name, :resName ],
          [ 17, 17, Pdb_Character, :chainID ],
          [ 19, 22, Pdb_Integer, :seqNum ],
          [ 23, 23, Pdb_AChar, :iCode ],
          [ 25, 28, Pdb_String, :database ], #Pdb_LString
          [ 30, 38, Pdb_String, :dbIdCode ], #Pdb_LString
          [ 40, 42, Pdb_Residue_name, :dbRes ],
          [ 44, 48, Pdb_Integer, :dbSeq ],
          [ 50, 70, Pdb_LString, :conflict ]
          )

# Columns 9-10 are treated as the continuation number instead of a
# :serNum field (see the disabled definition), so successive SEQRES
# lines can be merged into one record.  :resName is declared for thirteen
# column ranges and is parsed into an Array of the non-blank names.
SEQRES =
  def_rec(#[ 9, 10, Pdb_Integer, :serNum ],
          [ 9, 10, Pdb_Continuation, nil ],
          [ 12, 12, Pdb_Character, :chainID ],
          [ 14, 17, Pdb_Integer, :numRes ],
          [ 20, 22, Pdb_Residue_name, :resName ],
          [ 24, 26, Pdb_Residue_name, :resName ],
          [ 28, 30, Pdb_Residue_name, :resName ],
          [ 32, 34, Pdb_Residue_name, :resName ],
          [ 36, 38, Pdb_Residue_name, :resName ],
          [ 40, 42, Pdb_Residue_name, :resName ],
          [ 44, 46, Pdb_Residue_name, :resName ],
          [ 48, 50, Pdb_Residue_name, :resName ],
          [ 52, 54, Pdb_Residue_name, :resName ],
          [ 56, 58, Pdb_Residue_name, :resName ],
          [ 60, 62, Pdb_Residue_name, :resName ],
          [ 64, 66, Pdb_Residue_name, :resName ],
          [ 68, 70, Pdb_Residue_name, :resName ]
          )

# Single-line record (no continuation field).
MODRES =
  def_rec([ 8, 11, Pdb_IDcode, :idCode ],
          [ 13, 15, Pdb_Residue_name, :resName ],
          [ 17, 17, Pdb_Character, :chainID ],
          [ 19, 22, Pdb_Integer, :seqNum ],
          [ 23, 23, Pdb_AChar, :iCode ],
          [ 25, 27, Pdb_Residue_name, :stdRes ],
          [ 30, 70, Pdb_String, :comment ]
          )
555
+
556
# NOTE(review): the chain symbol is spelled :ChainID (capital C) here,
# unlike :chainID in most other records.
HET =
  def_rec([ 8, 10, Pdb_LString(3), :hetID ],
          [ 13, 13, Pdb_Character, :ChainID ],
          [ 14, 17, Pdb_Integer, :seqNum ],
          [ 18, 18, Pdb_AChar, :iCode ],
          [ 21, 25, Pdb_Integer, :numHetAtoms ],
          [ 31, 70, Pdb_String, :text ]
          )

# Continued record: columns 9-10 hold the continuation number.
HETNAM =
  def_rec([ 9, 10, Pdb_Continuation, nil ],
          [ 12, 14, Pdb_LString(3), :hetID ],
          [ 16, 70, Pdb_String, :text ]
          )

# Continued record; the synonyms are a semicolon separated list.
HETSYN =
  def_rec([ 9, 10, Pdb_Continuation, nil ],
          [ 12, 14, Pdb_LString(3), :hetID ],
          [ 16, 70, Pdb_SList, :hetSynonyms ]
          )

# The continuation number (columns 17-18) is declared as an ordinary
# Pdb_Integer field here, not as Pdb_Continuation, so continue? is false
# for this record and add_continuation never merges FORMUL lines.
FORMUL =
  def_rec([ 9, 10, Pdb_Integer, :compNum ],
          [ 13, 15, Pdb_LString(3), :hetID ],
          [ 17, 18, Pdb_Integer, :continuation ],
          [ 19, 19, Pdb_Character, :asterisk ],
          [ 20, 70, Pdb_String, :text ]
          )
584
+
585
# The identifier in columns 12-14 is read with Pdb_StringRJ (leading
# blanks removed) instead of the disabled fixed-width Pdb_LString(3).
HELIX =
  def_rec([ 8, 10, Pdb_Integer, :serNum ],
          #[ 12, 14, Pdb_LString(3), :helixID ],
          [ 12, 14, Pdb_StringRJ, :helixID ],
          [ 16, 18, Pdb_Residue_name, :initResName ],
          [ 20, 20, Pdb_Character, :initChainID ],
          [ 22, 25, Pdb_Integer, :initSeqNum ],
          [ 26, 26, Pdb_AChar, :initICode ],
          [ 28, 30, Pdb_Residue_name, :endResName ],
          [ 32, 32, Pdb_Character, :endChainID ],
          [ 34, 37, Pdb_Integer, :endSeqNum ],
          [ 38, 38, Pdb_AChar, :endICode ],
          [ 39, 40, Pdb_Integer, :helixClass ],
          [ 41, 70, Pdb_String, :comment ],
          [ 72, 76, Pdb_Integer, :length ]
          )

# NOTE(review): :curChainId / :prevChainId end in "Id" while
# :initChainID / :endChainID end in "ID".
SHEET =
  def_rec([ 8, 10, Pdb_Integer, :strand ],
          #[ 12, 14, Pdb_LString(3), :sheetID ],
          [ 12, 14, Pdb_StringRJ, :sheetID ],
          [ 15, 16, Pdb_Integer, :numStrands ],
          [ 18, 20, Pdb_Residue_name, :initResName ],
          [ 22, 22, Pdb_Character, :initChainID ],
          [ 23, 26, Pdb_Integer, :initSeqNum ],
          [ 27, 27, Pdb_AChar, :initICode ],
          [ 29, 31, Pdb_Residue_name, :endResName ],
          [ 33, 33, Pdb_Character, :endChainID ],
          [ 34, 37, Pdb_Integer, :endSeqNum ],
          [ 38, 38, Pdb_AChar, :endICode ],
          [ 39, 40, Pdb_Integer, :sense ],
          [ 42, 45, Pdb_Atom, :curAtom ],
          [ 46, 48, Pdb_Residue_name, :curResName ],
          [ 50, 50, Pdb_Character, :curChainId ],
          [ 51, 54, Pdb_Integer, :curResSeq ],
          [ 55, 55, Pdb_AChar, :curICode ],
          [ 57, 60, Pdb_Atom, :prevAtom ],
          [ 61, 63, Pdb_Residue_name, :prevResName ],
          [ 65, 65, Pdb_Character, :prevChainId ],
          [ 66, 69, Pdb_Integer, :prevResSeq ],
          [ 70, 70, Pdb_AChar, :prevICode ]
          )

# NOTE(review): chain symbols are spelled :initChainId / :endChainId
# here but :initChainID / :endChainID in HELIX.
TURN =
  def_rec([ 8, 10, Pdb_Integer, :seq ],
          #[ 12, 14, Pdb_LString(3), :turnId ],
          [ 12, 14, Pdb_StringRJ, :turnId ],
          [ 16, 18, Pdb_Residue_name, :initResName ],
          [ 20, 20, Pdb_Character, :initChainId ],
          [ 21, 24, Pdb_Integer, :initSeqNum ],
          [ 25, 25, Pdb_AChar, :initICode ],
          [ 27, 29, Pdb_Residue_name, :endResName ],
          [ 31, 31, Pdb_Character, :endChainId ],
          [ 32, 35, Pdb_Integer, :endSeqNum ],
          [ 36, 36, Pdb_AChar, :endICode ],
          [ 41, 70, Pdb_String, :comment ]
          )
642
+
643
# Single-line record; :pep1 / :pep2 keep the raw three-column text.
SSBOND =
  def_rec([ 8, 10, Pdb_Integer, :serNum ],
          [ 12, 14, Pdb_LString(3), :pep1 ], # "CYS"
          [ 16, 16, Pdb_Character, :chainID1 ],
          [ 18, 21, Pdb_Integer, :seqNum1 ],
          [ 22, 22, Pdb_AChar, :icode1 ],
          [ 26, 28, Pdb_LString(3), :pep2 ], # "CYS"
          [ 30, 30, Pdb_Character, :chainID2 ],
          [ 32, 35, Pdb_Integer, :seqNum2 ],
          [ 36, 36, Pdb_AChar, :icode2 ],
          [ 60, 65, Pdb_SymOP, :sym1 ],
          [ 67, 72, Pdb_SymOP, :sym2 ]
          )

# LINK and SLTBRG below share the same column layout; only the atom
# symbols differ (:name1 / :name2 here, :atom1 / :atom2 in SLTBRG).
LINK =
  def_rec([ 13, 16, Pdb_Atom, :name1 ],
          [ 17, 17, Pdb_Character, :altLoc1 ],
          [ 18, 20, Pdb_Residue_name, :resName1 ],
          [ 22, 22, Pdb_Character, :chainID1 ],
          [ 23, 26, Pdb_Integer, :resSeq1 ],
          [ 27, 27, Pdb_AChar, :iCode1 ],
          [ 43, 46, Pdb_Atom, :name2 ],
          [ 47, 47, Pdb_Character, :altLoc2 ],
          [ 48, 50, Pdb_Residue_name, :resName2 ],
          [ 52, 52, Pdb_Character, :chainID2 ],
          [ 53, 56, Pdb_Integer, :resSeq2 ],
          [ 57, 57, Pdb_AChar, :iCode2 ],
          [ 60, 65, Pdb_SymOP, :sym1 ],
          [ 67, 72, Pdb_SymOP, :sym2 ]
          )

# NOTE(review): symbol capitalisation is mixed in this record
# (:Chain1, :ICode1, :ChainH versus :chainID2, :iCodeH, :iCode2).
HYDBND =
  def_rec([ 13, 16, Pdb_Atom, :name1 ],
          [ 17, 17, Pdb_Character, :altLoc1 ],
          [ 18, 20, Pdb_Residue_name, :resName1 ],
          [ 22, 22, Pdb_Character, :Chain1 ],
          [ 23, 27, Pdb_Integer, :resSeq1 ],
          [ 28, 28, Pdb_AChar, :ICode1 ],
          [ 30, 33, Pdb_Atom, :nameH ],
          [ 34, 34, Pdb_Character, :altLocH ],
          [ 36, 36, Pdb_Character, :ChainH ],
          [ 37, 41, Pdb_Integer, :resSeqH ],
          [ 42, 42, Pdb_AChar, :iCodeH ],
          [ 44, 47, Pdb_Atom, :name2 ],
          [ 48, 48, Pdb_Character, :altLoc2 ],
          [ 49, 51, Pdb_Residue_name, :resName2 ],
          [ 53, 53, Pdb_Character, :chainID2 ],
          [ 54, 58, Pdb_Integer, :resSeq2 ],
          [ 59, 59, Pdb_AChar, :iCode2 ],
          [ 60, 65, Pdb_SymOP, :sym1 ],
          [ 67, 72, Pdb_SymOP, :sym2 ]
          )

# Same column layout as LINK.
SLTBRG =
  def_rec([ 13, 16, Pdb_Atom, :atom1 ],
          [ 17, 17, Pdb_Character, :altLoc1 ],
          [ 18, 20, Pdb_Residue_name, :resName1 ],
          [ 22, 22, Pdb_Character, :chainID1 ],
          [ 23, 26, Pdb_Integer, :resSeq1 ],
          [ 27, 27, Pdb_AChar, :iCode1 ],
          [ 43, 46, Pdb_Atom, :atom2 ],
          [ 47, 47, Pdb_Character, :altLoc2 ],
          [ 48, 50, Pdb_Residue_name, :resName2 ],
          [ 52, 52, Pdb_Character, :chainID2 ],
          [ 53, 56, Pdb_Integer, :resSeq2 ],
          [ 57, 57, Pdb_AChar, :iCode2 ],
          [ 60, 65, Pdb_SymOP, :sym1 ],
          [ 67, 72, Pdb_SymOP, :sym2 ]
          )

# Columns 8-36 follow the SSBOND layout; :measure is read as a Float.
CISPEP =
  def_rec([ 8, 10, Pdb_Integer, :serNum ],
          [ 12, 14, Pdb_LString(3), :pep1 ],
          [ 16, 16, Pdb_Character, :chainID1 ],
          [ 18, 21, Pdb_Integer, :seqNum1 ],
          [ 22, 22, Pdb_AChar, :icode1 ],
          [ 26, 28, Pdb_LString(3), :pep2 ],
          [ 30, 30, Pdb_Character, :chainID2 ],
          [ 32, 35, Pdb_Integer, :seqNum2 ],
          [ 36, 36, Pdb_AChar, :icode2 ],
          [ 44, 46, Pdb_Integer, :modNum ],
          [ 54, 59, Pdb_Real('6.2'), :measure ]
          )
726
+
727
# Up to four residues per line, each with its own numbered symbols
# (resNameN, chainIDN, seqN, iCodeN).  No continuation field is declared,
# so every SITE line is a separate record.
SITE =
  def_rec([ 8, 10, Pdb_Integer, :seqNum ],
          [ 12, 14, Pdb_LString(3), :siteID ],
          [ 16, 17, Pdb_Integer, :numRes ],
          [ 19, 21, Pdb_Residue_name, :resName1 ],
          [ 23, 23, Pdb_Character, :chainID1 ],
          [ 24, 27, Pdb_Integer, :seq1 ],
          [ 28, 28, Pdb_AChar, :iCode1 ],
          [ 30, 32, Pdb_Residue_name, :resName2 ],
          [ 34, 34, Pdb_Character, :chainID2 ],
          [ 35, 38, Pdb_Integer, :seq2 ],
          [ 39, 39, Pdb_AChar, :iCode2 ],
          [ 41, 43, Pdb_Residue_name, :resName3 ],
          [ 45, 45, Pdb_Character, :chainID3 ],
          [ 46, 49, Pdb_Integer, :seq3 ],
          [ 50, 50, Pdb_AChar, :iCode3 ],
          [ 52, 54, Pdb_Residue_name, :resName4 ],
          [ 56, 56, Pdb_Character, :chainID4 ],
          [ 57, 60, Pdb_Integer, :seq4 ],
          [ 61, 61, Pdb_AChar, :iCode4 ]
          )
748
+
749
+ CRYST1 = # CRYST1 record layout; entries read as [first column, last column, field type, field name]
750
+ def_rec([ 7, 15, Pdb_Real('9.3'), :a ], # a, b, c share the '9.3' real format
751
+ [ 16, 24, Pdb_Real('9.3'), :b ],
752
+ [ 25, 33, Pdb_Real('9.3'), :c ],
753
+ [ 34, 40, Pdb_Real('7.2'), :alpha ], # alpha, beta, gamma share the '7.2' real format
754
+ [ 41, 47, Pdb_Real('7.2'), :beta ],
755
+ [ 48, 54, Pdb_Real('7.2'), :gamma ],
756
+ [ 56, 66, Pdb_LString, :sGroup ],
757
+ [ 67, 70, Pdb_Integer, :z ]
758
+ )
759
+
760
+ # ORIGXn n=1, 2, or 3
761
+ ORIGX1 = # ORIGX1 record layout; entries read as [first column, last column, field type, field name]
762
+ def_rec([ 11, 20, Pdb_Real('10.6'), :On1 ],
763
+ [ 21, 30, Pdb_Real('10.6'), :On2 ],
764
+ [ 31, 40, Pdb_Real('10.6'), :On3 ],
765
+ [ 46, 55, Pdb_Real('10.5'), :Tn ] # the last field uses '10.5', the first three '10.6'
766
+ )
767
+
768
+ ORIGX2 = new_inherit(ORIGX1) # ORIGX2 and ORIGX3 are derived from ORIGX1 (presumably sharing its layout - confirm in new_inherit)
769
+ ORIGX3 = new_inherit(ORIGX1)
770
+
771
+ # SCALEn n=1, 2, or 3
772
+ SCALE1 = # SCALE1 record layout; entries read as [first column, last column, field type, field name]
773
+ def_rec([ 11, 20, Pdb_Real('10.6'), :Sn1 ],
774
+ [ 21, 30, Pdb_Real('10.6'), :Sn2 ],
775
+ [ 31, 40, Pdb_Real('10.6'), :Sn3 ],
776
+ [ 46, 55, Pdb_Real('10.5'), :Un ] # the last field uses '10.5', the first three '10.6'
777
+ )
778
+
779
+ SCALE2 = new_inherit(SCALE1) # SCALE2 and SCALE3 are derived from SCALE1 (presumably sharing its layout - confirm in new_inherit)
780
+ SCALE3 = new_inherit(SCALE1)
781
+
782
+ # MTRIXn n=1,2, or 3
783
+ MTRIX1 = # MTRIX1 record layout; entries read as [first column, last column, field type, field name]
784
+ def_rec([ 8, 10, Pdb_Integer, :serial ],
785
+ [ 11, 20, Pdb_Real('10.6'), :Mn1 ],
786
+ [ 21, 30, Pdb_Real('10.6'), :Mn2 ],
787
+ [ 31, 40, Pdb_Real('10.6'), :Mn3 ],
788
+ [ 46, 55, Pdb_Real('10.5'), :Vn ],
789
+ [ 60, 60, Pdb_Integer, :iGiven ] # single-column integer field
790
+ )
791
+
792
+ MTRIX2 = new_inherit(MTRIX1) # MTRIX2 and MTRIX3 are derived from MTRIX1 (presumably sharing its layout - confirm in new_inherit)
793
+ MTRIX3 = new_inherit(MTRIX1)
794
+
795
+ TVECT = # TVECT record layout; entries read as [first column, last column, field type, field name]
796
+ def_rec([ 8, 10, Pdb_Integer, :serial ],
797
+ [ 11, 20, Pdb_Real('10.5'), :t1 ], # t1..t3 share the '10.5' real format
798
+ [ 21, 30, Pdb_Real('10.5'), :t2 ],
799
+ [ 31, 40, Pdb_Real('10.5'), :t3 ],
800
+ [ 41, 70, Pdb_String, :text ]
801
+ )
802
+
803
+ MODEL = # MODEL record layout: a single integer field, the model serial, in columns 11-14
804
+ def_rec([ 11, 14, Pdb_Integer, :serial ]
805
+ )
806
+ # ChangeLog: model_serial are changed to serial
807
+
808
+ ATOM = # ATOM record layout; built with new_direct (not def_rec) and parsed by the hand-written do_parse in class ATOM
809
+ new_direct([ 7, 11, Pdb_Integer, :serial ], # columns are 1-based and inclusive: do_parse reads serial from @str[6..10]
810
+ [ 13, 16, Pdb_Atom, :name ],
811
+ [ 17, 17, Pdb_Character, :altLoc ],
812
+ [ 18, 20, Pdb_Residue_name, :resName ],
813
+ [ 22, 22, Pdb_Character, :chainID ],
814
+ [ 23, 26, Pdb_Integer, :resSeq ],
815
+ [ 27, 27, Pdb_AChar, :iCode ],
816
+ [ 31, 38, Pdb_Real('8.3'), :x ],
817
+ [ 39, 46, Pdb_Real('8.3'), :y ],
818
+ [ 47, 54, Pdb_Real('8.3'), :z ],
819
+ [ 55, 60, Pdb_Real('6.2'), :occupancy ],
820
+ [ 61, 66, Pdb_Real('6.2'), :tempFactor ],
821
+ [ 73, 76, Pdb_LString(4), :segID ],
822
+ [ 77, 78, Pdb_LString(2), :element ],
823
+ [ 79, 80, Pdb_LString(2), :charge ]
824
+ )
825
+
826
# Behaviour added to the ATOM record class created by new_direct:
# coordinate helpers, ordering by serial number and a fixed-column parser.
class ATOM

  include Utils
  include Comparable

  # Older accessor names, kept for backward compatibility.
  alias occ occupancy
  alias bfac tempFactor

  # The residue this atom belongs to.
  attr_accessor :residue

  # Position of the atom as a Coordinate built from x, y and z.
  def xyz
    Coordinate[x, y, z]
  end

  # Position of the atom as a plain array [x, y, z].
  def to_a
    [x, y, z]
  end

  # Atoms compare by serial number (this is what Comparable builds on).
  def <=>(other)
    serial <=> other.serial
  end

  # Fills every field from the stored line (@str) by fixed column position.
  # The work is done only once; later calls return immediately.
  # Numeric fields go through to_i / to_f, resName is right-stripped, and
  # the remaining fields are taken as raw substrings.
  # Returns self.
  def do_parse
    unless @parsed
      self.serial = @str[6..10].to_i
      self.name = @str[12..15]
      self.altLoc = @str[16..16]
      self.resName = @str[17..19].rstrip
      self.chainID = @str[21..21]
      self.resSeq = @str[22..25].to_i
      self.iCode = @str[26..26]
      self.x = @str[30..37].to_f
      self.y = @str[38..45].to_f
      self.z = @str[46..53].to_f
      self.occupancy = @str[54..59].to_f
      self.tempFactor = @str[60..65].to_f
      self.segID = @str[72..75]
      self.element = @str[76..77]
      self.charge = @str[78..79]
      @parsed = true
    end
    self
  end
end #class ATOM
874
+
875
+ SIGATM = # SIGATM record layout; same columns as ATOM, with sigX/sigY/sigZ/sigOcc/sigTemp in place of x/y/z/occupancy/tempFactor
876
+ def_rec([ 7, 11, Pdb_Integer, :serial ],
877
+ [ 13, 16, Pdb_Atom, :name ],
878
+ [ 17, 17, Pdb_Character, :altLoc ],
879
+ [ 18, 20, Pdb_Residue_name, :resName ],
880
+ [ 22, 22, Pdb_Character, :chainID ],
881
+ [ 23, 26, Pdb_Integer, :resSeq ],
882
+ [ 27, 27, Pdb_AChar, :iCode ],
883
+ [ 31, 38, Pdb_Real('8.3'), :sigX ],
884
+ [ 39, 46, Pdb_Real('8.3'), :sigY ],
885
+ [ 47, 54, Pdb_Real('8.3'), :sigZ ],
886
+ [ 55, 60, Pdb_Real('6.2'), :sigOcc ],
887
+ [ 61, 66, Pdb_Real('6.2'), :sigTemp ],
888
+ [ 73, 76, Pdb_LString(4), :segID ],
889
+ [ 77, 78, Pdb_LString(2), :element ],
890
+ [ 79, 80, Pdb_LString(2), :charge ]
891
+ )
892
+
893
+ ANISOU = # ANISOU record layout; atom identification columns match ATOM, followed by six 7-column integer fields
894
+ def_rec([ 7, 11, Pdb_Integer, :serial ],
895
+ [ 13, 16, Pdb_Atom, :name ],
896
+ [ 17, 17, Pdb_Character, :altLoc ],
897
+ [ 18, 20, Pdb_Residue_name, :resName ],
898
+ [ 22, 22, Pdb_Character, :chainID ],
899
+ [ 23, 26, Pdb_Integer, :resSeq ],
900
+ [ 27, 27, Pdb_AChar, :iCode ],
901
+ [ 29, 35, Pdb_Integer, :U11 ], # U11..U23 are declared as Pdb_Integer, not Pdb_Real
902
+ [ 36, 42, Pdb_Integer, :U22 ],
903
+ [ 43, 49, Pdb_Integer, :U33 ],
904
+ [ 50, 56, Pdb_Integer, :U12 ],
905
+ [ 57, 63, Pdb_Integer, :U13 ],
906
+ [ 64, 70, Pdb_Integer, :U23 ],
907
+ [ 73, 76, Pdb_LString(4), :segID ],
908
+ [ 77, 78, Pdb_LString(2), :element ],
909
+ [ 79, 80, Pdb_LString(2), :charge ]
910
+ )
911
+
912
+ SIGUIJ = # SIGUIJ record layout; same columns as ANISOU with SigmaU11..SigmaU23 in place of U11..U23
913
+ def_rec([ 7, 11, Pdb_Integer, :serial ],
914
+ [ 13, 16, Pdb_Atom, :name ],
915
+ [ 17, 17, Pdb_Character, :altLoc ],
916
+ [ 18, 20, Pdb_Residue_name, :resName ],
917
+ [ 22, 22, Pdb_Character, :chainID ],
918
+ [ 23, 26, Pdb_Integer, :resSeq ],
919
+ [ 27, 27, Pdb_AChar, :iCode ],
920
+ [ 29, 35, Pdb_Integer, :SigmaU11 ], # SigmaU11..SigmaU23 are declared as Pdb_Integer, not Pdb_Real
921
+ [ 36, 42, Pdb_Integer, :SigmaU22 ],
922
+ [ 43, 49, Pdb_Integer, :SigmaU33 ],
923
+ [ 50, 56, Pdb_Integer, :SigmaU12 ],
924
+ [ 57, 63, Pdb_Integer, :SigmaU13 ],
925
+ [ 64, 70, Pdb_Integer, :SigmaU23 ],
926
+ [ 73, 76, Pdb_LString(4), :segID ],
927
+ [ 77, 78, Pdb_LString(2), :element ],
928
+ [ 79, 80, Pdb_LString(2), :charge ]
929
+ )
930
+
931
+ TER = # TER record layout; a subset of the ATOM columns (no atom name, altLoc or coordinates)
932
+ def_rec([ 7, 11, Pdb_Integer, :serial ],
933
+ [ 18, 20, Pdb_Residue_name, :resName ],
934
+ [ 22, 22, Pdb_Character, :chainID ],
935
+ [ 23, 26, Pdb_Integer, :resSeq ],
936
+ [ 27, 27, Pdb_AChar, :iCode ]
937
+ )
938
+
939
+ #HETATM =
940
+ # new_direct([ 7, 11, Pdb_Integer, :serial ],
941
+ # [ 13, 16, Pdb_Atom, :name ],
942
+ # [ 17, 17, Pdb_Character, :altLoc ],
943
+ # [ 18, 20, Pdb_Residue_name, :resName ],
944
+ # [ 22, 22, Pdb_Character, :chainID ],
945
+ # [ 23, 26, Pdb_Integer, :resSeq ],
946
+ # [ 27, 27, Pdb_AChar, :iCode ],
947
+ # [ 31, 38, Pdb_Real('8.3'), :x ],
948
+ # [ 39, 46, Pdb_Real('8.3'), :y ],
949
+ # [ 47, 54, Pdb_Real('8.3'), :z ],
950
+ # [ 55, 60, Pdb_Real('6.2'), :occupancy ],
951
+ # [ 61, 66, Pdb_Real('6.2'), :tempFactor ],
952
+ # [ 73, 76, Pdb_LString(4), :segID ],
953
+ # [ 77, 78, Pdb_LString(2), :element ],
954
+ # [ 79, 80, Pdb_LString(2), :charge ]
955
+ # )
956
+
957
+ HETATM = new_inherit(ATOM) # derived from ATOM instead of repeating the field list (see the commented-out definition above); presumably inherits ATOM's methods too - confirm in new_inherit
958
+
959
+ ENDMDL = # ENDMDL has no real fields; the one declared field spans columns 2..1, i.e. an empty range
960
+ def_rec([ 2, 1, Pdb_Integer, :serial ] # dummy field (always 0)
961
+ )
962
+
963
+ CONECT = # CONECT record layout: eleven consecutive 5-column integer fields covering columns 7-61
964
+ def_rec([ 7, 11, Pdb_Integer, :serial ], # NOTE(review): all eleven fields share the name :serial, so they probably cannot be told apart by name - confirm how def_rec handles duplicate names
965
+ [ 12, 16, Pdb_Integer, :serial ],
966
+ [ 17, 21, Pdb_Integer, :serial ],
967
+ [ 22, 26, Pdb_Integer, :serial ],
968
+ [ 27, 31, Pdb_Integer, :serial ],
969
+ [ 32, 36, Pdb_Integer, :serial ],
970
+ [ 37, 41, Pdb_Integer, :serial ],
971
+ [ 42, 46, Pdb_Integer, :serial ],
972
+ [ 47, 51, Pdb_Integer, :serial ],
973
+ [ 52, 56, Pdb_Integer, :serial ],
974
+ [ 57, 61, Pdb_Integer, :serial ]
975
+ )
976
+
977
+ MASTER = # MASTER record layout: twelve consecutive 5-column integer fields covering columns 11-70
978
+ def_rec([ 11, 15, Pdb_Integer, :numRemark ],
979
+ [ 16, 20, Pdb_Integer, "0" ], # the only field named with a String ("0") instead of a Symbol; looks like a placeholder - confirm how def_rec treats it
980
+ [ 21, 25, Pdb_Integer, :numHet ],
981
+ [ 26, 30, Pdb_Integer, :numHelix ],
982
+ [ 31, 35, Pdb_Integer, :numSheet ],
983
+ [ 36, 40, Pdb_Integer, :numTurn ],
984
+ [ 41, 45, Pdb_Integer, :numSite ],
985
+ [ 46, 50, Pdb_Integer, :numXform ],
986
+ [ 51, 55, Pdb_Integer, :numCoord ],
987
+ [ 56, 60, Pdb_Integer, :numTer ],
988
+ [ 61, 65, Pdb_Integer, :numConect ],
989
+ [ 66, 70, Pdb_Integer, :numSeq ]
990
+ )
991
+
992
+ class Jrnl < self # layouts of the JRNL sub-records; get_record_class looks one up in Jrnl::Definition using str[12..15]
993
+ # subrecord of JRNL
994
+ # 13, 16
995
+ AUTH = # author list; may span several lines (Pdb_Continuation in columns 17-18)
996
+ def_rec([ 13, 16, Pdb_String, :sub_record ], # "AUTH"
997
+ [ 17, 18, Pdb_Continuation, nil ],
998
+ [ 20, 70, Pdb_List, :authorList ]
999
+ )
1000
+
1001
+ TITL = # title text; may span several lines
1002
+ def_rec([ 13, 16, Pdb_String, :sub_record ], # "TITL"
1003
+ [ 17, 18, Pdb_Continuation, nil ],
1004
+ [ 20, 70, Pdb_LString, :title ]
1005
+ )
1006
+
1007
+ EDIT = # editor list; declared as Pdb_List like AUTH
1008
+ def_rec([ 13, 16, Pdb_String, :sub_record ], # "EDIT"
1009
+ [ 17, 18, Pdb_Continuation, nil ],
1010
+ [ 20, 70, Pdb_List, :editorList ]
1011
+ )
1012
+
1013
+ REF = # publication name, volume, page and year
1014
+ def_rec([ 13, 16, Pdb_String, :sub_record ], # "REF"
1015
+ [ 17, 18, Pdb_Continuation, nil ],
1016
+ [ 20, 47, Pdb_LString, :pubName ],
1017
+ [ 50, 51, Pdb_LString(2), "V." ], # named with the String "V." rather than a Symbol
1018
+ [ 52, 55, Pdb_String, :volume ],
1019
+ [ 57, 61, Pdb_String, :page ],
1020
+ [ 63, 66, Pdb_Integer, :year ]
1021
+ )
1022
+
1023
+ PUBL = # publisher text; may span several lines
1024
+ def_rec([ 13, 16, Pdb_String, :sub_record ], # "PUBL"
1025
+ [ 17, 18, Pdb_Continuation, nil ],
1026
+ [ 20, 70, Pdb_LString, :pub ]
1027
+ )
1028
+
1029
+ REFN = # reference numbers; the only sub-record here without a Pdb_Continuation field
1030
+ def_rec([ 13, 16, Pdb_String, :sub_record ], # "REFN"
1031
+ [ 20, 23, Pdb_LString(4), "ASTM" ],
1032
+ [ 25, 30, Pdb_LString(6), :astm ],
1033
+ [ 33, 34, Pdb_LString(2), :country ],
1034
+ [ 36, 39, Pdb_LString(4), :BorS ], # "ISBN" or "ISSN"
1035
+ [ 41, 65, Pdb_LString, :isbn ],
1036
+ [ 67, 70, Pdb_LString(4), :coden ] # "0353" for unpublished
1037
+ )
1038
+
1039
+ # default or unknown record
1040
+ # ''
1041
+ Default = # only the sub-record name is declared
1042
+ def_rec([ 13, 16, Pdb_String, :sub_record ]) # ""
1043
+
1044
+ Definition = create_definition_hash # lookup table consulted by get_record_class; presumably built from the constants above - confirm in create_definition_hash
1045
+ end #class JRNL
1046
+
1047
+ class Remark1 < self # layouts of the "REMARK 1" sub-records; get_record_class looks one up in Remark1::Definition using str[12..15]
1048
+ # 13, 16
1049
+ EFER = # named EFER because the lookup key str[12..15] covers only the "EFER" part of "REFERENCE" (columns 12-20)
1050
+ def_rec([ 8, 10, Pdb_Integer, :remarkNum ], # "1"
1051
+ [ 12, 20, Pdb_String, :sub_record ], # "REFERENCE"
1052
+ [ 22, 70, Pdb_Integer, :refNum ]
1053
+ )
1054
+
1055
+ AUTH = # author list; may span several lines (Pdb_Continuation in columns 17-18)
1056
+ def_rec([ 8, 10, Pdb_Integer, :remarkNum ], # "1"
1057
+ [ 13, 16, Pdb_String, :sub_record ], # "AUTH"
1058
+ [ 17, 18, Pdb_Continuation, nil ],
1059
+ [ 20, 70, Pdb_List, :authorList ]
1060
+ )
1061
+
1062
+ TITL = # title text; may span several lines
1063
+ def_rec([ 8, 10, Pdb_Integer, :remarkNum ], # "1"
1064
+ [ 13, 16, Pdb_String, :sub_record ], # "TITL"
1065
+ [ 17, 18, Pdb_Continuation, nil ],
1066
+ [ 20, 70, Pdb_LString, :title ]
1067
+ )
1068
+
1069
+ EDIT = # editor list
1070
+ def_rec([ 8, 10, Pdb_Integer, :remarkNum ], # "1"
1071
+ [ 13, 16, Pdb_String, :sub_record ], # "EDIT"
1072
+ [ 17, 18, Pdb_Continuation, nil ],
1073
+ [ 20, 70, Pdb_LString, :editorList ] # NOTE(review): Jrnl::EDIT declares editorList as Pdb_List; confirm which type is intended
1074
+ )
1075
+
1076
+ REF = # publication name, volume, page and year
1077
+ def_rec([ 8, 10, Pdb_Integer, :remarkNum ], # "1"
1078
+ [ 13, 16, Pdb_LString(3), :sub_record ], # "REF"
1079
+ [ 17, 18, Pdb_Continuation, nil ],
1080
+ [ 20, 47, Pdb_LString, :pubName ],
1081
+ [ 50, 51, Pdb_LString(2), "V." ], # named with the String "V." rather than a Symbol
1082
+ [ 52, 55, Pdb_String, :volume ],
1083
+ [ 57, 61, Pdb_String, :page ],
1084
+ [ 63, 66, Pdb_Integer, :year ]
1085
+ )
1086
+
1087
+ PUBL = # publisher text; may span several lines
1088
+ def_rec([ 8, 10, Pdb_Integer, :remarkNum ], # "1"
1089
+ [ 13, 16, Pdb_String, :sub_record ], # "PUBL"
1090
+ [ 17, 18, Pdb_Continuation, nil ],
1091
+ [ 20, 70, Pdb_LString, :pub ]
1092
+ )
1093
+
1094
+ REFN = # reference numbers; no Pdb_Continuation field
1095
+ def_rec([ 8, 10, Pdb_Integer, :remarkNum ], # "1"
1096
+ [ 13, 16, Pdb_String, :sub_record ], # "REFN"
1097
+ [ 20, 23, Pdb_LString(4), "ASTM" ],
1098
+ [ 25, 30, Pdb_LString, :astm ],
1099
+ [ 33, 34, Pdb_LString, :country ],
1100
+ [ 36, 39, Pdb_LString(4), :BorS ], # "ISBN" or "ISSN", as in Jrnl::REFN
1101
+ [ 41, 65, Pdb_LString, :isbn ],
1102
+ [ 68, 70, Pdb_LString(4), :coden ] # NOTE(review): a 4-character LString declared over 3 columns; Jrnl::REFN uses 67-70 - confirm against the PDB format guide
1103
+ )
1104
+
1105
+ Default = # only the remark number and sub-record name are declared
1106
+ def_rec([ 8, 10, Pdb_Integer, :remarkNum ], # "1"
1107
+ [ 13, 16, Pdb_String, :sub_record ] # ""
1108
+ )
1109
+
1110
+ Definition = create_definition_hash # lookup table consulted by get_record_class; presumably built from the constants above - confirm in create_definition_hash
1111
+ end #class Remark1
1112
+
1113
+ class Remark2 < self # layouts of "REMARK 2" lines; get_record_class references these constants directly (no Definition hash here)
1114
+ # 29, 38 == 'ANGSTROMS.'
1115
+ ANGSTROMS = # chosen when str[28..37] == 'ANGSTROMS.'; resolution is parsed as a real number
1116
+ def_rec([ 8, 10, Pdb_Integer, :remarkNum ], # "2"
1117
+ [ 12, 22, Pdb_LString(11), :sub_record ], # "RESOLUTION."
1118
+ [ 23, 27, Pdb_Real('5.2'), :resolution ],
1119
+ [ 29, 38, Pdb_LString(10), "ANGSTROMS." ]
1120
+ )
1121
+
1122
+ # 23, 38 == ' NOT APPLICABLE.'
1123
+ NOT_APPLICABLE = # chosen when str[22..37] == ' NOT APPLICABLE.'; resolution is kept as text here
1124
+ def_rec([ 8, 10, Pdb_Integer, :remarkNum ], # "2"
1125
+ [ 12, 22, Pdb_LString(11), :sub_record ], # "RESOLUTION."
1126
+ [ 23, 38, Pdb_LString(16), :resolution ], # " NOT APPLICABLE."
1127
+ [ 41, 70, Pdb_String, :comment ]
1128
+ )
1129
+
1130
+ # others
1131
+ Default = # used for every other "REMARK 2" line
1132
+ def_rec([ 8, 10, Pdb_Integer, :remarkNum ], # "2"
1133
+ [ 12, 22, Pdb_LString(11), :sub_record ], # "RESOLUTION."
1134
+ [ 24, 70, Pdb_String, :comment ]
1135
+ )
1136
+ end #class Remark2
1137
+
1138
+ RemarkN = # layout get_record_class uses for REMARK numbers other than 1 and 2
1139
+ def_rec([ 8, 10, Pdb_Integer, :remarkNum ],
1140
+ [ 12, 70, Pdb_LString, :text ]
1141
+ )
1142
+
1143
+ Default = def_rec([ 8, 70, Pdb_LString, :text ]) # fallback layout get_record_class returns for unrecognised record names
1144
+
1145
+ Definition = create_definition_hash # record name => record class table, consulted first by get_record_class
1146
+
1147
+ # because END is a reserved word of Ruby, it is separately
1148
+ # added to the hash
1149
+ End = # like ENDMDL, no real fields: the one declared field spans columns 2..1 (an empty range)
1150
+ def_rec([ 2, 1, Pdb_Integer, :serial ]) # dummy field (always 0)
1151
+
1152
+ Definition['END'] = End # registered by hand under the key 'END'
1153
+
1154
+ # Basically just look up the class in Definition hash
1155
+ # do some munging for JRNL and REMARK
1156
# Picks the record class that should parse one line of a PDB file.
#
# str:: a single line of the file.
#
# The record name (from fetch_record_name) is looked up in Definition first.
# Names missing from that table are handled specially:
# * JRNL          - Jrnl::Definition, keyed by str[12..15] stripped
# * REMARK 1      - Remark1::Definition, keyed by str[12..15] stripped
# * REMARK 2      - Remark2::ANGSTROMS, Remark2::NOT_APPLICABLE or
#                   Remark2::Default, depending on the text after the number
# * other REMARKs - RemarkN
# * anything else - Default
#
# Returns whatever the chosen lookup yields.
def self.get_record_class(str)
  name = fetch_record_name(str)
  known = Definition[name]
  return known if known

  if name == 'JRNL'
    Jrnl::Definition[str[12..15].to_s.strip]
  elsif name == 'REMARK'
    number = str[7..9].to_i
    if number == 1
      Remark1::Definition[str[12..15].to_s.strip]
    elsif number == 2
      if str[28..37] == 'ANGSTROMS.'
        Remark2::ANGSTROMS
      elsif str[22..37] == ' NOT APPLICABLE.'
        Remark2::NOT_APPLICABLE
      else
        Remark2::Default
      end
    else
      RemarkN
    end
  else
    # unknown field
    Default
  end
end
1185
+ end #class Record
1186
+
1187
+ Coordinate_fileds = {
1188
+ 'MODEL' => true,
1189
+ 'ENDMDL' => true,
1190
+ 'ATOM' => true,
1191
+ 'HETATM' => true,
1192
+ 'SIGATM' => true,
1193
+ 'SIGUIJ' => true,
1194
+ 'ANISOU' => true,
1195
+ 'TER' => true,
1196
+ }
1197
+
1198
# Parses the complete text of a PDB file.
#
# str:: the whole PDB file as a single string.
#
# Each line is turned into a Record; continuation lines are folded into the
# record they continue. Records are kept in file order in @data and grouped
# by record name in @hash. ATOM and HETATM records are also arranged into
# Model / Chain / Residue objects, and the models are collected in @models.
#
# Two defects of the earlier version are fixed in the MODEL branch:
# * the model serial is taken from the parsed MODEL record (f.serial,
#   columns 11-14) instead of line[6,5], which covers columns 7-11;
# * the current chain and residue are reset when a new model starts, so
#   atoms of the new model are no longer attached to the previous model's
#   chain when both use the same chain id.
def initialize(str)
  @data = str.split(/[\r\n]+/)
  @hash = {}
  @models = []
  @id = nil

  # Record that may still take continuation lines (false/nil when none).
  cont = false

  # Model, chain and residue currently being filled.
  cModel = Bio::PDB::Model.new
  cChain = Bio::PDB::Chain.new
  cResidue = Bio::PDB::Residue.new

  # Replace every line by its Record. Lines absorbed as continuations yield
  # nil (through next) and are removed by compact! at the end.
  @data.collect! do |line|
    # Skip this line if the previous record accepted it as a continuation.
    next if cont and cont = cont.add_continuation(line)

    # Build the new record.
    f = Record.get_record_class(line).new.initialize_from_string(line)
    cont = f if f.continue?

    # File the record under its name.
    key = f.record_name
    if a = @hash[key] then
      a << f
    else
      @hash[key] = [ f ]
    end

    case key
    when 'ATOM'
      residueID = "#{f.resSeq}#{f.iCode.strip}".strip

      if f.chainID == cChain.id
        chain = cChain
      elsif !(chain = cModel[f.chainID])
        # Chain not seen yet in this model: create it.
        newChain = Chain.new(f.chainID, cModel)
        cModel.addChain(newChain)
        cChain = newChain
        chain = newChain
      end

      if !newChain and residueID == cResidue.id
        residue = cResidue
      elsif newChain or !(residue = chain[residueID])
        newResidue = Residue.new(f.resName, f.resSeq, f.iCode, chain)
        chain.addResidue(newResidue)
        cResidue = newResidue
        residue = newResidue
      end

      f.residue = residue
      residue.addAtom(f)

    when 'HETATM'
      # Each model has a special solvent chain; the chain id recorded for a
      # solvent atom is not kept.
      if f.resName == 'HOH'
        solvent = Residue.new(f.resName, f.resSeq, f.iCode,
                              cModel.solvent, true)
        f.residue = solvent
        solvent.addAtom(f)
        cModel.addSolvent(solvent)

      else
        # 'LIGAND' is prepended to the residue id of a HETATM because some
        # PDB files reuse residue numbers for HETATMs.
        residueID = "#{f.resSeq}#{f.iCode.strip}".strip
        residueID = "LIGAND" + residueID

        if f.chainID == cChain.id
          chain = cChain
        elsif !(chain = cModel[f.chainID])
          # Chain not seen yet in this model: create it.
          newChain = Chain.new(f.chainID, cModel)
          cModel.addChain(newChain)
          cChain = newChain
          chain = newChain
        end

        if !newChain and residueID == cResidue.id
          residue = cResidue
        elsif newChain or !(residue = chain[residueID])
          newResidue = Residue.new(f.resName, f.resSeq, f.iCode,
                                   chain, true)
          chain.addLigand(newResidue)
          cResidue = newResidue
          residue = newResidue
        end

        f.residue = residue
        residue.addAtom(f)

      end

    when 'MODEL'
      # Store the model that just ended, unless it is the initial
      # placeholder, which has no serial.
      if cModel.model_serial
        self.addModel(cModel)
      end
      cModel = Model.new(f.serial)
      # Start the new model with no current chain or residue.
      cChain = Bio::PDB::Chain.new
      cResidue = Bio::PDB::Residue.new
    end
    f
  end
  # The last (or only) model still has to be stored.
  self.addModel(cModel)
  @data.compact!
end #def initialize
1327
+
1328
+ attr_reader :data, :hash
1329
+
1330
+ #Adds a Bio::PDB::Model to the current structure
1331
# Appends +model+ to the list of models of this structure.
# Raises a RuntimeError ("Expecting a Bio::PDB::Model") when +model+ is not
# a Bio::PDB::Model. Returns self.
def addModel(model)
  unless model.is_a?(Bio::PDB::Model)
    raise "Expecting a Bio::PDB::Model"
  end
  @models.push(model)
  self
end
1336
+
1337
+ #Iterates over the models
1338
+ def each
1339
+ @models.each{ |model| yield model }
1340
+ end
1341
+ #Alias needed for Bio::PDB::ModelFinder
1342
+ alias each_model each
1343
+
1344
+ #Provides keyed access to the models based on serial number
1345
+ #returns nil if it's not there (should it raise an exception?)
1346
+ def [](key)
1347
+ @models.find{ |model| key == model.model_serial }
1348
+ end
1349
+
1350
+ #Stringifies to a list of atom records - we could add the annotation
1351
+ #as well if needed
1352
+ def to_s
1353
+ string = ""
1354
+ @models.each{ |model| string << model.to_s }
1355
+ string << "END"
1356
+ return string
1357
+ end
1358
+
1359
+ #Makes a hash out of an array of PDB::Records and some kind of symbol
1360
+ #.__send__ invokes the method specified by the symbol.
1361
+ #Essentially it ends up with a hash with keys given in the sub_record
1362
+ #Not sure I fully understand this
1363
+ def make_hash(ary, meth)
1364
+ h = {}
1365
+ ary.each do |f|
1366
+ k = f.__send__(meth)
1367
+ h[k] = [] unless h.has_key?(k)
1368
+ h[k] << f
1369
+ end
1370
+ h
1371
+ end
1372
+ private :make_hash
1373
+
1374
+ #Takes an array and returns another array of PDB::Records
1375
+ def make_grouping(ary, meth)
1376
+ a = []
1377
+ k_prev = nil
1378
+ ary.each do |f|
1379
+ k = f.__send__(meth)
1380
+ if k_prev and k_prev == k then
1381
+ a.last << f
1382
+ else
1383
+ a << []
1384
+ a.last << f
1385
+ end
1386
+ k_prev = k
1387
+ end
1388
+ a
1389
+ end
1390
+ private :make_grouping
1391
+
1392
+ def record(name)
1393
+ @hash[name]
1394
+ end
1395
+
1396
+ # PDB original methods
1397
+ #Returns a hash of the REMARK records based on the remarkNum
1398
# REMARK records grouped by remark number.
#
# nn:: optional remark number.
#
# The grouping is built on first use and cached in @remark. The first record
# of every group is dropped. Groups 1 and 2 keep their record objects; all
# other groups are reduced to the records' text with trailing whitespace
# removed.
#
# Returns the entry for +nn+ when given, otherwise the whole hash.
def remark(nn = nil)
  unless defined?(@remark)
    by_number = make_hash(self.record('REMARK'), :remarkNum)
    by_number.each do |number, records|
      records.shift # remove first record (= space only)
      next if number == 1 or number == 2
      records.collect! { |rec| rec.text.gsub(/\s+\z/, '') }
    end
    @remark = by_number
  end
  return @remark unless nn
  @remark[nn]
end
1411
+
1412
+ #Returns a hash of journal entries
1413
# JRNL records grouped by sub-record name.
#
# sub_record:: optional sub-record name.
#
# The grouping is built on first use and cached in @jrnl.
# Returns the records of +sub_record+ when given, otherwise the whole hash.
def jrnl(sub_record = nil)
  @jrnl = make_hash(self.record('JRNL'), :sub_record) unless defined?(@jrnl)
  return @jrnl unless sub_record
  @jrnl[sub_record]
end
1419
+
1420
+ #Finding methods - just grabs the record with the appropriate id
1421
+ #or returns and array of all of them
1422
# HELIX records.
#
# helixID:: optional helix identifier.
#
# Returns the first HELIX record whose helixID matches when an identifier
# is given, otherwise everything stored under 'HELIX'.
def helix(helixID = nil)
  helices = self.record('HELIX')
  return helices unless helixID
  helices.find { |rec| rec.helixID == helixID }
end
1429
+
1430
# TURN records.
#
# turnId:: optional turn identifier.
#
# Returns the first TURN record whose turnId matches when an identifier is
# given, otherwise everything stored under 'TURN'.
def turn(turnId = nil)
  turns = self.record('TURN')
  return turns unless turnId
  turns.find { |rec| rec.turnId == turnId }
end
1437
+
1438
# SHEET records, grouped into runs of consecutive records with the same
# sheetID.
#
# sheetID:: optional sheet identifier.
#
# The grouping is built on first use and cached in @sheet.
# Returns the groups whose first record has the given sheetID when an
# identifier is given, otherwise all groups.
def sheet(sheetID = nil)
  @sheet = make_grouping(self.record('SHEET'), :sheetID) unless defined?(@sheet)
  return @sheet unless sheetID
  @sheet.select { |group| group.first.sheetID == sheetID }
end
1448
+
1449
# Returns everything stored under the record name 'SSBOND'.
def ssbond
  record('SSBOND')
end
1452
+
1453
+ #Get seqres - we get this to return a nice Bio::Seq object
1454
# Sequences declared in the SEQRES records, one per chain.
#
# chainID:: optional chain identifier.
#
# On first use the SEQRES records are grouped by chainID, their residue
# names are converted from three-letter names to one-letter codes ('X' for
# names missing from AminoAcid.names), and one Bio::Sequence::AA per chain
# is cached in @seqres.
#
# Returns the sequence of +chainID+ when given (nil for an unknown chain),
# otherwise a hash of chain id => sequence. A file without SEQRES records
# gives an empty hash.
#
# Fixes over the earlier version:
# * the conversion block ended with "aa = 'X' if aa.nil?", which evaluates
#   to nil for every residue that WAS recognised, so only the 'X'
#   placeholders survived in the sequence;
# * the codes are joined explicitly instead of relying on Array#to_s;
# * the inverted name table is built once rather than once per residue;
# * a missing SEQRES section no longer raises on nil.
def seqres(chainID = nil)
  unless defined?(@seqres)
    # Three-letter name as written in AminoAcid.names (e.g. "Ala") => code.
    one_letter = AminoAcid.names.invert
    by_chain = make_hash(self.record('SEQRES') || [], :chainID)
    sequences = {}
    by_chain.each do |chain, records|
      names = records.collect { |rec| rec.resName }.flatten
      # SEQRES writes names in upper case (ALA); the table uses "Ala".
      codes = names.collect { |name| one_letter[name.capitalize] || 'X' }
      sequences[chain] = Bio::Sequence::AA.new(codes.join)
    end
    @seqres = sequences
  end
  if chainID then
    @seqres[chainID]
  else
    @seqres
  end
end
1478
+
1479
# DBREF records.
#
# chainID:: optional chain identifier.
#
# Returns all DBREF records whose chainID matches when an identifier is
# given, otherwise everything stored under 'DBREF'.
def dbref(chainID = nil)
  refs = self.record('DBREF')
  return refs unless chainID
  refs.select { |rec| rec.chainID == chainID }
end
1486
+
1487
# Collects the keywds field of every KEYWDS record into one flat array.
def keywords
  per_record = record('KEYWDS').map { |rec| rec.keywds }
  per_record.flatten
end
1490
+
1491
# Returns the classification field of the first HEADER record.
def classification
  header = record('HEADER').first
  header.classification
end
1494
+
1495
+ # Bio::DB methods
1496
# Returns the idCode field of the first HEADER record.
# The value is cached in @id once it has been read.
def entry_id
  @id ||= self.record('HEADER').first.idCode
end
1500
+
1501
# Same value as entry_id.
def accession
  entry_id
end
1504
+
1505
# Returns the title field of the first TITLE record.
def definition
  first_title = record('TITLE').first
  first_title.title
end
1508
+
1509
# Returns the modNum field of the first REVDAT record.
def version
  first_revdat = record('REVDAT').first
  first_revdat.modNum
end
1512
+
1513
+ end #class PDB
1514
+
1515
+ end #module Bio
1516
+
1517
+ =begin
1518
+
1519
+ = Caution
1520
+
1521
+ This is a test version; the specifications of these classes are subject to change.
1522
+
1523
+ = Bio::PDB < Bio::DB
1524
+
1525
+ PDB File format class.
1526
+
1527
+ --- Bio::PDB.new(str)
1528
+
1529
+ Creates new object.
1530
+
1531
+ --- Bio::PDB#entry_id
1532
+
1533
+ PDB identifier written in "HEADER". (e.g. 1A00)
1534
+
1535
+ --- Bio::PDB#accession
1536
+
1537
+ Same as Bio::PDB#entry_id
1538
+
1539
+ --- Bio::PDB#version
1540
+
1541
+ Current modification number in "REVDAT".
1542
+
1543
+ --- Bio::PDB#definition
1544
+
1545
+ Title of this entry in "TITLE".
1546
+
1547
+ --- Bio::PDB#keywords
1548
+
1549
+ Keywords in "KEYWDS".
1550
+ Returns an array of string.
1551
+
1552
+ --- Bio::PDB#classification
1553
+
1554
+ Classification in "HEADER".
1555
+
1556
+ --- Bio::PDB#record(name)
1557
+
1558
+ Gets all records whose record type is 'name'.
1559
+ Returns an array of Bio::PDB::Record.
1560
+
1561
+ --- Bio::PDB#remark(number = nil)
1562
+
1563
+ Gets REMARK records.
1564
+ If no arguments, it returns all REMARK records as a hash.
1565
+ If remark number is specified, returns only corresponding REMARK records.
1566
+ If number == 1 or 2 ("REMARK 1" or "REMARK 2"), returns an array
1567
+ of Bio::PDB::Record instances. Otherwise, returns an array of strings.
1568
+
1569
+ --- Bio::PDB#jrnl(sub_record = nil)
1570
+
1571
+ Gets JRNL records.
1572
+ If no arguments, it returns all JRNL records as a hash.
1573
+ If sub record name is specified, it returns only corresponding records
1574
+ as an array of Bio::PDB::Record instances.
1575
+
1576
+ --- Bio::PDB#seqres(chainID = nil)
1577
+
1578
+ Amino acid or nucleic acid sequence of backbone residues in "SEQRES".
1579
+ If chainID is given, it returns the corresponding sequence as a Bio::Sequence::AA object.
1580
+ Otherwise, returns a hash which contains all sequences.
1581
+
1582
+ --- Bio::PDB#helix(helixID = nil)
1583
+
1584
+ Gets HELIX records.
1585
+ If no arguments are given, it returns all HELIX records.
1586
+ (Returns an array of Bio::PDB::Record instances.)
1587
+ If helixID is given, it only returns records corresponding to given helixID.
1588
+ (Returns a Bio::PDB::Record instance.)
1589
+
1590
+ --- Bio::PDB#sheet(sheetID = nil)
1591
+
1592
+ Gets SHEET records.
1593
+ If no arguments are given, it returns all SHEET records as an array of
1594
+ arrays of Bio::PDB::Record instances.
1595
+ If sheetID is given, it returns an array of Bio::PDB::Record instances.
1596
+
1597
+ --- Bio::PDB#turn(turnId = nil)
1598
+
1599
+ Gets TURN records.
1600
+ If no arguments are given, it returns all TURN records.
1601
+ (Returns an array of Bio::PDB::Record instances.)
1602
+ If turnId is given, it only returns a record corresponding to given turnId.
1603
+ (Returns a Bio::PDB::Record instance.)
1604
+
1605
+ --- Bio::PDB.addModel(model)
1606
+
1607
+ Adds a model to the current structure
1608
+ Returns self
1609
+
1610
+ --- Bio::PDB.each
1611
+
1612
+ Iterates over each of the models in the structure
1613
+ Returns Bio::PDB::Models
1614
+
1615
+ --- Bio::PDB[](key)
1616
+
1617
+ Returns the model with the given key as serial number
1618
+
1619
+ --- Bio::PDB.to_s
1620
+
1621
+ Returns a string of Bio::PDB::Models. This propagates down the hierarchy
1622
+ until it reaches the Bio::PDB::Atoms, which are output in PDB format
1623
+
1624
+
1625
+ = Bio::PDB::Record < Hash
1626
+
1627
+ A class for single PDB record.
1628
+ Basically, each line of a PDB file corresponds to an instance of the class.
1629
+ If continuation exists, multiple lines may correspond to single instance.
1630
+
1631
+ --- Bio::PDB::Record.new(line)
1632
+
1633
+ Internal use only.
1634
+ Creates a new instance.
1635
+
1636
+ --- Bio::PDB::Record#add_continuation(line)
1637
+
1638
+ Internal use only.
1639
+ If continuation is allowed and 'line' is a continuation of this record,
1640
+ it adds 'line' and returns self.
1641
+ Otherwise, returns false.
1642
+
1643
+ --- Bio::PDB::Record#original_data
1644
+
1645
+ Original text (except that "\n" are truncated) of this record.
1646
+ Returns an array of string.
1647
+
1648
+ --- Bio::PDB::Record#record_type
1649
+
1650
+ Record type of this record, e.g. "HEADER", "ATOM".
1651
+
1652
+ --- Bio::PDB::Record#do_parse
1653
+
1654
+ In order to speed up processing of the PDB file format,
1655
+ fields have not been parsed before calling this method.
1656
+
1657
+ If you want to use this class as a hash (not so recommended),
1658
+ you must call this method once.
1659
+
1660
+ When accessing via rec.xxxxx style (described below),
1661
+ do_parse is automatically called.
1662
+
1663
+ Returns self
1664
+
1665
+ --- Bio::PDB::Record#"anything"
1666
+
1667
+ Same as Bio::PDB::Record#[](:anything) after do_parse.
1668
+ For example, r.helixID is same as r.do_parse; r[:helixID] .
1669
+
1670
+
1671
+ = Bio::PDB::FieldDef
1672
+
1673
+ Internal use only.
1674
+ Format definition of each record.
1675
+
1676
+ = References
1677
+
1678
+ * ((<URL:http://www.rcsb.org/pdb/>))
1679
+ * PDB File Format Contents Guide Version 2.2 (20 December 1996)
1680
+ ((<URL:http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html>))
1681
+
1682
+ =end