bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,283 @@
1
+ #
2
+ # = bio/appl/targetp/report.rb - TargetP report class
3
+ #
4
+ # Copyright:: Copyright (C) 2003 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # License:: LGPL
6
+ #
7
+ # $Id: report.rb,v 1.7 2005/12/18 15:58:41 k Exp $
8
+ #
9
+ # == Description
10
+ #
11
+ # TargetP class for http://www.cbs.dtu.dk/services/TargetP/
12
+ #
13
+ # == Example
14
+ # == References
15
+ #--
16
+ #
17
+ # This library is free software; you can redistribute it and/or
18
+ # modify it under the terms of the GNU Lesser General Public
19
+ # License as published by the Free Software Foundation; either
20
+ # version 2 of the License, or (at your option) any later version.
21
+ #
22
+ # This library is distributed in the hope that it will be useful,
23
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
24
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25
+ # Lesser General Public License for more details.
26
+ #
27
+ # You should have received a copy of the GNU Lesser General Public
28
+ # License along with this library; if not, write to the Free Software
29
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30
+ #
31
+ #++
32
+ #
33
+
34
module Bio

  class TargetP

    # = A parser and container class for TargetP report.
    #
    # The report text is parsed line by line at construction time.
    # Header lines fill #version, #query_sequences,
    # #cleavage_site_prediction and #networks; the column-label row,
    # the value row and the cutoff row fill #prediction and #cutoff.
    class Report

      # Delimiter
      DELIMITER = "\n \n"

      # Delimiter
      RS = DELIMITER

      # Returns the program version as a String (e.g. "1.0"),
      # or nil when no version line was found.
      attr_reader :version

      # Returns the number of query sequences (Integer) taken from the
      # "Number of query/input sequences:" header line, or nil when
      # that line is absent.
      attr_reader :query_sequences

      # Returns 'included' or 'not included'.
      # If the value is 'included', Bio::TargetP::Report#prediction['TPlen']
      # contains a valid value.
      attr_reader :cleavage_site_prediction

      # Returns ``PLANT'' or ``NON-PLANT'' networks.
      attr_reader :networks

      # Returns a Hash of the prediction results.
      #
      #   {"Name"=>"MGI_2141503", "Loc."=>"_", "RC"=>3, "SP"=>0.271,
      #    "other"=>0.844, "mTP"=>0.161, "cTP"=>0.031, "Length"=>640}
      #
      # Keys: Name, Len, SP, mTP, other, Loc, RC
      # Optional key for PLANT networks: cTP
      # Optional key in Cleavage site: TPlen
      #
      # Use 'Length' and 'Loc.' instead of 'Len' and 'Loc' respectively
      # for the version 1.0 report.
      attr_reader :prediction

      # Returns a Hash of cutoff values keyed by score label
      # (e.g. "cTP", "mTP", "SP", "other"); values are Floats.
      attr_reader :cutoff

      # Sets output report.
      #
      # str:: the text of one TargetP report (String).
      def initialize(str)
        @version                  = nil
        @query_sequences          = nil
        @cleavage_site_prediction = nil
        @networks                 = nil
        @prediction               = {}
        @cutoff                   = {}
        parse_entry(str)
      end

      alias pred prediction

      # Returns the name of query sequence.
      def name
        @prediction['Name']
      end
      alias entry_id name

      # Returns length of query sequence.
      # Looks up 'Len' first and falls back to 'Length'.
      def query_len
        @prediction['Len'] || @prediction['Length']
      end
      alias length query_len

      # Returns the predicted localization signal:
      # 1. S (Signal peptide)
      # 2. M (mTP)
      # 3. C (cTP)
      # 4. *
      # 5. _
      #
      # Looks up 'Loc' first and falls back to 'Loc.' (the label used
      # by the version 1.0 report).
      def loc
        @prediction['Loc'] || @prediction['Loc.']
      end

      # Returns RC.
      def rc
        @prediction['RC']
      end

      private

      # Walks through the report line by line and dispatches on the
      # kind of line found.
      def parse_entry(str)
        labels = []

        str.split("\n").each do |line|
          case line
          when /targetp v(\d+\.\d+)/, /T A R G E T P\s+(\d+\.\d+)/
            @version = $1

          when /Number of (?:query|input) sequences:\s+(\d+)/
            # The alternation is non-capturing so that $1 is the count.
            @query_sequences = $1.to_i

          when /Cleavage site predictions (\w.+)\./
            @cleavage_site_prediction = $1

          when /Using (\w+.+) networks/
            @networks = $1

          when /Name +Len/
            # Column-label row; an optional leading "#" is dropped.
            labels = line.sub(/^\s*\#?\s*/, '').split(/\s+/)

          when /cutoff/
            parse_cutoff(line, labels)

          when /-----$/
            # separator row: nothing to do
          when /^ +$/, ''
            # blank row: nothing to do
          else
            parse_values(line, labels)
          end
        end
      end

      # Stores the cutoff values under the score labels.
      #
      # Everything up to and including the word "cutoff" is removed
      # first, so both "# cutoff 0.0 ..." and "cutoff 0.0 ..." yield
      # only the numeric columns. The scores are taken from the four
      # columns that follow the name and length columns; a "Loc"
      # column falling inside that window is skipped.
      def parse_cutoff(line, labels)
        cutoffs = line.sub(/^.*?cutoff\s*/, '').split(/\s+/)
        (labels[2, 4] || []).each_with_index do |label, i|
          next if label =~ /Loc/
          @cutoff[label] = cutoffs[i].to_f
        end
      end

      # Stores one row of values into @prediction, converting each
      # column according to its label. Columns without a matching
      # label (e.g. rows seen before the label row) are ignored.
      def parse_values(line, labels)
        line.sub(/^\s*/, '').split(/\s+/).each_with_index do |val, i|
          label = labels[i]
          next if label.nil?

          case label
          when 'RC', /Len/
            val = val.to_i
          when 'SP', 'mTP', 'cTP', 'other'
            val = val.to_f
          end
          @prediction[label] = val
        end
      end

    end # class Report

  end # class TargetP

end # module Bio
180
+
181
+
182
+
183
if __FILE__ == $0

  # Demo / smoke test: parses three embedded sample reports, then any
  # reports found in the files named on the command line.

  begin
    require 'pp'
    alias p pp
  rescue LoadError
    # pp is optional; Kernel#p is used when it cannot be loaded.
  end


  # PLANT networks, cleavage site predictions not included.
  plant = <<HOGE

### ### ### T A R G E T P 1.0 prediction results ### ### ###

# Number of input sequences: 1
# Cleavage site predictions not included.
# Using PLANT networks.

# Name Length cTP mTP SP other Loc. RC
#----------------------------------------------------------------------------------
MGI_2141503 640 0.031 0.161 0.271 0.844 _ 3
#----------------------------------------------------------------------------------
# cutoff 0.00 0.00 0.00 0.00


HOGE

  # PLANT networks, cleavage site predictions included (TPlen column).
  plant_c = <<HOGE

### ### ### T A R G E T P 1.0 prediction results ### ### ###

# Number of input sequences: 1
# Cleavage site predictions included.
# Using PLANT networks.

# Name Length cTP mTP SP other Loc. RC TPlen
#----------------------------------------------------------------------------------
MGI_2141503 640 0.031 0.161 0.271 0.844 _ 3 -
#----------------------------------------------------------------------------------
# cutoff 0.00 0.00 0.00 0.00



HOGE

  # NON-PLANT networks (no cTP column), cleavage site predictions included.
  non_plant_c = <<HOGE

### ### ### T A R G E T P 1.0 prediction results ### ### ###

# Number of input sequences: 1
# Cleavage site predictions included.
# Using NON-PLANT networks.

# Name Length mTP SP other Loc. RC TPlen
#--------------------------------------------------------------------------
MGI_96083 2187 0.292 0.053 0.746 _ 3 -
#--------------------------------------------------------------------------
# cutoff 0.00 0.00 0.00



HOGE


  # Prints the raw report text, the parsed object and every public
  # accessor of the resulting Bio::TargetP::Report.
  def hoge(e)
    puts e
    ent = Bio::TargetP::Report.new(e)
    # p is pp when pp could be loaded (see the alias above), so this
    # still works when the optional require failed.
    p ent

    p [:entry_id, ent.entry_id]
    p [:name, ent.name]
    p [:version, ent.version]
    p [:query_sequences, ent.query_sequences]
    p [:cleavage_site_prediction, ent.cleavage_site_prediction]
    p [:networks, ent.networks]
    p [:query_len, ent.query_len]
    p [:prediction, ent.prediction]
    p [:pred_Name, ent.pred['Name']]
    p [:pred_SP, ent.pred['SP']]
    p [:pred_mTP, ent.pred['mTP']]
    p [:cutoff, ent.cutoff]
    p [:loc, ent.loc]
    p [:rc, ent.rc]

    puts '=='
  end


  [plant, plant_c, non_plant_c].each {|e| hoge(e) }

  # Without file arguments only the embedded samples are shown.
  exit if ARGV.empty?

  # Read the named files record by record, using the report delimiter
  # as the record separator.
  while ent = $<.gets(Bio::TargetP::Report::DELIMITER)
    hoge(ent)
  end

end
281
+
282
+
283
+
@@ -0,0 +1,238 @@
1
+ #
2
+ # = bio/appl/tmhmm/report.rb - TMHMM report class
3
+ #
4
+ # Copyright:: Copyright (C) 2003 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # License:: LGPL
6
+ #
7
+ # $Id: report.rb,v 1.6 2005/12/18 15:58:41 k Exp $
8
+ #
9
+ # == Description
10
+ #
11
+ #
12
+ # == Example
13
+ # == References
14
+ #--
15
+ #
16
+ # This library is free software; you can redistribute it and/or
17
+ # modify it under the terms of the GNU Lesser General Public
18
+ # License as published by the Free Software Foundation; either
19
+ # version 2 of the License, or (at your option) any later version.
20
+ #
21
+ # This library is distributed in the hope that it will be useful,
22
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
23
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24
+ # Lesser General Public License for more details.
25
+ #
26
+ # You should have received a copy of the GNU Lesser General Public
27
+ # License along with this library; if not, write to the Free Software
28
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29
+ #
30
+ #++
31
+ #
32
+
33
module Bio

  # = TMHMM class for http://www.cbs.dtu.dk/services/TMHMM/
  class TMHMM

    # Splits multiple reports into a report entry.
    #
    # Lines are grouped into entries: a line starting with "#" that
    # follows at least one non-"#" line starts a new entry. A
    # Bio::TMHMM::Report is built for every entry.
    #
    # data:: an object responding to each_line (e.g. a String).
    #
    # With a block, each Report is yielded in order and the value of
    # the last block call is returned. Without a block, only the
    # Report built from the last entry is returned.
    def TMHMM.reports(data)
      entry     = []
      ent_state = ''
      data.each_line do |line|
        if /^\#/ =~ line
          case ent_state
          when 'next' then ent_state = 'entry'
          when 'tmh'  then ent_state = 'next'   # header after segments
          end
        else
          ent_state = 'tmh'
        end

        if ent_state == 'next'
          # The current line belongs to the following entry: flush the
          # collected lines first.
          report = Bio::TMHMM::Report.new(entry)
          yield report if block_given?
          entry = [line]
        else
          entry << line
        end
      end

      report = Bio::TMHMM::Report.new(entry)
      block_given? ? yield(report) : report
    end

    # = TMHMM report parser class.
    class Report

      # Returns an Array of Bio::TMHMM::TMH.
      attr_reader :tmhs

      # Returns the entry identifier taken from the "Length:" header
      # line (String), or nil when that line is absent.
      attr_reader :entry_id

      # Returns the value of the "Length:" header line (Integer).
      attr_reader :query_len

      # Returns the value of the "Number of predicted TMHs:" header
      # line (Integer).
      attr_reader :predicted_tmhs

      # Returns the value of the "Exp number of AAs in TMHs:" header
      # line (Float).
      attr_reader :exp_aas_in_tmhs

      # Returns the value of the "Exp number, first 60 AAs:" header
      # line (Float).
      attr_reader :exp_first_60aa

      # Returns the value of the "Total prob of N-in:" header line
      # (Float).
      attr_reader :total_prob_of_N_in

      alias length query_len

      # Parses one report entry.
      #
      # entry:: the lines of one report, given as an Array of lines,
      #         as any object responding to each_line (e.g. a String),
      #         or nil for an empty report.
      def initialize(entry = nil)
        @entry_id           = nil
        @query_len          = nil
        @predicted_tmhs     = nil
        @exp_aas_in_tmhs    = nil
        @exp_first_60aa     = nil
        @total_prob_of_N_in = nil

        lines = entry_lines(entry)
        parse_header(lines)
        @tmhs = parse_tmhs(lines)
      end

      # Returns an Array of Bio::TMHMM::TMH including only "TMhelix".
      def helix
        @tmhs.select {|t| t.status == 'TMhelix' }
      end

      # Returns the report as text: one tab-separated "# " header line
      # per summary value, followed by one line per segment.
      def to_s
        header = [
          ["Length:",                    @query_len],
          ["Number of predicted TMHs:",  @predicted_tmhs],
          ["Exp number of AAs in TMHs:", @exp_aas_in_tmhs],
          ["Exp number, first 60 AAs:",  @exp_first_60aa],
          ["Total prob of N-in:",        @total_prob_of_N_in]
        ].map {|label, value| "\# " + [@entry_id, label, value].join("\t") }

        (header + tmhs.map {|ent| ent.to_s }).join("\n")
      end


      private

      # Normalizes the constructor argument into an Array of lines.
      def entry_lines(entry)
        return [] if entry.nil?
        return entry.to_a unless entry.respond_to?(:each_line)

        lines = []
        entry.each_line {|line| lines << line }
        lines
      end

      # Reads the summary values from the lines starting with "#".
      def parse_header(raw)
        raw.each do |line|
          next unless /^#/.match(line)

          case line
          when / (\S.+) Length: +(\d+)/
            @entry_id  = $1.strip
            @query_len = $2.to_i
          when /Number of predicted TMHs: +(\d+)/
            @predicted_tmhs = $1.to_i
          when /Exp number of AAs in TMHs: +([\d\.]+)/
            @exp_aas_in_tmhs = $1.to_f
          when /Exp number, first 60 AAs: +([\d\.]+)/
            @exp_first_60aa = $1.to_f
          when /Total prob of N-in: +([\d\.]+)/
            @total_prob_of_N_in = $1.to_f
          end
        end
      end

      # Builds a Bio::TMHMM::TMH for every line not starting with "#".
      # Blank lines and lines with fewer than three fields are skipped.
      def parse_tmhs(raw)
        tmhs = []
        raw.each do |line|
          next if /^\#/ =~ line

          # Whitespace split without a pattern ignores leading blanks.
          eid, version, status, r0, r1 = line.split
          next if status.nil?

          tmhs << Bio::TMHMM::TMH.new(eid, version, status,
                                      Range.new(r0.to_i, r1.to_i))
        end
        tmhs
      end

    end # class Report


    # = Container class of the transmembrane helix(TMH) and the other
    #   segments.
    class TMH

      # Returns the entry identifier (first column of the segment line).
      attr_accessor :entry_id

      # Returns the second column of the segment line.
      attr_accessor :version

      # Returns the status of the TMH. ("outside", "TMhelix" or "inside").
      attr_accessor :status

      # Returns an Range of TMH position.
      attr_accessor :range

      alias pos range

      # Stores the given values as they are; every argument is optional.
      def initialize(entry_id = nil, version = nil, status = nil, range = nil)
        @entry_id = entry_id
        @version  = version
        @status   = status
        @range    = range
      end

      # Returns the segment as one tab-separated line. The two position
      # columns are left empty when no range has been set.
      def to_s
        bounds = @range ? [@range.first, @range.last] : [nil, nil]
        ([@entry_id, @version, @status] + bounds).join("\t")
      end

    end # class TMH

  end # class TMHMM

end # module Bio
202
+
203
+
204
if __FILE__ == $0

  # Demo: parses the TMHMM reports read from ARGF and dumps every
  # parsed value.

  begin
    require 'pp'
    alias p pp
  rescue LoadError
    # pp is optional
  end

  Bio::TMHMM.reports(ARGF.read) do |report|
    puts '==>'
    puts report.to_s
    pp report

    # Summary values of the report, in display order.
    summary = [
      [:entry_id,           report.entry_id],
      [:query_len,          report.query_len],
      [:predicted_tmhs,     report.predicted_tmhs],
      [:tmhs_size,          report.tmhs.size],
      [:exp_aas_in_tmhs,    report.exp_aas_in_tmhs],
      [:exp_first_60aa,     report.exp_first_60aa],
      [:total_prob_of_N_in, report.total_prob_of_N_in]
    ]
    summary.each {|pair| p pair }

    # Every segment, followed by each of its attributes.
    report.tmhs.each do |segment|
      p segment
      [:entry_id, :version, :status, :range, :pos].each do |attribute|
        p [attribute, segment.send(attribute)]
      end
    end

    p [:helix, report.helix]
    p report.tmhs.select {|segment| segment.status == 'TMhelix' }
  end

end