bio 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -7,7 +7,7 @@
7
7
  # Toshiaki Katayama <k@bioruby.org>
8
8
  # License:: The Ruby License
9
9
  #
10
- # $Id: xmlparser.rb,v 1.17 2007/04/05 23:35:39 trevor Exp $
10
+ # $Id:$
11
11
  #
12
12
  # == Description
13
13
  #
@@ -115,26 +115,35 @@ class Blast
115
115
  end
116
116
  end
117
117
 
118
- def xmlparser_parse_parameters(hash)
119
- labels = {
120
- 'matrix' => 'Parameters_matrix',
121
- 'expect' => 'Parameters_expect',
122
- 'include' => 'Parameters_include',
123
- 'sc-match' => 'Parameters_sc-match',
124
- 'sc-mismatch' => 'Parameters_sc-mismatch',
125
- 'gap-open' => 'Parameters_gap-open',
126
- 'gap-extend' => 'Parameters_gap-extend',
127
- 'filter' => 'Parameters_filter',
128
- 'pattern' => 'Parameters_pattern',
129
- 'entrez-query' => 'Parameters_entrez-query',
130
- }
131
- labels.each do |k,v|
118
+ # set parameter of the key as val
119
+ def xml_set_parameter(key, val)
120
+ #labels = {
121
+ # 'matrix' => 'Parameters_matrix',
122
+ # 'expect' => 'Parameters_expect',
123
+ # 'include' => 'Parameters_include',
124
+ # 'sc-match' => 'Parameters_sc-match',
125
+ # 'sc-mismatch' => 'Parameters_sc-mismatch',
126
+ # 'gap-open' => 'Parameters_gap-open',
127
+ # 'gap-extend' => 'Parameters_gap-extend',
128
+ # 'filter' => 'Parameters_filter',
129
+ # 'pattern' => 'Parameters_pattern',
130
+ # 'entrez-query' => 'Parameters_entrez-query',
131
+ #}
132
+ k = key.sub(/\AParameters\_/, '')
133
+ @parameters[k] =
132
134
  case k
133
- when 'filter', 'matrix'
134
- @parameters[k] = hash[v].to_s
135
+ when 'expect', 'include'
136
+ val.to_f
137
+ when /\Agap\-/, /\Asc\-/
138
+ val.to_i
135
139
  else
136
- @parameters[k] = hash[v].to_i
140
+ val
137
141
  end
142
+ end
143
+
144
+ def xmlparser_parse_parameters(hash)
145
+ hash.each do |k, v|
146
+ xml_set_parameter(k, v)
138
147
  end
139
148
  end
140
149
 
@@ -144,6 +153,14 @@ class Blast
144
153
  @iterations.last.num = hash[tag].to_i
145
154
  when 'Iteration_message'
146
155
  @iterations.last.message = hash[tag].to_s
156
+
157
+ # for new BLAST XML format
158
+ when 'Iteration_query-ID'
159
+ @iterations.last.query_id = hash[tag].to_s
160
+ when 'Iteration_query-def'
161
+ @iterations.last.query_def = hash[tag].to_s
162
+ when 'Iteration_query-len'
163
+ @iterations.last.query_len = hash[tag].to_i
147
164
  end
148
165
  end
149
166
 
@@ -1,10 +1,10 @@
1
1
  #
2
2
  # = bio/appl/blat/report.rb - BLAT result parser
3
3
  #
4
- # Copyright:: Copyright (C) 2004 GOTO Naohisa <ng@bioruby.org>
4
+ # Copyright:: Copyright (C) 2004, 2006, 2008 Naohisa Goto <ng@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: report.rb,v 1.13 2007/04/05 23:35:39 trevor Exp $
7
+ # $Id:$
8
8
  #
9
9
  # BLAT result parser (psl / pslx format).
10
10
  #
@@ -46,15 +46,18 @@ module Bio
46
46
  # In Bio::Blat::Report, it is nil (1 entry 1 file).
47
47
  DELIMITER = RS = nil # 1 file 1 entry
48
48
 
49
+ # Splitter for Bio::FlatFile
50
+ FLATFILE_SPLITTER = Bio::FlatFile::Splitter::LineOriented
51
+
49
52
  # Creates a new Bio::Blat::Report object from BLAT result text (String).
50
53
  # You can use Bio::FlatFile to read a file.
51
54
  # Currently, results created with options -out=psl (default) or
52
55
  # -out=pslx are supported.
53
- def initialize(text)
56
+ def initialize(text = '')
54
57
  flag = false
55
58
  head = []
56
59
  @hits = []
57
- text.each do |line|
60
+ text.each_line do |line|
58
61
  if flag then
59
62
  @hits << Hit.new(line)
60
63
  else
@@ -71,7 +74,45 @@ module Bio
71
74
  end
72
75
  end
73
76
  end
74
- @columns = parse_header(head)
77
+ @columns = parse_header(head) unless head.empty?
78
+ end
79
+
80
+ # Adds a header line if the header data is not yet given and
81
+ # the given line is suitable for header.
82
+ # Returns self if the header line was added successfully.
83
+ # Otherwise, returns false (the line is not added).
84
+ def add_header_line(line)
85
+ return false if defined? @columns
86
+ line = line.chomp
87
+ case line
88
+ when /^\d/
89
+ @columns = (defined? @header_lines) ? parse_header(@header_lines) : []
90
+ return false
91
+ when /\A\-+\s*\z/
92
+ @columns = (defined? @header_lines) ? parse_header(@header_lines) : []
93
+ return self
94
+ else
95
+ @header_lines ||= []
96
+ @header_lines.push line
97
+ end
98
+ end
99
+
100
+ # Adds a line to the entry if the given line is regarded as
101
+ # a part of the current entry.
102
+ # If the current entry (self) is empty, or the line has the same
103
+ # query name, the line is added and returns self.
104
+ # Otherwise, returns false (the line is not added).
105
+ def add_line(line)
106
+ if /\A\s*\z/ =~ line then
107
+ return @hits.empty? ? self : false
108
+ end
109
+ hit = Hit.new(line.chomp)
110
+ if @hits.empty? or @hits.first.query.name == hit.query.name then
111
+ @hits.push hit
112
+ return self
113
+ else
114
+ return false
115
+ end
75
116
  end
76
117
 
77
118
  # hits of the result.
@@ -5,7 +5,7 @@
5
5
  # Copyright:: Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: emboss.rb,v 1.8 2007/04/05 23:35:39 trevor Exp $
8
+ # $Id: emboss.rb,v 1.9 2008/01/10 03:51:06 ngoto Exp $
9
9
  #
10
10
 
11
11
  module Bio
@@ -35,22 +35,34 @@ module Bio
35
35
  #
36
36
  # # Suppose that you could get the sequence for XLRHODOP by running
37
37
  # # the EMBOSS command +seqret embl:xlrhodop+ on the command line.
38
- # # Then you can get the output of that command in a Bio::EMBOSS object
39
- # # by creating a new Bio::EMBOSS object and subsequently executing it.
40
- # xlrhodop = Bio::EMBOSS.new('seqret embl:xlrhodop')
41
- # puts xlrhodop.exec
38
+ # # Then you can get the output of that command in a String object
39
+ # # by using Bio::EMBOSS.run method.
40
+ # xlrhodop = Bio::EMBOSS.run('seqret', 'embl:xlrhodop')
41
+ # puts xlrhodop
42
42
  #
43
43
  # # Or all in one go:
44
- # puts Bio::EMBOSS.new('seqret embl:xlrhodop').exec
44
+ # puts Bio::EMBOSS.run('seqret', 'embl:xlrhodop')
45
45
  #
46
46
  # # Similarly:
47
- # puts Bio::EMBOSS.new('transeq -sbegin 110 -send 1171 embl:xlrhodop')
48
- # puts Bio::EMBOSS.new('showfeat embl:xlrhodop').exec
49
- # puts Bio::EMBOSS.new('seqret embl:xlrhodop -osformat acedb').exec
47
+ # puts Bio::EMBOSS.run('transeq', '-sbegin', '110','-send', '1171',
48
+ # 'embl:xlrhodop')
49
+ # puts Bio::EMBOSS.run('showfeat', 'embl:xlrhodop')
50
+ # puts Bio::EMBOSS.run('seqret', 'embl:xlrhodop', '-osformat', 'acedb')
50
51
  #
51
52
  # # A shortcut exists for this two-step process for +seqret+ and +entret+.
52
53
  # puts Bio::EMBOSS.seqret('embl:xlrhodop')
53
54
  # puts Bio::EMBOSS.entret('embl:xlrhodop')
55
+ #
56
+ # # You can use %w() syntax.
57
+ # puts Bio::EMBOSS.run(*%w( transeq -sbegin 110 -send 1171 embl:xlrhodop ))
58
+ #
59
+ # # You can also use Shellwords.shellwords.
60
+ # require 'shellwords'
61
+ # str = 'transeq -sbegin 110 -send 1171 embl:xlrhodop'
62
+ # cmd = Shellwords.shellwords(str)
63
+ # puts Bio::EMBOSS.run(*cmd)
64
+ #
65
+
54
66
  #
55
67
  # == Pre-requisites
56
68
  #
@@ -76,8 +88,8 @@ class EMBOSS
76
88
  # puts object.exec
77
89
  # ---
78
90
  # *Arguments*:
79
- # * (required) _command_: emboss command
80
- # *Returns*:: Bio::EMBOSS object
91
+ # * (required) _arg_: argument given to the emboss seqret command
92
+ # *Returns*:: String
81
93
  def self.seqret(arg)
82
94
  str = self.retrieve('seqret', arg)
83
95
  end
@@ -92,12 +104,16 @@ class EMBOSS
92
104
  # puts object.exec
93
105
  # ---
94
106
  # *Arguments*:
95
- # * (required) _command_: emboss command
96
- # *Returns*:: Bio::EMBOSS object
107
+ # * (required) _arg_: argument given to the emboss entret command
108
+ # *Returns*:: String
97
109
  def self.entret(arg)
98
110
  str = self.retrieve('entret', arg)
99
111
  end
100
112
 
113
+ # WARNING: Bio::EMBOSS.new will be changed in the future because
114
+ # Bio::EMBOSS.new(cmd_line) is inconvenient and a potential security hole.
115
+ # Using Bio::EMBOSS.run(program, options...) is strongly recommended.
116
+ #
101
117
  # Initializes a new Bio::EMBOSS object. This provides a holder that can
102
118
  # subsequently be executed (see Bio::EMBOSS.exec). The object does _not_
103
119
  # hold any actual data when initialized.
@@ -114,6 +130,7 @@ class EMBOSS
114
130
  # * (required) _command_: emboss command
115
131
  # *Returns*:: Bio::EMBOSS object
116
132
  def initialize(cmd_line)
133
+ warn 'Bio::EMBOSS.new(cmd_line) is inconvenient and potential security hole. Using Bio::EMBOSS.run(program, options...) is strongly recommended.'
117
134
  @cmd_line = cmd_line + ' -stdout -auto'
118
135
  end
119
136
 
@@ -142,6 +159,38 @@ class EMBOSS
142
159
  # Result of the executed command
143
160
  attr_reader :result
144
161
 
162
+ # Runs an emboss program and gets the result as a string.
163
+ # Note that "-auto -stdout" are automatically added to the options.
164
+ #
165
+ # Example 1:
166
+ #
167
+ # result = Bio::EMBOSS.run('seqret', 'embl:xlrhodop')
168
+ #
169
+ # Example 2:
170
+ #
171
+ # result = Bio::EMBOSS.run('water',
172
+ # '-asequence', 'swissprot:slpi_human',
173
+ # '-bsequence', 'swissprot:slpi_mouse')
174
+ #
175
+ # Example 3:
176
+ # options = %w( -asequence swissprot:slpi_human
177
+ # -bsequence swissprot:slpi_mouse )
178
+ # result = Bio::EMBOSS.run('needle', *options)
179
+ #
180
+ # For an overview of commands that can be used with this method, see the
181
+ # emboss website.
182
+ # ---
183
+ # *Arguments*:
184
+ # * (required) _program_: command name, or filename of an emboss program
185
+ # * _options_: options given to the emboss program
186
+ # *Returns*:: String
187
+ def self.run(program, *options)
188
+ cmd = [ program, *options ]
189
+ cmd.push '-auto'
190
+ cmd.push '-stdout'
191
+ return Bio::Command.query_command(cmd)
192
+ end
193
+
145
194
  private
146
195
 
147
196
  def self.retrieve(cmd, arg)
@@ -4,11 +4,11 @@
4
4
  # Copyright:: Copyright (C) 2001, 2002 Toshiaki Katayama <k@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: fasta.rb,v 1.25 2007/05/18 15:22:52 k Exp $
7
+ # $Id:$
8
8
  #
9
9
 
10
10
  require 'net/http'
11
- require 'cgi' unless defined?(CGI)
11
+ require 'uri'
12
12
  require 'bio/command'
13
13
  require 'shellwords'
14
14
 
@@ -154,16 +154,14 @@ class Fasta
154
154
  'style' => 'raw',
155
155
  'prog' => @program,
156
156
  'dbname' => @db,
157
- 'sequence' => CGI.escape(query),
158
- 'other_param' => CGI.escape(Bio::Command.make_command_line_unix(@options)),
157
+ 'sequence' => query,
158
+ 'other_param' => Bio::Command.make_command_line_unix(@options),
159
159
  'ktup_value' => @ktup,
160
160
  'matrix' => @matrix,
161
161
  }
162
162
 
163
- data = []
164
-
165
- form.each do |k, v|
166
- data.push("#{k}=#{v}") if v
163
+ form.keys.each do |k|
164
+ form.delete(k) unless form[k]
167
165
  end
168
166
 
169
167
  report = nil
@@ -172,7 +170,7 @@ class Fasta
172
170
  http = Bio::Command.new_http(host)
173
171
  http.open_timeout = 3000
174
172
  http.read_timeout = 6000
175
- result, = http.post(path, data.join('&'))
173
+ result = Bio::Command.http_post_form(http, path, form)
176
174
  # workaround 2006.8.1 - fixed for new batch queuing system
177
175
  case result.code
178
176
  when "302"
@@ -191,9 +189,9 @@ class Fasta
191
189
  end
192
190
  @output = result.body.to_s
193
191
  # workaround 2005.08.12
194
- re = %r{<A HREF="http://#{host}(/tmp/[^"]+)">Show all result</A>} # "
192
+ re = %r{<A HREF="http://#{host}(/tmp/[^"]+)">Show all result</A>}i # "
195
193
  if path = @output[re, 1]
196
- result, = http.get(path)
194
+ result = http.get(path)
197
195
  @output = result.body
198
196
  txt = @output.to_s.split(/\<pre\>/)[1]
199
197
  raise 'cannot understand response' unless txt
@@ -5,7 +5,7 @@
5
5
  # Mitsuteru C. Nakao <n@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: report.rb,v 1.10 2007/04/05 23:35:39 trevor Exp $
8
+ # $Id:$
9
9
  #
10
10
  # == Description
11
11
  #
@@ -75,7 +75,7 @@ class Genscan
75
75
  @isochore = nil
76
76
  @matrix = nil
77
77
 
78
- report.each("\n") do |line|
78
+ report.each_line("\n") do |line|
79
79
  case line
80
80
  when /^GENSCAN/
81
81
  parse_headline(line)
@@ -94,7 +94,7 @@ class Genscan
94
94
 
95
95
  # genes/exons
96
96
  genes_region = report[i...j]
97
- genes_region.each("\n") do |line|
97
+ genes_region.each_line("\n") do |line|
98
98
  if /Init|Intr|Term|PlyA|Prom|Sngl/ =~ line
99
99
  gn, en = line.strip.split(" +")[0].split(/\./).map {|i| i.to_i }
100
100
  add_exon(gn, en, line)
@@ -64,7 +64,7 @@ class HMMER
64
64
  @output = ''
65
65
 
66
66
  begin
67
- @options = opt.to_ary
67
+ @options = options.to_ary
68
68
  rescue NameError #NoMethodError
69
69
  # backward compatibility
70
70
  @options = Shellwords.shellwords(options)
@@ -7,7 +7,7 @@
7
7
  # Masashi Fujita <fujita@kuicr.kyoto-u.ac.jp>
8
8
  # License:: The Ruby License
9
9
  #
10
- # $Id: report.rb,v 1.13 2007/04/05 23:35:40 trevor Exp $
10
+ # $Id:$
11
11
  #
12
12
  # == Description
13
13
  #
@@ -61,7 +61,7 @@ class HMMER
61
61
  #
62
62
  def self.reports(multiple_report_text)
63
63
  ary = []
64
- multiple_report_text.each("\n//\n") do |report|
64
+ multiple_report_text.each_line("\n//\n") do |report|
65
65
  if block_given?
66
66
  yield Report.new(report)
67
67
  else
@@ -267,7 +267,7 @@ class HMMER
267
267
  program['license'] = program_data.split(/\n/)
268
268
 
269
269
  parameter = {}
270
- parameter_data.each do |x|
270
+ parameter_data.each_line do |x|
271
271
  if /^(.+?):\s+(.*?)\s*$/ =~ x
272
272
  parameter[$1] = $2
273
273
  end
@@ -281,7 +281,7 @@ class HMMER
281
281
  # Bio::HMMER::Report#parse_query_info
282
282
  def parse_query_info(data)
283
283
  hash = {}
284
- data.each do |x|
284
+ data.each_line do |x|
285
285
  if /^(.+?):\s+(.*?)\s*$/ =~ x
286
286
  hash[$1] = $2
287
287
  elsif /\s+\[(.+)\]/ =~ x
@@ -298,7 +298,7 @@ class HMMER
298
298
  data.sub!(/.+?---\n/m, '').chop!
299
299
  hits = []
300
300
  return hits if data == "\t[no hits above thresholds]\n"
301
- data.each do |l|
301
+ data.each_line do |l|
302
302
  hits.push(Hit.new(l))
303
303
  end
304
304
  hits
@@ -311,7 +311,7 @@ class HMMER
311
311
  data.sub!(/.+?---\n/m, '').chop!
312
312
  hsps=[]
313
313
  return hsps if data == "\t[no hits above thresholds]\n"
314
- data.each do |l|
314
+ data.each_line do |l|
315
315
  hsps.push(Hsp.new(l, is_hmmsearch))
316
316
  end
317
317
  return hsps
@@ -326,19 +326,19 @@ class HMMER
326
326
 
327
327
  statistical_detail = {}
328
328
  data.sub!(/(.+?)\n\n/m, '')
329
- $1.each do |l|
329
+ $1.each_line do |l|
330
330
  statistical_detail[$1] = $2.to_f if /^\s*(.+?)\s*=\s*(\S+)/ =~ l
331
331
  end
332
332
 
333
333
  total_seq_searched = nil
334
334
  data.sub!(/(.+?)\n\n/m, '')
335
- $1.each do |l|
335
+ $1.each_line do |l|
336
336
  total_seq_searched = $2.to_i if /^\s*(.+)\s*:\s*(\S+)/ =~ l
337
337
  end
338
338
 
339
339
  whole_seq_top_hits = {}
340
340
  data.sub!(/(.+?)\n\n/m, '')
341
- $1.each do |l|
341
+ $1.each_line do |l|
342
342
  if /^\s*(.+?):\s*(\d+)\s*$/ =~ l
343
343
  whole_seq_top_hits[$1] = $2.to_i
344
344
  elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l
@@ -347,7 +347,7 @@ class HMMER
347
347
  end
348
348
 
349
349
  domain_top_hits = {}
350
- data.each do |l|
350
+ data.each_line do |l|
351
351
  if /^\s*(.+?):\s*(\d+)\s*$/ =~ l
352
352
  domain_top_hits[$1] = $2.to_i
353
353
  elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l