bio 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259)
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -0,0 +1,142 @@
1
+ #
2
+ # = bio/appl/blast/ddbj.rb - Remote BLAST wrapper using DDBJ web service
3
+ #
4
+ # Copyright:: Copyright (C) 2008 Naohisa Goto <ng@bioruby.org>
5
+ # License:: The Ruby License
6
+ #
7
+ # $Id:$
8
+ #
9
+
10
+ require 'bio/appl/blast/remote'
11
+ require 'bio/io/ddbjxml'
12
+
13
+ module Bio::Blast::Remote
14
+
15
+ # Remote BLAST factory using DDBJ Web API for Biology
16
+ # (http://xml.nig.ac.jp/).
17
+ #
18
+ module DDBJ
19
+
20
# Creates a remote BLAST factory that runs searches through DDBJ.
#
# program:: BLAST program name, passed through to Bio::Blast.new
# db::      database name, passed through to Bio::Blast.new
# options:: extra options, passed through to Bio::Blast.new
#           (defaults to an empty array)
#
# Returns whatever Bio::Blast.new returns for the 'ddbj' server.
#
# Note for future improvement: In the future, it might return
# Bio::Blast::Remote::DDBJ or other object.
#
def self.new(program, db, options = [])
  server = 'ddbj'
  Bio::Blast.new(program, db, options, server)
end
29
+
30
+ # Information about DDBJ BLAST.
31
+ module Information
32
+
33
+ include Bio::Blast::Remote::Information
34
+
35
# (private) Fetches the list of supported databases from the DDBJ web
# service and caches it in @databases / @database_descriptions.
#
# The list text is read line by line:
# * "name - description" lines register a database under the current
#   program key, with the current heading prepended to the description.
# * a line without " - " is treated as a heading: it becomes the
#   description prefix and the program key is set to 'blastn'.
# * an empty line clears the prefix and switches the key to 'blastp'.
#
# 'blastx' shares the 'blastp' tables; 'tblastn' and 'tblastx' share
# the 'blastn' tables.
#
# Returns true when the list was fetched and parsed, or nil when a
# previous call already populated the cache.
def _parse_databases
  return nil if defined?(@parse_databases) && @parse_databases

  drv = Bio::DDBJ::XML::Blast.new
  str = drv.getSupportDatabaseList

  databases = {}
  dbdescs = {}
  key = 'blastn'
  prefix = ''
  databases[key] ||= []
  dbdescs[key] ||= {}
  str.each_line do |line|
    a = line.strip.split(/\s*\-\s*/, 2)
    case a.size
    when 1
      # heading line
      prefix = a[0].to_s.strip
      prefix += ': ' unless prefix.empty?
      key = 'blastn'
      next #each_line
    when 0
      # empty line
      prefix = ''
      key = 'blastp'
      databases[key] ||= []
      dbdescs[key] ||= {}
      next #each_line
    end
    name = a[0].to_s.strip.freeze
    desc = (prefix + a[1].to_s.strip).freeze
    databases[key].push name
    dbdescs[key][name] = desc
  end

  # Fallbacks for a list without any protein section. The description
  # table must be a Hash (name => description), like every other entry.
  databases['blastp'] ||= []
  dbdescs['blastp'] ||= {}

  databases['blastn'].freeze
  databases['blastp'].freeze

  databases['blastx'] = databases['blastp']
  dbdescs['blastx'] = dbdescs['blastp']
  databases['tblastn'] = databases['blastn']
  dbdescs['tblastn'] = dbdescs['blastn']
  databases['tblastx'] = databases['blastn']
  dbdescs['tblastx'] = dbdescs['blastn']

  @databases = databases
  @database_descriptions = dbdescs
  @parse_databases = true
  true
end
88
+ private :_parse_databases
89
+
90
+ end #module Information
91
+
92
+ extend Information
93
+
94
# Executes BLAST through the DDBJ web service and returns the result.
#
# query:: query text handed to searchParamAsync
#
# The '-p' (program) and '-d' (database) options are taken out of the
# factory's command-line options and passed separately; the remaining
# options are sent as a single option string.
#
# The job is submitted asynchronously and then polled: the first wait
# is 2 seconds, and every wait after a "not completed yet" reply is
# 5 seconds. While polling, @output holds the job ID; afterwards it
# holds the result, which is also the return value.
#
# Raises RuntimeError when the service answers that it is very busy.
def exec_ddbj(query)
  options = make_command_line_options
  opt = Bio::Blast::NCBIOptions.new(options)

  # SOAP objects are cached
  @ddbj_remote_blast ||= Bio::DDBJ::XML::Blast.new
  #@ddbj_request_manager ||= Bio::DDBJ::XML::RequestManager.new
  # always use REST version to prevent warning messages
  @ddbj_request_manager ||= Bio::DDBJ::XML::RequestManager::REST.new

  program = opt.delete('-p')
  db = opt.delete('-d')
  optstr = Bio::Command.make_command_line_unix(opt.options)

  # using searchParamAsync
  qid = @ddbj_remote_blast.searchParamAsync(program, db, query, optstr)
  @output = qid

  sleeptime = 2
  result = nil
  loop do
    if $VERBOSE then
      $stderr.puts "DDBJ BLAST: ID: #{qid} -- waiting #{sleeptime} sec."
    end
    sleep(sleeptime)

    result = @ddbj_request_manager.getAsyncResult(qid)
    case result.to_s
    when /The search and analysis service by WWW is very busy now/
      raise result.to_s.strip + '(Alternatively, wrong options may be given.)'
    when /Your job has not completed yet/
      sleeptime = 5
    else
      # any other reply is taken as the finished result
      break
    end
  end

  @output = result
  @output
end
135
+
136
+ end #module DDBJ
137
+
138
+ # for lazy load DDBJ module
139
+ Ddbj = DDBJ
140
+
141
+ end #module Bio::Blast::Remote
142
+
@@ -4,7 +4,7 @@
4
4
  # Copyright:: Copyright (C) 2003-2006 GOTO Naohisa <ng@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: format0.rb,v 1.25 2007/12/27 17:28:57 ngoto Exp $
7
+ # $Id:$
8
8
  #
9
9
  # == Description
10
10
  #
@@ -264,8 +264,8 @@ module Bio
264
264
  begin
265
265
  q << sc.scan(/.*/)
266
266
  sc.skip(/\s*^ ?/)
267
- end until !sc.rest or r = sc.skip(/ *\( *(\d+) *letters *\)\s*\z/)
268
- @query_len = sc[1].to_i if r
267
+ end until !sc.rest or r = sc.skip(/ *\( *([\,\d]+) *letters *\)\s*\z/)
268
+ @query_len = sc[1].delete(',').to_i if r
269
269
  @query_def = q.join(' ')
270
270
  end
271
271
  end
@@ -274,7 +274,7 @@ module Bio
274
274
  # Parses the first line of the BLAST result.
275
275
  def format0_parse_header
276
276
  unless defined?(@program)
277
- if /(\w+) +([\w\-\.\d]+) *\[ *([\-\.\w]+) *\] *(\[.+\])?/ =~ @f0header.to_s
277
+ if /([\-\w]+) +([\w\-\.\d]+) *\[ *([\-\.\w]+) *\] *(\[.+\])?/ =~ @f0header.to_s
278
278
  @program = $1
279
279
  @version = "#{$1} #{$2} [#{$3}]"
280
280
  @version_number = $2
@@ -292,7 +292,17 @@ module Bio
292
292
  @f0references.push data.shift
293
293
  end
294
294
  @f0query = data.shift
295
+ # In special case, a void line is inserted after query name.
296
+ if data[0] and /\A +\( *([\,\d]+) *letters *\)\s*\z/ =~ data[0] then
297
+ @f0query.concat "\n"
298
+ @f0query.concat data.shift
299
+ end
295
300
  @f0database = data.shift
301
+ # In special case, a void line is inserted after database name.
302
+ if data[0] and /\A +[\d\,]+ +sequences\; +[\d\,]+ total +letters\s*\z/ =~ data[0] then
303
+ @f0database.concat "\n"
304
+ @f0database.concat data.shift
305
+ end
296
306
  end
297
307
 
298
308
  # Splits the statistical parameters.
@@ -340,7 +350,7 @@ module Bio
340
350
  sc = StringScanner.new(str)
341
351
  sc.skip(/\s*/)
342
352
  while sc.rest?
343
- if sc.match?(/Number of sequences better than +([e\-\.\d]+) *\: *(.+)/) then
353
+ if sc.match?(/Number of sequences better than +([e\+\-\.\d]+) *\: *(.+)/) then
344
354
  ev = sc[1]
345
355
  ev = '1' + ev if ev[0] == ?e
346
356
  @expect = ev.to_f
@@ -364,7 +374,7 @@ module Bio
364
374
  parse_colon_separated_params(@hash, @f0params)
365
375
  #p @hash
366
376
  if val = @hash['Matrix'] then
367
- if /blastn *matrix *\: *([e\-\.\d]+) +([e\-\.\d]+)/ =~ val then
377
+ if /blastn *matrix *\: *([e\+\-\.\d]+) +([e\+\-\.\d]+)/ =~ val then
368
378
  @matrix = 'blastn'
369
379
  @sc_match = $1.to_i
370
380
  @sc_mismatch = $2.to_i
@@ -373,16 +383,16 @@ module Bio
373
383
  end
374
384
  end
375
385
  if val = @hash['Gap Penalties'] then
376
- if /Existence\: *([e\-\.\d]+)/ =~ val then
386
+ if /Existence\: *([e\+\-\.\d]+)/ =~ val then
377
387
  @gap_open = $1.to_i
378
388
  end
379
- if /Extension\: *([e\-\.\d]+)/ =~ val then
389
+ if /Extension\: *([e\+\-\.\d]+)/ =~ val then
380
390
  @gap_extend = $1.to_i
381
391
  end
382
392
  end
383
393
  #@db_num = @hash['Number of Sequences'] unless defined?(@db_num)
384
394
  #@db_len = @hash['length of database'] unless defined?(@db_len)
385
- if val = @hash['effective length of database'] then
395
+ if val = @hash['effective search space'] then
386
396
  @eff_space = val.tr(',', '').to_i
387
397
  end
388
398
  @parse_params = true
@@ -529,7 +539,7 @@ module Bio
529
539
  @hits << Hit.new(data)
530
540
  r = data.first
531
541
  break unless r
532
- if /^Significant alignments for pattern/ =~ r
542
+ while /^Significant alignments for pattern/ =~ r
533
543
  data.shift
534
544
  r = data.first
535
545
  end
@@ -584,9 +594,9 @@ module Bio
584
594
  @pattern_positions = []
585
595
  @f0message.each do |r|
586
596
  sc = StringScanner.new(r)
587
- if sc.skip_until(/^ *pattern +(.+)$/) then
597
+ if sc.skip_until(/^ *pattern +([^\s]+)/) then
588
598
  @pattern = sc[1] unless @pattern
589
- sc.skip_until(/^ at position +(\d+)/)
599
+ sc.skip_until(/(?:^ *| +)at position +(\d+) +of +query +sequence/)
590
600
  @pattern_positions << sc[1].to_i
591
601
  end
592
602
  end
@@ -711,19 +721,19 @@ module Bio
711
721
  sc.skip(/ */)
712
722
  end
713
723
  sc.skip(/\s*/)
714
- while r = sc.scan(/[e\.\-\d]+/)
724
+ while r = sc.scan(/[e\+\-\.\d]+/)
715
725
  #p r
716
726
  h[s0.shift] = r
717
727
  sc.skip(/ */)
718
728
  end
719
729
  if gapped then
720
- @gapped_lambda = h['Lambda']
721
- @gapped_kappa = h['K']
722
- @gapped_entropy = h['H']
730
+ @gapped_lambda = (v = h['Lambda']) ? v.to_f : nil
731
+ @gapped_kappa = (v = h['K']) ? v.to_f : nil
732
+ @gapped_entropy = (v = h['H']) ? v.to_f : nil
723
733
  else
724
- @lambda = h['Lambda']
725
- @kappa = h['K']
726
- @entropy = h['H']
734
+ @lambda = (v = h['Lambda']) ? v.to_f : nil
735
+ @kappa = (v = h['K']) ? v.to_f : nil
736
+ @entropy = (v = h['H']) ? v.to_f : nil
727
737
  end
728
738
  end #each
729
739
  @parse_stat = true
@@ -861,7 +871,7 @@ module Bio
861
871
  d << sc.scan(/.*/)
862
872
  sc.skip(/\s*/)
863
873
  end until !sc.rest? or r = sc.skip(/ *Length *\= *([\,\d]+)\s*\z/)
864
- @len = (r ? sc[1].to_i : nil)
874
+ @len = (r ? sc[1].delete(',').to_i : nil)
865
875
  @definition = d.join(" ")
866
876
  @parse_hitname = true
867
877
  end
@@ -968,11 +978,11 @@ module Bio
968
978
  sc = StringScanner.new(@f0score)
969
979
  while sc.rest?
970
980
  sc.skip(/\s*/)
971
- if sc.skip(/Expect(?:\(\d\))? *\= *([e\-\.\d]+)/) then
981
+ if sc.skip(/Expect(?:\(\d+\))? *\= *([e\+\-\.\d]+)/) then
972
982
  ev = sc[1].to_s
973
983
  ev = '1' + ev if ev[0] == ?e
974
984
  @evalue = ev.to_f
975
- elsif sc.skip(/Score *\= *([e\-\.\d]+) *bits *\( *([e\-\.\d]+) *\)/) then
985
+ elsif sc.skip(/Score *\= *([e\+\-\.\d]+) *bits *\( *([e\+\-\.\d]+) *\)/) then
976
986
  bs = sc[1]
977
987
  bs = '1' + bs if bs[0] == ?e
978
988
  @bit_score = bs.to_f
@@ -1016,19 +1026,19 @@ module Bio
1016
1026
  if sc[2] then
1017
1027
  @hit_frame = sc[3].to_i
1018
1028
  end
1019
- elsif sc.skip(/Score *\= *([e\-\.\d]+) +\(([e\-\.\d]+) *bits *\)/) then
1029
+ elsif sc.skip(/Score *\= *([e\+\-\.\d]+) +\(([e\+\-\.\d]+) *bits *\)/) then
1020
1030
  #WU-BLAST
1021
1031
  @score = sc[1].to_i
1022
1032
  bs = sc[2]
1023
1033
  bs = '1' + bs if bs[0] == ?e
1024
1034
  @bit_score = bs.to_f
1025
- elsif sc.skip(/P *\= * ([e\-\.\d]+)/) then
1035
+ elsif sc.skip(/P *\= * ([e\+\-\.\d]+)/) then
1026
1036
  #WU-BLAST
1027
1037
  @p_sum_n = nil
1028
1038
  pv = sc[1]
1029
1039
  pv = '1' + pv if pv[0] == ?e
1030
1040
  @pvalue = pv.to_f
1031
- elsif sc.skip(/Sum +P *\( *(\d+) *\) *\= *([e\-\.\d]+)/) then
1041
+ elsif sc.skip(/Sum +P *\( *(\d+) *\) *\= *([e\+\-\.\d]+)/) then
1032
1042
  #WU-BLAST
1033
1043
  @p_sum_n = sc[1].to_i
1034
1044
  pv = sc[2]
@@ -4,7 +4,7 @@
4
4
  # Copyright:: Copyright (C) 2002, 2003, 2007 Toshiaki Katayama <k@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: format8.rb,v 1.8 2007/12/14 16:15:20 k Exp $
7
+ # $Id:$
8
8
  #
9
9
  # == Note
10
10
  #
@@ -27,7 +27,7 @@ module Bio
27
27
  hit_num = 1
28
28
  hsp_num = 1
29
29
  hit = ''
30
- data.each do |line|
30
+ data.each_line do |line|
31
31
  ary = line.chomp.split("\t")
32
32
  query_id, target_id, hsp = tab_parse_hsp(ary)
33
33
  if query_prev != query_id or target_prev != target_id
@@ -0,0 +1,263 @@
1
+ #
2
+ # = bio/appl/blast/genomenet.rb - Remote BLAST wrapper using GenomeNet
3
+ #
4
+ # Copyright:: Copyright (C) 2001,2008 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # Copyright:: Copyright (C) 2002,2003 Toshiaki Katayama <k@bioruby.org>
6
+ # Copyright:: Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
7
+ # Copyright:: Copyright (C) 2008 Naohisa Goto <ng@bioruby.org>
8
+ # License:: The Ruby License
9
+ #
10
+ # $Id:$
11
+ #
12
+
13
+ require 'net/http'
14
+ require 'uri'
15
+ require 'bio/command'
16
+ require 'shellwords'
17
+ require 'bio/appl/blast/remote'
18
+
19
+ module Bio::Blast::Remote
20
+
21
+ # == Description
22
+ #
23
+ # The Bio::Blast::Remote::GenomeNet module contains methods for running
24
+ # remote BLAST searches on GenomeNet (http://blast.genome.jp/).
25
+ #
26
+ # == Usage
27
+ #
28
+ # require 'bio'
29
+ #
30
+ # # To run an actual BLAST analysis:
31
+ # # 1. create a BLAST factory
32
+ # blast_factory = Bio::Blast.remote('blastp', 'nr-aa',
33
+ # '-e 0.0001', 'genomenet')
34
+ # #or:
35
+ # blast_factory = Bio::Blast::Remote.genomenet('blastp', 'nr-aa',
36
+ # '-e 0.0001')
37
+ #
38
+ # # 2. run the actual BLAST by querying the factory
39
+ # report = blast_factory.query(sequence_text)
40
+ #
41
+ # # Then, to parse the report, see Bio::Blast::Report
42
+ #
43
+ # === Available databases for Bio::Blast::Remote::GenomeNet
44
+ #
45
+ # Up-to-date available databases can be obtained by using
46
+ # Bio::Blast::Remote::GenomeNet.databases(program).
47
+ # A summary of the databases supported for each program:
48
+ #
49
+ # ----------+-------+---------------------------------------------------
50
+ # program | query | db (supported in GenomeNet)
51
+ # ----------+-------+---------------------------------------------------
52
+ # blastp | AA | nr-aa, genes, vgenes.pep, swissprot, swissprot-upd,
53
+ # ----------+-------+ pir, prf, pdbstr
54
+ # blastx | NA |
55
+ # ----------+-------+---------------------------------------------------
56
+ # blastn | NA | nr-nt, genbank-nonst, gbnonst-upd, dbest, dbgss,
57
+ # ----------+-------+ htgs, dbsts, embl-nonst, embnonst-upd, epd,
58
+ # tblastn | AA | genes-nt, genome, vgenes.nuc
59
+ # ----------+-------+---------------------------------------------------
60
+ #
61
+ # == See also
62
+ #
63
+ # * Bio::Blast
64
+ # * Bio::Blast::Report
65
+ # * Bio::Blast::Report::Hit
66
+ # * Bio::Blast::Report::Hsp
67
+ #
68
+ # == References
69
+ #
70
+ # * http://www.ncbi.nlm.nih.gov/blast/
71
+ # * http://www.ncbi.nlm.nih.gov/Education/BLASTinfo/similarity.html
72
+ # * http://blast.genome.jp/ideas/ideas.html#blast
73
+ #
74
+ module GenomeNet
75
+
76
+ Host = "blast.genome.jp".freeze
77
+
78
# Creates a remote BLAST factory that runs searches through GenomeNet.
#
# program:: BLAST program name, passed through to Bio::Blast.new
# db::      database name, passed through to Bio::Blast.new
# options:: extra options, passed through to Bio::Blast.new
#           (defaults to an empty array)
#
# Returns whatever Bio::Blast.new returns for the 'genomenet' server.
#
# Note for future improvement: In the future, it might return
# Bio::Blast::Remote::GenomeNet or other object.
#
def self.new(program, db, options = [])
  server = 'genomenet'
  Bio::Blast.new(program, db, options, server)
end
87
+
88
+ # Information for GenomeNet BLAST search.
89
+ module Information
90
+
91
+ include Bio::Blast::Remote::Information
92
+
93
# (private) Downloads the top page of the GenomeNet BLAST server and
# extracts the selectable databases from its HTML form, caching the
# result in @databases / @database_descriptions.
#
# A line containing a set_dbtype(this.form,'prot'|'nucl') handler
# selects the molecule type for the "dbname" radio buttons that follow.
# Each radio button contributes its value (database name) and the text
# after the tag (description). The 'mine-aa' and 'mine-nt' entries are
# removed. Protein databases are then published under 'blastp' and
# 'blastx', nucleotide databases under 'blastn', 'tblastn' and
# 'tblastx'.
#
# Returns true when the page was fetched and parsed, or nil when a
# previous call already populated the cache.
def _parse_databases
  return nil if defined?(@parse_databases) && @parse_databases

  databases = {}
  dbdescs = {}
  key = nil
  host = Bio::Blast::Remote::Genomenet::Host
  http = Bio::Command.new_http(host)
  result = http.get('/')
  #p result.body
  result.body.each_line do |line|
    case line
    when /\"set\_dbtype\(this\.form\,\'(prot|nucl)\'\)\"/
      key = $1
      databases[key] ||= []
      dbdescs[key] ||= {}
    when /\<input *type\=\"radio\" *name\=\"dbname\" *value\=\"([^\"]+)\"[^\>]*\>([^\<\>]+)/
      # A database listed before any molecule-type marker cannot be
      # classified (and databases[nil] does not exist), so skip it.
      next unless key
      db = $1.freeze
      desc = $2.strip.freeze
      databases[key].push db
      dbdescs[key][db] = desc
    end
  end

  # mine-aa and mine-nt should be removed
  [ 'prot', 'nucl' ].each do |mol|
    ary = databases[mol] || []
    hash = dbdescs[mol] || {}
    [ 'mine-aa', 'mine-nt' ].each do |k|
      ary.delete(k)
      hash.delete(k)
    end
    databases[mol] = ary.freeze
    dbdescs[mol] = hash
  end

  # re-key the tables from molecule type to BLAST program name
  [ databases, dbdescs ].each do |h|
    prot = h['prot']
    nucl = h['nucl']
    h.delete('prot')
    h.delete('nucl')
    h['blastp'] = prot
    h['blastx'] = prot
    h['blastn'] = nucl
    h['tblastn'] = nucl
    h['tblastx'] = nucl
  end

  @databases = databases
  @database_descriptions = dbdescs
  @parse_databases = true
  true
end
148
+ private :_parse_databases
149
+
150
+ end #module Information
151
+
152
+ extend Information
153
+
154
+ private
155
+
156
# Submits the query to the GenomeNet BLAST CGI and returns the report
# text (also stored in @output).
#
# query:: query text sent as the 'sequence' form field
#
# '-p', '-d', '-M', '-F', '-V' and '-B' are removed from the factory's
# command-line options and sent as dedicated form fields (with
# GenomeNet defaults for the last four); '-m' defaults to '7' and stays
# in the option string together with all remaining options.
#
# Raises RuntimeError ('cannot understand response') when the reply has
# no "Show all result" link or the linked page has no <pre> section.
def exec_genomenet(query)
  server = Host
  #server = "blast.genome.jp"
  #cgi_path = "/sit-bin/nph-blast"
  cgi_path = "/sit-bin/blast" #2005.08.12

  opt = Bio::Blast::NCBIOptions.new(make_command_line_options)

  program = opt.delete('-p')
  db      = opt.delete('-d')
  matrix  = opt.delete('-M') || 'blosum62'
  filter  = opt.delete('-F') || 'T'
  opt_V   = opt.delete('-V') || 500 # default value for GenomeNet
  opt_B   = opt.delete('-B') || 250 # default value for GenomeNet

  # format, not for form parameters, but included in option string
  opt_m = opt.get('-m') || '7' # default of BioRuby GenomeNet factory
  opt.set('-m', opt_m)

  form = {
    'style'          => 'raw',
    'prog'           => program,
    'dbname'         => db,
    'sequence'       => query,
    'other_param'    => Bio::Command.make_command_line_unix(opt.options),
    'matrix'         => matrix,
    'filter'         => filter,
    'V_value'        => opt_V,
    'B_value'        => opt_B,
    'alignment_view' => 0,
  }
  # fields without a value are not sent
  form.delete_if { |_name, value| !value }

  http = Bio::Command.new_http(server)
  http.open_timeout = 300
  http.read_timeout = 600
  response = Bio::Command.http_post_form(http, cgi_path, form)
  @output = response.body

  # workaround 2008.8.13
  if response.code == '302'
    job_uri = URI.parse(response['location'])
    job_path = job_uri.path
    response = http.get(job_path)
    @output = response.body
    # waiting for BLAST finished
    while /Your job ID is/ =~ @output &&
          /Your result will be displayed here\<br\>/ =~ @output
      wait = 5
      if /This page will be reloaded automatically in\s*((\d+)\s*min\.)?\s*(\d+)\s*sec\./ =~ @output
        announced = Regexp.last_match(2).to_i * 60 + Regexp.last_match(3).to_i
        # keep the announced delay within 1..300 seconds
        wait = [[announced, 300].min, 1].max
      end
      $stderr.puts "waiting #{wait} sec to reload #{job_uri.to_s}" if $VERBOSE
      sleep(wait)
      response = http.get(job_path)
      @output = response.body
    end
  end

  # workaround 2005.08.12
  unless /\<A +HREF=\"(http\:\/\/blast\.genome\.jp(\/tmp\/[^\"]+))\"\>Show all result\<\/A\>/i =~ @output.to_s
    raise 'cannot understand response'
  end
  response = http.get(Regexp.last_match(2))
  @output = response.body
  report = @output.to_s.split(/\<pre\>/)[1]
  raise 'cannot understand response' unless report
  report.sub!(/\<\/pre\>.*\z/m, '')
  report.sub!(/.*^ \-{20,}\s*/m, '')
  @output = report.gsub(/\&lt\;/, '<')

  # for -m 0 (NCBI BLAST default) output, html tags are removed.
  if opt_m.to_i == 0
    plain = @output.gsub(/^\s*\<img +src\=\"\/Fig\/arrow\_top\.gif\"\>.+$\r?\n/, '')
    plain.gsub!(/^.+\<\/form\>$/, '')
    plain.gsub!(/^\<form *method\=\"POST\" name\=\"clust\_check\"\>.+$\r?\n/, '')
    plain.gsub!(/\<[^\>\<]+\>/m, '')
    @output = plain
  end

  @output
end
256
+
257
+ end # module GenomeNet
258
+
259
+ # alias for lazy load
260
+ Genomenet = GenomeNet
261
+
262
+ end # module Bio::Blast::Remote
263
+