bio 1.2.1 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -0,0 +1,142 @@
1
+ #
2
+ # = bio/appl/blast/ddbj.rb - Remote BLAST wrapper using DDBJ web service
3
+ #
4
+ # Copyright:: Copyright (C) 2008 Naohisa Goto <ng@bioruby.org>
5
+ # License:: The Ruby License
6
+ #
7
+ # $Id:$
8
+ #
9
+
10
+ require 'bio/appl/blast/remote'
11
+ require 'bio/io/ddbjxml'
12
+
13
+ module Bio::Blast::Remote
14
+
15
+ # Remote BLAST factory using DDBJ Web API for Biology
16
+ # (http://xml.nig.ac.jp/).
17
+ #
18
+ module DDBJ
19
+
20
+ # Creates a remote BLAST factory using DDBJ.
21
+ # Returns Bio::Blast object.
22
+ #
23
+ # Note for future improvement: In the future, it might return
24
+ # Bio::Blast::Remote::DDBJ or other object.
25
+ #
26
+ def self.new(program, db, options = [])
27
+ Bio::Blast.new(program, db, options, 'ddbj')
28
+ end
29
+
30
+ # Information about DDBJ BLAST.
31
+ module Information
32
+
33
+ include Bio::Blast::Remote::Information
34
+
35
+ # (private) parse database information
36
+ def _parse_databases
37
+ if defined? @parse_databases
38
+ return nil if @parse_databases
39
+ end
40
+ drv = Bio::DDBJ::XML::Blast.new
41
+ str = drv.getSupportDatabaseList
42
+
43
+ databases = {}
44
+ dbdescs = {}
45
+ key = 'blastn'
46
+ prefix = ''
47
+ databases[key] ||= []
48
+ dbdescs[key] ||= {}
49
+ str.each_line do |line|
50
+ a = line.strip.split(/\s*\-\s*/, 2)
51
+ case a.size
52
+ when 1
53
+ prefix = a[0].to_s.strip
54
+ prefix += ': ' unless prefix.empty?
55
+ key = 'blastn'
56
+ next #each_line
57
+ when 0
58
+ prefix = ''
59
+ key = 'blastp'
60
+ databases[key] ||= []
61
+ dbdescs[key] ||= {}
62
+ next #each_line
63
+ end
64
+ name = a[0].to_s.strip.freeze
65
+ desc = (prefix + a[1].to_s.strip).freeze
66
+ databases[key].push name
67
+ dbdescs[key][name] = desc
68
+ end
69
+
70
+ databases['blastp'] ||= []
71
+ dbdescs['blastp'] ||= []
72
+
73
+ databases['blastn'].freeze
74
+ databases['blastp'].freeze
75
+
76
+ databases['blastx'] = databases['blastp']
77
+ dbdescs['blastx'] = dbdescs['blastp']
78
+ databases['tblastn'] = databases['blastn']
79
+ dbdescs['tblastn'] = dbdescs['blastn']
80
+ databases['tblastx'] = databases['blastn']
81
+ dbdescs['tblastx'] = dbdescs['blastn']
82
+
83
+ @databases = databases
84
+ @database_descriptions = dbdescs
85
+ @parse_databases = true
86
+ true
87
+ end
88
+ private :_parse_databases
89
+
90
+ end #module Information
91
+
92
+ extend Information
93
+
94
+ # executes BLAST and returns result as a string
95
+ def exec_ddbj(query)
96
+ options = make_command_line_options
97
+ opt = Bio::Blast::NCBIOptions.new(options)
98
+
99
+ # SOAP objects are cached
100
+ @ddbj_remote_blast ||= Bio::DDBJ::XML::Blast.new
101
+ #@ddbj_request_manager ||= Bio::DDBJ::XML::RequestManager.new
102
+ # always use REST version to prevent warning messages
103
+ @ddbj_request_manager ||= Bio::DDBJ::XML::RequestManager::REST.new
104
+
105
+ program = opt.delete('-p')
106
+ db = opt.delete('-d')
107
+ optstr = Bio::Command.make_command_line_unix(opt.options)
108
+
109
+ # using searchParamAsync
110
+ qid = @ddbj_remote_blast.searchParamAsync(program, db, query, optstr)
111
+ @output = qid
112
+
113
+ sleeptime = 2
114
+ flag = true
115
+ while flag
116
+ if $VERBOSE then
117
+ $stderr.puts "DDBJ BLAST: ID: #{qid} -- waitng #{sleeptime} sec."
118
+ end
119
+ sleep(sleeptime)
120
+
121
+ result = @ddbj_request_manager.getAsyncResult(qid)
122
+ case result.to_s
123
+ when /The search and analysis service by WWW is very busy now/
124
+ raise result.to_s.strip + '(Alternatively, wrong options may be given.)'
125
+ when /Your job has not completed yet/
126
+ sleeptime = 5
127
+ else
128
+ flag = false
129
+ end
130
+ end while flag
131
+
132
+ @output = result
133
+ return @output
134
+ end
135
+
136
+ end #module DDBJ
137
+
138
+ # alias used for lazy loading of the DDBJ module
139
+ Ddbj = DDBJ
140
+
141
+ end #module Bio::Blast::Remote
142
+
@@ -4,7 +4,7 @@
4
4
  # Copyright:: Copyright (C) 2003-2006 GOTO Naohisa <ng@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: format0.rb,v 1.25 2007/12/27 17:28:57 ngoto Exp $
7
+ # $Id:$
8
8
  #
9
9
  # == Description
10
10
  #
@@ -264,8 +264,8 @@ module Bio
264
264
  begin
265
265
  q << sc.scan(/.*/)
266
266
  sc.skip(/\s*^ ?/)
267
- end until !sc.rest or r = sc.skip(/ *\( *(\d+) *letters *\)\s*\z/)
268
- @query_len = sc[1].to_i if r
267
+ end until !sc.rest or r = sc.skip(/ *\( *([\,\d]+) *letters *\)\s*\z/)
268
+ @query_len = sc[1].delete(',').to_i if r
269
269
  @query_def = q.join(' ')
270
270
  end
271
271
  end
@@ -274,7 +274,7 @@ module Bio
274
274
  # Parses the first line of the BLAST result.
275
275
  def format0_parse_header
276
276
  unless defined?(@program)
277
- if /(\w+) +([\w\-\.\d]+) *\[ *([\-\.\w]+) *\] *(\[.+\])?/ =~ @f0header.to_s
277
+ if /([\-\w]+) +([\w\-\.\d]+) *\[ *([\-\.\w]+) *\] *(\[.+\])?/ =~ @f0header.to_s
278
278
  @program = $1
279
279
  @version = "#{$1} #{$2} [#{$3}]"
280
280
  @version_number = $2
@@ -292,7 +292,17 @@ module Bio
292
292
  @f0references.push data.shift
293
293
  end
294
294
  @f0query = data.shift
295
+ # In some special cases, a blank line is inserted after the query name.
296
+ if data[0] and /\A +\( *([\,\d]+) *letters *\)\s*\z/ =~ data[0] then
297
+ @f0query.concat "\n"
298
+ @f0query.concat data.shift
299
+ end
295
300
  @f0database = data.shift
301
+ # In some special cases, a blank line is inserted after the database name.
302
+ if data[0] and /\A +[\d\,]+ +sequences\; +[\d\,]+ total +letters\s*\z/ =~ data[0] then
303
+ @f0database.concat "\n"
304
+ @f0database.concat data.shift
305
+ end
296
306
  end
297
307
 
298
308
  # Splits the statistical parameters.
@@ -340,7 +350,7 @@ module Bio
340
350
  sc = StringScanner.new(str)
341
351
  sc.skip(/\s*/)
342
352
  while sc.rest?
343
- if sc.match?(/Number of sequences better than +([e\-\.\d]+) *\: *(.+)/) then
353
+ if sc.match?(/Number of sequences better than +([e\+\-\.\d]+) *\: *(.+)/) then
344
354
  ev = sc[1]
345
355
  ev = '1' + ev if ev[0] == ?e
346
356
  @expect = ev.to_f
@@ -364,7 +374,7 @@ module Bio
364
374
  parse_colon_separated_params(@hash, @f0params)
365
375
  #p @hash
366
376
  if val = @hash['Matrix'] then
367
- if /blastn *matrix *\: *([e\-\.\d]+) +([e\-\.\d]+)/ =~ val then
377
+ if /blastn *matrix *\: *([e\+\-\.\d]+) +([e\+\-\.\d]+)/ =~ val then
368
378
  @matrix = 'blastn'
369
379
  @sc_match = $1.to_i
370
380
  @sc_mismatch = $2.to_i
@@ -373,16 +383,16 @@ module Bio
373
383
  end
374
384
  end
375
385
  if val = @hash['Gap Penalties'] then
376
- if /Existence\: *([e\-\.\d]+)/ =~ val then
386
+ if /Existence\: *([e\+\-\.\d]+)/ =~ val then
377
387
  @gap_open = $1.to_i
378
388
  end
379
- if /Extension\: *([e\-\.\d]+)/ =~ val then
389
+ if /Extension\: *([e\+\-\.\d]+)/ =~ val then
380
390
  @gap_extend = $1.to_i
381
391
  end
382
392
  end
383
393
  #@db_num = @hash['Number of Sequences'] unless defined?(@db_num)
384
394
  #@db_len = @hash['length of database'] unless defined?(@db_len)
385
- if val = @hash['effective length of database'] then
395
+ if val = @hash['effective search space'] then
386
396
  @eff_space = val.tr(',', '').to_i
387
397
  end
388
398
  @parse_params = true
@@ -529,7 +539,7 @@ module Bio
529
539
  @hits << Hit.new(data)
530
540
  r = data.first
531
541
  break unless r
532
- if /^Significant alignments for pattern/ =~ r
542
+ while /^Significant alignments for pattern/ =~ r
533
543
  data.shift
534
544
  r = data.first
535
545
  end
@@ -584,9 +594,9 @@ module Bio
584
594
  @pattern_positions = []
585
595
  @f0message.each do |r|
586
596
  sc = StringScanner.new(r)
587
- if sc.skip_until(/^ *pattern +(.+)$/) then
597
+ if sc.skip_until(/^ *pattern +([^\s]+)/) then
588
598
  @pattern = sc[1] unless @pattern
589
- sc.skip_until(/^ at position +(\d+)/)
599
+ sc.skip_until(/(?:^ *| +)at position +(\d+) +of +query +sequence/)
590
600
  @pattern_positions << sc[1].to_i
591
601
  end
592
602
  end
@@ -711,19 +721,19 @@ module Bio
711
721
  sc.skip(/ */)
712
722
  end
713
723
  sc.skip(/\s*/)
714
- while r = sc.scan(/[e\.\-\d]+/)
724
+ while r = sc.scan(/[e\+\-\.\d]+/)
715
725
  #p r
716
726
  h[s0.shift] = r
717
727
  sc.skip(/ */)
718
728
  end
719
729
  if gapped then
720
- @gapped_lambda = h['Lambda']
721
- @gapped_kappa = h['K']
722
- @gapped_entropy = h['H']
730
+ @gapped_lambda = (v = h['Lambda']) ? v.to_f : nil
731
+ @gapped_kappa = (v = h['K']) ? v.to_f : nil
732
+ @gapped_entropy = (v = h['H']) ? v.to_f : nil
723
733
  else
724
- @lambda = h['Lambda']
725
- @kappa = h['K']
726
- @entropy = h['H']
734
+ @lambda = (v = h['Lambda']) ? v.to_f : nil
735
+ @kappa = (v = h['K']) ? v.to_f : nil
736
+ @entropy = (v = h['H']) ? v.to_f : nil
727
737
  end
728
738
  end #each
729
739
  @parse_stat = true
@@ -861,7 +871,7 @@ module Bio
861
871
  d << sc.scan(/.*/)
862
872
  sc.skip(/\s*/)
863
873
  end until !sc.rest? or r = sc.skip(/ *Length *\= *([\,\d]+)\s*\z/)
864
- @len = (r ? sc[1].to_i : nil)
874
+ @len = (r ? sc[1].delete(',').to_i : nil)
865
875
  @definition = d.join(" ")
866
876
  @parse_hitname = true
867
877
  end
@@ -968,11 +978,11 @@ module Bio
968
978
  sc = StringScanner.new(@f0score)
969
979
  while sc.rest?
970
980
  sc.skip(/\s*/)
971
- if sc.skip(/Expect(?:\(\d\))? *\= *([e\-\.\d]+)/) then
981
+ if sc.skip(/Expect(?:\(\d+\))? *\= *([e\+\-\.\d]+)/) then
972
982
  ev = sc[1].to_s
973
983
  ev = '1' + ev if ev[0] == ?e
974
984
  @evalue = ev.to_f
975
- elsif sc.skip(/Score *\= *([e\-\.\d]+) *bits *\( *([e\-\.\d]+) *\)/) then
985
+ elsif sc.skip(/Score *\= *([e\+\-\.\d]+) *bits *\( *([e\+\-\.\d]+) *\)/) then
976
986
  bs = sc[1]
977
987
  bs = '1' + bs if bs[0] == ?e
978
988
  @bit_score = bs.to_f
@@ -1016,19 +1026,19 @@ module Bio
1016
1026
  if sc[2] then
1017
1027
  @hit_frame = sc[3].to_i
1018
1028
  end
1019
- elsif sc.skip(/Score *\= *([e\-\.\d]+) +\(([e\-\.\d]+) *bits *\)/) then
1029
+ elsif sc.skip(/Score *\= *([e\+\-\.\d]+) +\(([e\+\-\.\d]+) *bits *\)/) then
1020
1030
  #WU-BLAST
1021
1031
  @score = sc[1].to_i
1022
1032
  bs = sc[2]
1023
1033
  bs = '1' + bs if bs[0] == ?e
1024
1034
  @bit_score = bs.to_f
1025
- elsif sc.skip(/P *\= * ([e\-\.\d]+)/) then
1035
+ elsif sc.skip(/P *\= * ([e\+\-\.\d]+)/) then
1026
1036
  #WU-BLAST
1027
1037
  @p_sum_n = nil
1028
1038
  pv = sc[1]
1029
1039
  pv = '1' + pv if pv[0] == ?e
1030
1040
  @pvalue = pv.to_f
1031
- elsif sc.skip(/Sum +P *\( *(\d+) *\) *\= *([e\-\.\d]+)/) then
1041
+ elsif sc.skip(/Sum +P *\( *(\d+) *\) *\= *([e\+\-\.\d]+)/) then
1032
1042
  #WU-BLAST
1033
1043
  @p_sum_n = sc[1].to_i
1034
1044
  pv = sc[2]
@@ -4,7 +4,7 @@
4
4
  # Copyright:: Copyright (C) 2002, 2003, 2007 Toshiaki Katayama <k@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: format8.rb,v 1.8 2007/12/14 16:15:20 k Exp $
7
+ # $Id:$
8
8
  #
9
9
  # == Note
10
10
  #
@@ -27,7 +27,7 @@ module Bio
27
27
  hit_num = 1
28
28
  hsp_num = 1
29
29
  hit = ''
30
- data.each do |line|
30
+ data.each_line do |line|
31
31
  ary = line.chomp.split("\t")
32
32
  query_id, target_id, hsp = tab_parse_hsp(ary)
33
33
  if query_prev != query_id or target_prev != target_id
@@ -0,0 +1,263 @@
1
+ #
2
+ # = bio/appl/blast/genomenet.rb - Remote BLAST wrapper using GenomeNet
3
+ #
4
+ # Copyright:: Copyright (C) 2001,2008 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # Copyright:: Copyright (C) 2002,2003 Toshiaki Katayama <k@bioruby.org>
6
+ # Copyright:: Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
7
+ # Copyright:: Copyright (C) 2008 Naohisa Goto <ng@bioruby.org>
8
+ # License:: The Ruby License
9
+ #
10
+ # $Id:$
11
+ #
12
+
13
+ require 'net/http'
14
+ require 'uri'
15
+ require 'bio/command'
16
+ require 'shellwords'
17
+ require 'bio/appl/blast/remote'
18
+
19
+ module Bio::Blast::Remote
20
+
21
+ # == Description
22
+ #
23
+ # The Bio::Blast::Remote::GenomeNet class contains methods for running
24
+ # remote BLAST searches on GenomeNet (http://blast.genome.jp/).
25
+ #
26
+ # == Usage
27
+ #
28
+ # require 'bio'
29
+ #
30
+ # # To run an actual BLAST analysis:
31
+ # # 1. create a BLAST factory
32
+ # blast_factory = Bio::Blast.remote('blastp', 'nr-aa',
33
+ # '-e 0.0001', 'genomenet')
34
+ # #or:
35
+ # blast_factory = Bio::Blast::Remote.genomenet('blastp', 'nr-aa',
36
+ # '-e 0.0001')
37
+ #
38
+ # # 2. run the actual BLAST by querying the factory
39
+ # report = blast_factory.query(sequence_text)
40
+ #
41
+ # # Then, to parse the report, see Bio::Blast::Report
42
+ #
43
+ # === Available databases for Bio::Blast::Remote::GenomeNet
44
+ #
45
+ # Up-to-date available databases can be obtained by using
46
+ # Bio::Blast::Remote::GenomeNet.databases(program).
47
+ # A short summary of the supported databases for each program:
48
+ #
49
+ # ----------+-------+---------------------------------------------------
50
+ # program | query | db (supported in GenomeNet)
51
+ # ----------+-------+---------------------------------------------------
52
+ # blastp | AA | nr-aa, genes, vgenes.pep, swissprot, swissprot-upd,
53
+ # ----------+-------+ pir, prf, pdbstr
54
+ # blastx | NA |
55
+ # ----------+-------+---------------------------------------------------
56
+ # blastn | NA | nr-nt, genbank-nonst, gbnonst-upd, dbest, dbgss,
57
+ # ----------+-------+ htgs, dbsts, embl-nonst, embnonst-upd, epd,
58
+ # tblastn | AA | genes-nt, genome, vgenes.nuc
59
+ # ----------+-------+---------------------------------------------------
60
+ #
61
+ # == See also
62
+ #
63
+ # * Bio::Blast
64
+ # * Bio::Blast::Report
65
+ # * Bio::Blast::Report::Hit
66
+ # * Bio::Blast::Report::Hsp
67
+ #
68
+ # == References
69
+ #
70
+ # * http://www.ncbi.nlm.nih.gov/blast/
71
+ # * http://www.ncbi.nlm.nih.gov/Education/BLASTinfo/similarity.html
72
+ # * http://blast.genome.jp/ideas/ideas.html#blast
73
+ #
74
+ module GenomeNet
75
+
76
+ Host = "blast.genome.jp".freeze
77
+
78
+ # Creates a remote BLAST factory using GenomeNet.
79
+ # Returns Bio::Blast object.
80
+ #
81
+ # Note for future improvement: In the future, it might return
82
+ # Bio::Blast::Remote::GenomeNet or other object.
83
+ #
84
+ def self.new(program, db, options = [])
85
+ Bio::Blast.new(program, db, options, 'genomenet')
86
+ end
87
+
88
+ # Information for GenomeNet BLAST search.
89
+ module Information
90
+
91
+ include Bio::Blast::Remote::Information
92
+
93
+ # gets information from remote host and parses database information
94
+ def _parse_databases
95
+ if defined? @parse_databases
96
+ return nil if @parse_databases
97
+ end
98
+ databases = {}
99
+ dbdescs = {}
100
+ key = nil
101
+ host = Bio::Blast::Remote::Genomenet::Host
102
+ http = Bio::Command.new_http(host)
103
+ result = http.get('/')
104
+ #p result.body
105
+ result.body.each_line do |line|
106
+ case line
107
+ when /\"set\_dbtype\(this\.form\,\'(prot|nucl)\'\)\"/
108
+ key = $1
109
+ databases[key] ||= []
110
+ dbdescs[key] ||= {}
111
+ when /\<input *type\=\"radio\" *name\=\"dbname\" *value\=\"([^\"]+)\"[^\>]*\>([^\<\>]+)/
112
+ db = $1.freeze
113
+ desc = $2.strip.freeze
114
+ databases[key].push db
115
+ dbdescs[key][db] = desc
116
+ end
117
+ end
118
+
119
+ # mine-aa and mine-nt should be removed
120
+ [ 'prot', 'nucl' ].each do |mol|
121
+ ary = databases[mol] || []
122
+ hash = dbdescs[mol] || {}
123
+ [ 'mine-aa', 'mine-nt' ].each do |k|
124
+ ary.delete(k)
125
+ hash.delete(k)
126
+ end
127
+ databases[mol] = ary.freeze
128
+ dbdescs[mol] = hash
129
+ end
130
+
131
+ [ databases, dbdescs ].each do |h|
132
+ prot = h['prot']
133
+ nucl = h['nucl']
134
+ h.delete('prot')
135
+ h.delete('nucl')
136
+ h['blastp'] = prot
137
+ h['blastx'] = prot
138
+ h['blastn'] = nucl
139
+ h['tblastn'] = nucl
140
+ h['tblastx'] = nucl
141
+ end
142
+
143
+ @databases = databases
144
+ @database_descriptions = dbdescs
145
+ @parse_databases = true
146
+ true
147
+ end
148
+ private :_parse_databases
149
+
150
+ end #module Information
151
+
152
+ extend Information
153
+
154
+ private
155
+
156
+ # executes BLAST and returns result as a string
157
+ def exec_genomenet(query)
158
+ host = Host
159
+ #host = "blast.genome.jp"
160
+ #path = "/sit-bin/nph-blast"
161
+ path = "/sit-bin/blast" #2005.08.12
162
+
163
+ options = make_command_line_options
164
+ opt = Bio::Blast::NCBIOptions.new(options)
165
+
166
+ program = opt.delete('-p')
167
+ db = opt.delete('-d')
168
+
169
+ matrix = opt.delete('-M') || 'blosum62'
170
+ filter = opt.delete('-F') || 'T'
171
+
172
+ opt_V = opt.delete('-V') || 500 # default value for GenomeNet
173
+ opt_B = opt.delete('-B') || 250 # default value for GenomeNet
174
+
175
+ # output format (-m): not sent as a form parameter, but kept in the option string
176
+ opt_m = opt.get('-m') || '7' # default of BioRuby GenomeNet factory
177
+ opt.set('-m', opt_m)
178
+
179
+ optstr = Bio::Command.make_command_line_unix(opt.options)
180
+
181
+ form = {
182
+ 'style' => 'raw',
183
+ 'prog' => program,
184
+ 'dbname' => db,
185
+ 'sequence' => query,
186
+ 'other_param' => optstr,
187
+ 'matrix' => matrix,
188
+ 'filter' => filter,
189
+ 'V_value' => opt_V,
190
+ 'B_value' => opt_B,
191
+ 'alignment_view' => 0,
192
+ }
193
+
194
+ form.keys.each do |k|
195
+ form.delete(k) unless form[k]
196
+ end
197
+
198
+ begin
199
+ http = Bio::Command.new_http(host)
200
+ http.open_timeout = 300
201
+ http.read_timeout = 600
202
+ result = Bio::Command.http_post_form(http, path, form)
203
+ @output = result.body
204
+
205
+ # workaround 2008.8.13
206
+ if result.code == '302' then
207
+ newuri = URI.parse(result['location'])
208
+ newpath = newuri.path
209
+ result = http.get(newpath)
210
+ @output = result.body
211
+ # wait until the BLAST job has finished
212
+ while /Your job ID is/ =~ @output and
213
+ /Your result will be displayed here\<br\>/ =~ @output
214
+ if /This page will be reloaded automatically in\s*((\d+)\s*min\.)?\s*(\d+)\s*sec\./ =~ @output then
215
+ reloadtime = $2.to_i * 60 + $3.to_i
216
+ reloadtime = 300 if reloadtime > 300
217
+ reloadtime = 1 if reloadtime < 1
218
+ else
219
+ reloadtime = 5
220
+ end
221
+ if $VERBOSE then
222
+ $stderr.puts "waiting #{reloadtime} sec to reload #{newuri.to_s}"
223
+ end
224
+ sleep(reloadtime)
225
+ result = http.get(newpath)
226
+ @output = result.body
227
+ end
228
+ end
229
+
230
+ # workaround 2005.08.12
231
+ if /\<A +HREF=\"(http\:\/\/blast\.genome\.jp(\/tmp\/[^\"]+))\"\>Show all result\<\/A\>/i =~ @output.to_s then
232
+ result = http.get($2)
233
+ @output = result.body
234
+ txt = @output.to_s.split(/\<pre\>/)[1]
235
+ raise 'cannot understand response' unless txt
236
+ txt.sub!(/\<\/pre\>.*\z/m, '')
237
+ txt.sub!(/.*^ \-{20,}\s*/m, '')
238
+ @output = txt.gsub(/\&lt\;/, '<')
239
+ else
240
+ raise 'cannot understand response'
241
+ end
242
+ end
243
+
244
+ # for -m 0 (NCBI BLAST default) output, html tags are removed.
245
+ if opt_m.to_i == 0 then
246
+ #@output_bak = @output
247
+ txt = @output.gsub(/^\s*\<img +src\=\"\/Fig\/arrow\_top\.gif\"\>.+$\r?\n/, '')
248
+ txt.gsub!(/^.+\<\/form\>$/, '')
249
+ txt.gsub!(/^\<form *method\=\"POST\" name\=\"clust\_check\"\>.+$\r?\n/, '')
250
+ txt.gsub!(/\<[^\>\<]+\>/m, '')
251
+ @output = txt
252
+ end
253
+
254
+ return @output
255
+ end
256
+
257
+ end # module GenomeNet
258
+
259
+ # alias for lazy load
260
+ Genomenet = GenomeNet
261
+
262
+ end # module Bio::Blast::Remote
263
+