bio 1.2.1 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -2,10 +2,11 @@
2
2
  # = bio/location.rb - Locations/Location class (GenBank location format)
3
3
  #
4
4
  # Copyright:: Copyright (C) 2001, 2005 Toshiaki Katayama <k@bioruby.org>
5
- # Copyright:: Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
5
+ # 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
6
+ # 2008 Naohisa Goto <ng@bioruby.org>
6
7
  # License:: The Ruby License
7
8
  #
8
- # $Id: location.rb,v 0.28 2007/04/05 23:35:39 trevor Exp $
9
+ # $Id:$
9
10
  #
10
11
 
11
12
  module Bio
@@ -69,7 +70,8 @@ class Location
69
70
  when /^[<>]?(\d+)\^[<>]?(\d+)$/ # (C, I) n^m
70
71
  s = $1.to_i
71
72
  e = $2.to_i
72
- if e - s != 1
73
+ carat = true
74
+ if e - s != 1 or e != 1 # assert n^n+1 or n^1
73
75
  # raise "Error: invalid range : #{location}"
74
76
  $stderr.puts "[Warning] invalid range : #{location}" if $DEBUG
75
77
  end
@@ -90,11 +92,38 @@ class Location
90
92
  @lt = lt # true if the position contains '<'
91
93
  @gt = gt # true if the position contains '>'
92
94
  @xref_id = xref_id # link to the external entry as GenBank ID
95
+ @carat = carat # true if the location indicates the site
96
+ # between two adjoining nucleotides
93
97
  end
94
98
 
95
- attr_accessor :from, :to, :strand, :sequence, :lt, :gt, :xref_id
99
+ # (Integer) start position of the location
100
+ attr_accessor :from
101
+ # (Integer) end position of the location
102
+ attr_accessor :to
96
103
 
97
- # Complements the sequence (i.e. alternates the strand).
104
+ # (Integer) strand direction of the location
105
+ # (forward => 1 or complement => -1)
106
+ attr_accessor :strand
107
+
108
+ # (String) literal sequence of the location
109
+ attr_accessor :sequence
110
+
111
+ # (true, false or nil) true if the position contains '<'
112
+ attr_accessor :lt
113
+
114
+ # (true, false or nil) true if the position contains '>'
115
+ attr_accessor :gt
116
+
117
+ # (String) link to the external entry as GenBank ID
118
+ attr_accessor :xref_id
119
+
120
+ # (true, false or nil) true if the location indicates the site
121
+ # between two adjoining nucleotides
122
+ attr_accessor :carat
123
+
124
+ # Complements the sequence location (i.e. alternates the strand).
125
+ # Note that it is a destructive method (i.e. modifies itself),
126
+ # but it does not modify the "sequence" attribute.
98
127
  # ---
99
128
  # *Returns*:: the Bio::Location object
100
129
  def complement
@@ -293,6 +322,7 @@ class Locations
293
322
  # * (required) _str_: GenBank style position string
294
323
  # *Returns*:: Bio::Locations object
295
324
  def initialize(position)
325
+ @operator = nil
296
326
  if position.is_a? Array
297
327
  @locations = position
298
328
  else
@@ -301,9 +331,13 @@ class Locations
301
331
  end
302
332
  end
303
333
 
304
- # An Array of Bio::Location objects
334
+ # (Array) An Array of Bio::Location objects
305
335
  attr_accessor :locations
306
336
 
337
+ # (Symbol or nil) Operator.
338
+ # nil (means :join), :order, or :group (obsolete).
339
+ attr_accessor :operator
340
+
307
341
  # Evaluate equality of Bio::Locations object.
308
342
  def equals?(other)
309
343
  if ! other.kind_of?(Bio::Locations)
@@ -427,13 +461,71 @@ class Locations
427
461
  end
428
462
 
429
463
 
464
+ # String representation.
465
+ #
466
+ # Note: In some cases, it fails to detect whether
467
+ # "complement(join(...))" or "join(complement(..))", and whether
468
+ # "complement(order(...))" or "order(complement(..))".
469
+ #
470
+ # ---
471
+ # *Returns*:: String
472
+ def to_s
473
+ return '' if @locations.empty?
474
+ complement_join = false
475
+ locs = @locations
476
+ if locs.size >= 2 and locs.inject(true) do |flag, loc|
477
+ # check if each location is complement
478
+ (flag && (loc.strand == -1) && !loc.xref_id)
479
+ end and locs.inject(locs[0].from) do |pos, loc|
480
+ if pos then
481
+ (pos >= loc.from) ? loc.from : false
482
+ else
483
+ false
484
+ end
485
+ end then
486
+ locs = locs.reverse
487
+ complement_join = true
488
+ end
489
+ locs = locs.collect do |loc|
490
+ lt = loc.lt ? '<' : ''
491
+ gt = loc.gt ? '>' : ''
492
+ str = if loc.from == loc.to then
493
+ "#{lt}#{gt}#{loc.from.to_i}"
494
+ elsif loc.carat then
495
+ "#{lt}#{loc.from.to_i}^#{gt}#{loc.to.to_i}"
496
+ else
497
+ "#{lt}#{loc.from.to_i}..#{gt}#{loc.to.to_i}"
498
+ end
499
+ if loc.xref_id and !loc.xref_id.empty? then
500
+ str = "#{loc.xref_id}:#{str}"
501
+ end
502
+ if loc.strand == -1 and !complement_join then
503
+ str = "complement(#{str})"
504
+ end
505
+ if loc.sequence then
506
+ str = "replace(#{str},\"#{loc.sequence}\")"
507
+ end
508
+ str
509
+ end
510
+ if locs.size >= 2 then
511
+ op = (self.operator || 'join').to_s
512
+ result = "#{op}(#{locs.join(',')})"
513
+ else
514
+ result = locs[0]
515
+ end
516
+ if complement_join then
517
+ result = "complement(#{result})"
518
+ end
519
+ result
520
+ end
521
+
430
522
  private
431
523
 
432
524
 
433
525
  # Preprocessing to clean up the position notation.
434
526
  def gbl_cleanup(position)
435
527
  # sometimes position contains white spaces...
436
- position.gsub!(/\s+/, '')
528
+ position = position.gsub(/\s+/, '')
437
529
 
438
530
  # select one base # (D) n.m
439
531
  # .. n m :
@@ -456,8 +548,8 @@ class Locations
456
548
  end
457
549
  end
458
550
 
459
- # substitute order(), group() by join() # (F) group(), order()
460
- position.gsub!(/(order|group)/, 'join')
551
+ ## substitute order(), group() by join() # (F) group(), order()
552
+ #position.gsub!(/(order|group)/, 'join')
461
553
 
462
554
  return position
463
555
  end
@@ -469,8 +561,11 @@ class Locations
469
561
 
470
562
  case position
471
563
 
472
- when /^join\((.*)\)$/ # (F) join()
473
- position = $1
564
+ when /^(join|order|group)\((.*)\)$/ # (F) join()
565
+ if $1 != "join" then
566
+ @operator = $1.intern
567
+ end
568
+ position = $2
474
569
 
475
570
  join_list = [] # sub positions to join
476
571
  bracket = [] # position with bracket
@@ -6,7 +6,7 @@
6
6
  # Shuichi Kawashima <shuichi@hgc.jp>
7
7
  # License:: The Ruby License
8
8
  #
9
- # $Id: pathway.rb,v 1.36 2007/04/05 23:35:39 trevor Exp $
9
+ # $Id:$
10
10
  #
11
11
 
12
12
  require 'matrix'
@@ -184,6 +184,15 @@ class Pathway
184
184
  # object. If the first argument was assigned, the matrix will be
185
185
  # filled with the given value. The second argument indicates the
186
186
  # value of the diagonal constituents of the matrix besides the above.
187
+ #
188
+ # The result of this method depends on the order of Hash#each
189
+ # (and each_key, etc.), which may be variable with Ruby version
190
+ # and Ruby interpreter variations (JRuby, etc.).
191
+ # For a workaround to remove such dependency, you can use @index
192
+ # to set order of Hash keys. Note that this behavior might be
193
+ # changed in the future. Be careful that @index is overwritten by
194
+ # this method.
195
+ #
187
196
  def to_matrix(default_value = nil, diagonal_value = nil)
188
197
 
189
198
  #--
@@ -206,9 +215,31 @@ class Pathway
206
215
  end
207
216
  end
208
217
 
209
- # assign index number for each node
210
- @graph.keys.each_with_index do |k, i|
211
- @index[k] = i
218
+ # assign index number
219
+ if @index.empty? then
220
+ # assign index number for each node
221
+ @graph.keys.each_with_index do |k, i|
222
+ @index[k] = i
223
+ end
224
+ else
225
+ # begin workaround removing dependency on the order of Hash#each
226
+ # assign index number from the preset @index
227
+ indices = @index.to_a
228
+ indices.sort! { |i0, i1| i0[1] <=> i1[1] }
229
+ indices.collect! { |i0| i0[0] }
230
+ @index.clear
231
+ v = 0
232
+ indices.each do |k, i|
233
+ if @graph[k] and !@index[k] then
234
+ @index[k] = v; v += 1
235
+ end
236
+ end
237
+ @graph.each_key do |k|
238
+ unless @index[k] then
239
+ @index[k] = v; v += 1
240
+ end
241
+ end
242
+ # end workaround removing dependency on the order of Hash#each
212
243
  end
213
244
 
214
245
  if @relations.empty? # only used after clear_relations!
@@ -236,6 +267,10 @@ class Pathway
236
267
  # The dump_matrix method accepts the same arguments as to_matrix.
237
268
  # Useful when you want to check the internal state of the matrix
238
269
  # (for debug purpose etc.) easily.
270
+ #
271
+ # This method internally calls to_matrix method.
272
+ # Read the documentation of to_matrix for important information.
273
+ #
239
274
  def dump_matrix(*arg)
240
275
  matrix = self.to_matrix(*arg)
241
276
  sorted = @index.sort {|a,b| a[1] <=> b[1]}
@@ -245,13 +280,43 @@ class Pathway
245
280
 
246
281
  # Pretty printer of the adjacency list.
247
282
  #
248
- # The dump_matrix method accepts the same arguments as to_matrix.
249
283
  # Useful when you want to check the internal state of the adjacency
250
284
  # list (for debug purpose etc.) easily.
285
+ #
286
+ # The result of this method depends on the order of Hash#each
287
+ # (and each_key, etc.), which may be variable with Ruby version
288
+ # and Ruby interpreter variations (JRuby, etc.).
289
+ # For a workaround to remove such dependency, you can use @index
290
+ # to set order of Hash keys. Note that this behavior might be
291
+ # changed in the future.
292
+ #
251
293
  def dump_list
294
+ # begin workaround removing dependency on the order of Hash#each
295
+ if @index.empty? then
296
+ pref = nil
297
+ enum = @graph
298
+ else
299
+ pref = {}.merge(@index)
300
+ i = pref.values.max
301
+ @graph.each_key do |node|
302
+ pref[node] ||= (i += 1)
303
+ end
304
+ graph_to_a = @graph.to_a
305
+ graph_to_a.sort! { |x, y| pref[x[0]] <=> pref[y[0]] }
306
+ enum = graph_to_a
307
+ end
308
+ # end workaround removing dependency on the order of Hash#each
309
+
252
310
  list = ""
253
- @graph.each do |from, hash|
311
+ enum.each do |from, hash|
254
312
  list << "#{from} => "
313
+ # begin workaround removing dependency on the order of Hash#each
314
+ if pref then
315
+ ary = hash.to_a
316
+ ary.sort! { |x,y| pref[x[0]] <=> pref[y[0]] }
317
+ hash = ary
318
+ end
319
+ # end workaround removing dependency on the order of Hash#each
255
320
  a = []
256
321
  hash.each do |to, relation|
257
322
  a.push("#{to} (#{relation})")
@@ -285,6 +350,7 @@ class Pathway
285
350
  sub_graph = Pathway.new([], @undirected)
286
351
  @graph.each do |from, hash|
287
352
  next unless @label[from]
353
+ sub_graph.graph[from] ||= {}
288
354
  hash.each do |to, relation|
289
355
  next unless @label[to]
290
356
  sub_graph.append(Relation.new(from, to, relation))
@@ -310,14 +376,23 @@ class Pathway
310
376
  #
311
377
  # Calculates the value of cliquishness around the 'node'. This value
312
378
  # indicates completeness of the edge density among the surrounded nodes.
379
+ #
380
+ # Note: cliquishness (clustering coefficient) for a directed graph
381
+ # is also calculated.
382
+ # Reference: http://en.wikipedia.org/wiki/Clustering_coefficient
383
+ #
384
+ # Note: Cliquishness (clustering coefficient) for a node that has
385
+ # only one neighbor node is undefined. Currently, it returns NaN,
386
+ # but the behavior may be changed in the future.
387
+ #
313
388
  def cliquishness(node)
314
389
  neighbors = @graph[node].keys
315
390
  sg = subgraph(neighbors)
316
391
  if sg.graph.size != 0
317
- edges = sg.edges / 2.0
318
- nodes = sg.nodes
319
- complete = (nodes * (nodes - 1)) / 2.0
320
- return edges/complete
392
+ edges = sg.edges
393
+ nodes = neighbors.size
394
+ complete = (nodes * (nodes - 1))
395
+ return edges.quo(complete)
321
396
  else
322
397
  return 0.0
323
398
  end
@@ -396,6 +471,13 @@ class Pathway
396
471
  # of the search.
397
472
  #
398
473
  # The weight of the edges are not considered in this method.
474
+ #
475
+ # Note: The result of this method depends on the order of Hash#each
476
+ # (and each_key, etc.), which may be variable with Ruby version
477
+ # and Ruby interpreter variations (JRuby, etc.).
478
+ # For a workaround to remove such dependency, you can use @index
479
+ # to set order of Hash keys. Note that this behavior might be
480
+ # changed in the future.
399
481
  def depth_first_search
400
482
  visited = {}
401
483
  timestamp = {}
@@ -405,10 +487,28 @@ class Pathway
405
487
  forward_edges = {}
406
488
  count = 0
407
489
 
490
+ # begin workaround removing dependency on the order of Hash#each
491
+ if @index.empty? then
492
+ preference_of_nodes = nil
493
+ else
494
+ preference_of_nodes = {}.merge(@index)
495
+ i = preference_of_nodes.values.max
496
+ @graph.each_key do |node0|
497
+ preference_of_nodes[node0] ||= (i += 1)
498
+ end
499
+ end
500
+ # end workaround removing dependency on the order of Hash#each
501
+
408
502
  dfs_visit = Proc.new { |from|
409
503
  visited[from] = true
410
504
  timestamp[from] = [count += 1]
411
- @graph[from].each_key do |to|
505
+ ary = @graph[from].keys
506
+ # begin workaround removing dependency on the order of Hash#each
507
+ if preference_of_nodes then
508
+ ary = ary.sort_by { |node0| preference_of_nodes[node0] }
509
+ end
510
+ # end workaround removing dependency on the order of Hash#each
511
+ ary.each do |to|
412
512
  if visited[to]
413
513
  if timestamp[to].size > 1
414
514
  if timestamp[from].first < timestamp[to].first
@@ -435,7 +535,13 @@ class Pathway
435
535
  timestamp[from].push(count += 1)
436
536
  }
437
537
 
438
- @graph.each_key do |node|
538
+ ary = @graph.keys
539
+ # begin workaround removing dependency on the order of Hash#each
540
+ if preference_of_nodes then
541
+ ary = ary.sort_by { |node0| preference_of_nodes[node0] }
542
+ end
543
+ # end workaround removing dependency on the order of Hash#each
544
+ ary.each do |node|
439
545
  unless visited[node]
440
546
  dfs_visit.call(node)
441
547
  end
@@ -543,8 +649,8 @@ class Pathway
543
649
  end
544
650
  end
545
651
  end
546
- index.sort{|x, y| y<=>x}.each do |i|
547
- rel[i, 1] = []
652
+ index.sort{|x, y| y<=>x}.each do |idx|
653
+ rel[idx, 1] = []
548
654
  end
549
655
  mst = []
550
656
  seen = Hash.new()
@@ -1,14 +1,17 @@
1
1
  #
2
2
  # = bio/reference.rb - Journal reference classes
3
3
  #
4
- # Copyright:: Copyright (C) 2001, 2006
4
+ # Copyright:: Copyright (C) 2001, 2006, 2008
5
5
  # Toshiaki Katayama <k@bioruby.org>,
6
- # Ryan Raaum <ryan@raaum.org>
6
+ # Ryan Raaum <ryan@raaum.org>,
7
+ # Jan Aerts <jandot@bioruby.org>
7
8
  # License:: The Ruby License
8
9
  #
9
- # $Id: reference.rb,v 1.24 2007/04/05 23:35:39 trevor Exp $
10
+ # $Id:$
10
11
  #
11
12
 
13
+ require 'enumerator'
14
+
12
15
  module Bio
13
16
 
14
17
  # = DESCRIPTION
@@ -66,6 +69,9 @@ module Bio
66
69
 
67
70
  # medline identifier (typically Fixnum)
68
71
  attr_reader :medline
72
+
73
+ # DOI identifier (typically String, e.g. "10.1126/science.1110418")
74
+ attr_reader :doi
69
75
 
70
76
  # Abstract text in String.
71
77
  attr_reader :abstract
@@ -78,6 +84,15 @@ module Bio
78
84
 
79
85
  # Affiliations in an Array.
80
86
  attr_reader :affiliations
87
+
88
+ # Sequence number in EMBL/GenBank records
89
+ attr_reader :embl_gb_record_number
90
+
91
+ # Position in a sequence that this reference refers to
92
+ attr_reader :sequence_position
93
+
94
+ # Comments for the reference (typically Array of String, or nil)
95
+ attr_reader :comments
81
96
 
82
97
  # Create a new Bio::Reference object from a Hash of values.
83
98
  # Data is extracted from the values for keys:
@@ -116,23 +131,23 @@ module Bio
116
131
  # * (required) _hash_: Hash
117
132
  # *Returns*:: Bio::Reference object
118
133
  def initialize(hash)
119
- hash.default = ''
120
- @authors = hash['authors'] # [ "Hoge, J.P.", "Fuga, F.B." ]
121
- @title = hash['title'] # "Title of the study."
122
- @journal = hash['journal'] # "Theor. J. Hoge"
123
- @volume = hash['volume'] # 12
124
- @issue = hash['issue'] # 3
125
- @pages = hash['pages'] # 123-145
126
- @year = hash['year'] # 2001
127
- @pubmed = hash['pubmed'] # 12345678
128
- @medline = hash['medline'] # 98765432
129
- @abstract = hash['abstract']
134
+ @authors = hash['authors'] || [] # [ "Hoge, J.P.", "Fuga, F.B." ]
135
+ @title = hash['title'] || '' # "Title of the study."
136
+ @journal = hash['journal'] || '' # "Theor. J. Hoge"
137
+ @volume = hash['volume'] || '' # 12
138
+ @issue = hash['issue'] || '' # 3
139
+ @pages = hash['pages'] || '' # 123-145
140
+ @year = hash['year'] || '' # 2001
141
+ @pubmed = hash['pubmed'] || '' # 12345678
142
+ @medline = hash['medline'] || '' # 98765432
143
+ @doi = hash['doi']
144
+ @abstract = hash['abstract'] || ''
130
145
  @url = hash['url']
131
- @mesh = hash['mesh']
132
- @affiliations = hash['affiliations']
133
- @authors = [] if @authors.empty?
134
- @mesh = [] if @mesh.empty?
135
- @affiliations = [] if @affiliations.empty?
146
+ @mesh = hash['mesh'] || []
147
+ @embl_gb_record_number = hash['embl_gb_record_number'] || nil
148
+ @sequence_position = hash['sequence_position'] || nil
149
+ @comments = hash['comments']
150
+ @affiliations = hash['affiliations'] || []
136
151
  end
137
152
 
138
153
  # Formats the reference in a given style.
@@ -166,20 +181,22 @@ module Bio
166
181
  # ---
167
182
  # *Arguments*:
168
183
  # * (optional) _style_: String with style identifier
169
- # * (optional) _option_: Option for styles accepting one
184
+ # * (optional) _options_: Options for styles accepting one
170
185
  # *Returns*:: String
171
- def format(style = nil, option = nil)
186
+ def format(style = nil, *options)
172
187
  case style
188
+ when 'embl'
189
+ return embl
173
190
  when 'endnote'
174
191
  return endnote
175
192
  when 'bibitem'
176
- return bibitem(option)
193
+ return bibitem(*options)
177
194
  when 'bibtex'
178
- return bibtex(option)
195
+ return bibtex(*options)
179
196
  when 'rd'
180
- return rd(option)
197
+ return rd(*options)
181
198
  when /^nature$/i
182
- return nature(option)
199
+ return nature(*options)
183
200
  when /^science$/i
184
201
  return science
185
202
  when /^genome\s*_*biol/i
@@ -231,12 +248,8 @@ module Bio
231
248
  lines << "%N #{@issue}" unless @issue.to_s.empty?
232
249
  lines << "%P #{@pages}" unless @pages.empty?
233
250
  lines << "%M #{@pubmed}" unless @pubmed.to_s.empty?
234
- if @pubmed
235
- cgi = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi"
236
- opts = "cmd=Retrieve&db=PubMed&dopt=Citation&list_uids"
237
- @url = "#{cgi}?#{opts}=#{@pubmed}"
238
- end
239
- lines << "%U #{@url}" unless @url.empty?
251
+ u = @url.empty? ? pubmed_url : @url
252
+ lines << "%U #{u}" unless u.empty?
240
253
  lines << "%X #{@abstract}" unless @abstract.empty?
241
254
  @mesh.each do |term|
242
255
  lines << "%K #{term}"
@@ -245,6 +258,24 @@ module Bio
245
258
  return lines.join("\n")
246
259
  end
247
260
 
261
+ # Returns reference formatted in the EMBL style.
262
+ #
263
+ # # ref is a Bio::Reference object
264
+ # puts ref.embl
265
+ #
266
+ # RP 1-1859
267
+ # RX PUBMED; 1907511.
268
+ # RA Oxtoby E., Dunn M.A., Pancoro A., Hughes M.A.;
269
+ # RT "Nucleotide and derived amino acid sequence of the cyanogenic
270
+ # RT beta-glucosidase (linamarase) from white clover (Trifolium repens L.)";
271
+ # RL Plant Mol. Biol. 17(2):209-219(1991).
272
+ def embl
273
+ r = self
274
+ Bio::Sequence::Format::NucFormatter::Embl.new('').instance_eval {
275
+ reference_format_embl(r)
276
+ }
277
+ end
278
+
248
279
  # Returns reference formatted in the bibitem style
249
280
  #
250
281
  # # ref is a Bio::Reference object
@@ -255,11 +286,13 @@ module Bio
255
286
  # Title of the study.,
256
287
  # {\em Theor. J. Hoge}, 12(3):123--145, 2001.
257
288
  # ---
289
+ # *Arguments*:
290
+ # * (optional) _item_: label string (default: <tt>"PMID:#{pubmed}"</tt>).
258
291
  # *Returns*:: String
259
292
  def bibitem(item = nil)
260
293
  item = "PMID:#{@pubmed}" unless item
261
294
  pages = @pages.sub('-', '--')
262
- return <<-"END".collect {|line| line.strip}.join("\n")
295
+ return <<-"END".enum_for(:each_line).collect {|line| line.strip}.join("\n")
263
296
  \\bibitem{#{item}}
264
297
  #{@authors.join(', ')}
265
298
  #{@title},
@@ -298,22 +331,48 @@ module Bio
298
331
  # ---
299
332
  # *Arguments*:
300
333
  # * (optional) _section_: BibTeX section as String
334
+ # * (optional) _label_: Label string cited by LaTeX documents.
335
+ # Default is <tt>"PMID:#{pubmed}"</tt>.
336
+ # * (optional) _keywords_: Hash of additional keywords,
337
+ # e.g. { 'abstract' => 'This is abstract.' }.
338
+ # You can also override default keywords.
339
+ # To disable default keywords, specify false as
340
+ # value, e.g. { 'url' => false, 'year' => false }.
301
341
  # *Returns*:: String
302
- def bibtex(section = nil)
342
+ def bibtex(section = nil, label = nil, keywords = {})
303
343
  section = "article" unless section
304
344
  authors = authors_join(' and ', ' and ')
305
- pages = @pages.sub('-', '--')
306
- return <<-"END".gsub(/\t/, '')
307
- @#{section}{PMID:#{@pubmed},
308
- author = {#{authors}},
309
- title = {#{@title}},
310
- journal = {#{@journal}},
311
- year = {#{@year}},
312
- volume = {#{@volume}},
313
- number = {#{@issue}},
314
- pages = {#{pages}},
315
- }
316
- END
345
+ thepages = pages.to_s.empty? ? nil : pages.sub(/\-/, '--')
346
+ unless label then
347
+ label = "PMID:#{pubmed}"
348
+ end
349
+ theurl = if !(url.to_s.empty?) then
350
+ url
351
+ elsif pmurl = pubmed_url and !(pmurl.to_s.empty?) then
352
+ pmurl
353
+ else
354
+ nil
355
+ end
356
+ hash = {
357
+ 'author' => authors.empty? ? nil : authors,
358
+ 'title' => title.to_s.empty? ? nil : title,
359
+ 'number' => issue.to_s.empty? ? nil : issue,
360
+ 'pages' => thepages,
361
+ 'url' => theurl
362
+ }
363
+ keys = %w( author title journal year volume number pages url )
364
+ keys.each do |k|
365
+ hash[k] = self.__send__(k.intern) unless hash.has_key?(k)
366
+ end
367
+ hash.merge!(keywords) { |k, v1, v2| v2.nil? ? v1 : v2 }
368
+ bib = [ "@#{section}{#{label}," ]
369
+ keys.concat((hash.keys - keys).sort)
370
+ keys.each do |kw|
371
+ ref = hash[kw]
372
+ bib.push " #{kw.ljust(12)} = {#{ref}}," if ref
373
+ end
374
+ bib.push "}\n"
375
+ return bib.join("\n")
317
376
  end
318
377
 
319
378
  # Returns reference formatted in a general/generic style.
@@ -499,6 +558,17 @@ module Bio
499
558
  "#{authors} (#{@year}) #{@title} #{@journal} #{@volume}, #{@pages}"
500
559
  end
501
560
 
561
+ # Returns the PubMed URL for this reference, or an empty string when no PubMed ID is set
562
+ #
563
+ # *Returns*:: String
564
+ def pubmed_url
565
+ unless @pubmed.to_s.empty?
566
+ cgi = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi"
567
+ opts = "cmd=Retrieve&db=PubMed&dopt=Citation&list_uids"
568
+ return "#{cgi}?#{opts}=#{@pubmed}"
569
+ end
570
+ ''
571
+ end
502
572
 
503
573
  private
504
574
 
@@ -528,61 +598,5 @@ module Bio
528
598
 
529
599
  end
530
600
 
531
- # = DESCRIPTION
532
- #
533
- # A container class for Bio::Reference objects.
534
- #
535
- # = USAGE
536
- #
537
- # refs = Bio::References.new
538
- # refs.append(Bio::Reference.new(hash))
539
- # refs.each do |reference|
540
- # ...
541
- # end
542
- #
543
- class References
544
-
545
- # Array of Bio::Reference objects
546
- attr_accessor :references
547
-
548
- # Create a new Bio::References object
549
- #
550
- # refs = Bio::References.new
551
- # ---
552
- # *Arguments*:
553
- # * (optional) __: Array of Bio::Reference objects
554
- # *Returns*:: Bio::References object
555
- def initialize(ary = [])
556
- @references = ary
557
- end
558
-
559
-
560
- # Add a Bio::Reference object to the container.
561
- #
562
- # refs.append(reference)
563
- # ---
564
- # *Arguments*:
565
- # * (required) _reference_: Bio::Reference object
566
- # *Returns*:: current Bio::References object
567
- def append(reference)
568
- @references.push(reference) if reference.is_a? Reference
569
- return self
570
- end
571
-
572
- # Iterate through Bio::Reference objects.
573
- #
574
- # refs.each do |reference|
575
- # ...
576
- # end
577
- # ---
578
- # *Block*:: yields each Bio::Reference object
579
- def each
580
- @references.each do |reference|
581
- yield reference
582
- end
583
- end
584
-
585
- end
586
-
587
601
  end
588
602