bio 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259)
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -2,10 +2,11 @@
2
2
  # = bio/location.rb - Locations/Location class (GenBank location format)
3
3
  #
4
4
  # Copyright:: Copyright (C) 2001, 2005 Toshiaki Katayama <k@bioruby.org>
5
- # Copyright:: Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
5
+ # 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
6
+ # 2008 Naohisa Goto <ng@bioruby.org>
6
7
  # License:: The Ruby License
7
8
  #
8
- # $Id: location.rb,v 0.28 2007/04/05 23:35:39 trevor Exp $
9
+ # $Id:$
9
10
  #
10
11
 
11
12
  module Bio
@@ -69,7 +70,8 @@ class Location
69
70
  when /^[<>]?(\d+)\^[<>]?(\d+)$/ # (C, I) n^m
70
71
  s = $1.to_i
71
72
  e = $2.to_i
72
- if e - s != 1
73
+ carat = true
74
+ if e - s != 1 or e != 1 # assert n^n+1 or n^1
73
75
  # raise "Error: invalid range : #{location}"
74
76
  $stderr.puts "[Warning] invalid range : #{location}" if $DEBUG
75
77
  end
@@ -90,11 +92,38 @@ class Location
90
92
  @lt = lt # true if the position contains '<'
91
93
  @gt = gt # true if the position contains '>'
92
94
  @xref_id = xref_id # link to the external entry as GenBank ID
95
+ @carat = carat # true if the location indicates the site
96
+ # between two adjoining nucleotides
93
97
  end
94
98
 
95
- attr_accessor :from, :to, :strand, :sequence, :lt, :gt, :xref_id
99
+ # (Integer) start position of the location
100
+ attr_accessor :from
101
+ # (Integer) end position of the location
102
+ attr_accessor :to
96
103
 
97
- # Complements the sequence (i.e. alternates the strand).
104
+ # (Integer) strand direction of the location
105
+ # (forward => 1 or complement => -1)
106
+ attr_accessor :strand
107
+
108
+ # (String) literal sequence of the location
109
+ attr_accessor :sequence
110
+
111
+ # (true, false or nil) true if the position contains '<'
112
+ attr_accessor :lt
113
+
114
+ # (true, false or nil) true if the position contains '>'
115
+ attr_accessor :gt
116
+
117
+ # (String) link to the external entry as GenBank ID
118
+ attr_accessor :xref_id
119
+
120
+ # (true, false or nil) true if the location indicates the site
121
+ # between two adjoining nucleotides
122
+ attr_accessor :carat
123
+
124
+ # Complements the sequence location (i.e. alternates the strand).
125
+ # Note that it is a destructive method (i.e. modifies itself),
126
+ # but it does not modify the "sequence" attribute.
98
127
  # ---
99
128
  # *Returns*:: the Bio::Location object
100
129
  def complement
@@ -293,6 +322,7 @@ class Locations
293
322
  # * (required) _str_: GenBank style position string
294
323
  # *Returns*:: Bio::Locations object
295
324
  def initialize(position)
325
+ @operator = nil
296
326
  if position.is_a? Array
297
327
  @locations = position
298
328
  else
@@ -301,9 +331,13 @@ class Locations
301
331
  end
302
332
  end
303
333
 
304
- # An Array of Bio::Location objects
334
+ # (Array) An Array of Bio::Location objects
305
335
  attr_accessor :locations
306
336
 
337
+ # (Symbol or nil) Operator.
338
+ # nil (means :join), :order, or :group (obsolete).
339
+ attr_accessor :operator
340
+
307
341
  # Evaluate equality of Bio::Locations object.
308
342
  def equals?(other)
309
343
  if ! other.kind_of?(Bio::Locations)
@@ -427,13 +461,71 @@ class Locations
427
461
  end
428
462
 
429
463
 
464
+ # String representation.
465
+ #
466
+ # Note: In some cases, it fails to detect whether
467
+ # "complement(join(...))" or "join(complement(..))", and whether
468
+ # "complement(order(...))" or "order(complement(..))".
469
+ #
470
+ # ---
471
+ # *Returns*:: String
472
+ def to_s
473
+ return '' if @locations.empty?
474
+ complement_join = false
475
+ locs = @locations
476
+ if locs.size >= 2 and locs.inject(true) do |flag, loc|
477
+ # check if each location is complement
478
+ (flag && (loc.strand == -1) && !loc.xref_id)
479
+ end and locs.inject(locs[0].from) do |pos, loc|
480
+ if pos then
481
+ (pos >= loc.from) ? loc.from : false
482
+ else
483
+ false
484
+ end
485
+ end then
486
+ locs = locs.reverse
487
+ complement_join = true
488
+ end
489
+ locs = locs.collect do |loc|
490
+ lt = loc.lt ? '<' : ''
491
+ gt = loc.gt ? '>' : ''
492
+ str = if loc.from == loc.to then
493
+ "#{lt}#{gt}#{loc.from.to_i}"
494
+ elsif loc.carat then
495
+ "#{lt}#{loc.from.to_i}^#{gt}#{loc.to.to_i}"
496
+ else
497
+ "#{lt}#{loc.from.to_i}..#{gt}#{loc.to.to_i}"
498
+ end
499
+ if loc.xref_id and !loc.xref_id.empty? then
500
+ str = "#{loc.xref_id}:#{str}"
501
+ end
502
+ if loc.strand == -1 and !complement_join then
503
+ str = "complement(#{str})"
504
+ end
505
+ if loc.sequence then
506
+ str = "replace(#{str},\"#{loc.sequence}\")"
507
+ end
508
+ str
509
+ end
510
+ if locs.size >= 2 then
511
+ op = (self.operator || 'join').to_s
512
+ result = "#{op}(#{locs.join(',')})"
513
+ else
514
+ result = locs[0]
515
+ end
516
+ if complement_join then
517
+ result = "complement(#{result})"
518
+ end
519
+ result
520
+ end
521
+
430
522
  private
431
523
 
432
524
 
433
525
  # Preprocessing to clean up the position notation.
434
526
  def gbl_cleanup(position)
435
527
  # sometimes position contains white spaces...
436
- position.gsub!(/\s+/, '')
528
+ position = position.gsub(/\s+/, '')
437
529
 
438
530
  # select one base # (D) n.m
439
531
  # .. n m :
@@ -456,8 +548,8 @@ class Locations
456
548
  end
457
549
  end
458
550
 
459
- # substitute order(), group() by join() # (F) group(), order()
460
- position.gsub!(/(order|group)/, 'join')
551
+ ## substitute order(), group() by join() # (F) group(), order()
552
+ #position.gsub!(/(order|group)/, 'join')
461
553
 
462
554
  return position
463
555
  end
@@ -469,8 +561,11 @@ class Locations
469
561
 
470
562
  case position
471
563
 
472
- when /^join\((.*)\)$/ # (F) join()
473
- position = $1
564
+ when /^(join|order|group)\((.*)\)$/ # (F) join()
565
+ if $1 != "join" then
566
+ @operator = $1.intern
567
+ end
568
+ position = $2
474
569
 
475
570
  join_list = [] # sub positions to join
476
571
  bracket = [] # position with bracket
@@ -6,7 +6,7 @@
6
6
  # Shuichi Kawashima <shuichi@hgc.jp>
7
7
  # License:: The Ruby License
8
8
  #
9
- # $Id: pathway.rb,v 1.36 2007/04/05 23:35:39 trevor Exp $
9
+ # $Id:$
10
10
  #
11
11
 
12
12
  require 'matrix'
@@ -184,6 +184,15 @@ class Pathway
184
184
  # object. If the first argument was assigned, the matrix will be
185
185
  # filled with the given value. The second argument indicates the
186
186
  # value of the diagonal constituents of the matrix besides the above.
187
+ #
188
+ # The result of this method depends on the order of Hash#each
189
+ # (and each_key, etc.), which may be variable with Ruby version
190
+ # and Ruby interpreter variations (JRuby, etc.).
191
+ # For a workaround to remove such dependency, you can use @index
192
+ # to set order of Hash keys. Note that this behavior might be
193
+ # changed in the future. Be careful that @index is overwritten by
194
+ # this method.
195
+ #
187
196
  def to_matrix(default_value = nil, diagonal_value = nil)
188
197
 
189
198
  #--
@@ -206,9 +215,31 @@ class Pathway
206
215
  end
207
216
  end
208
217
 
209
- # assign index number for each node
210
- @graph.keys.each_with_index do |k, i|
211
- @index[k] = i
218
+ # assign index number
219
+ if @index.empty? then
220
+ # assign index number for each node
221
+ @graph.keys.each_with_index do |k, i|
222
+ @index[k] = i
223
+ end
224
+ else
225
+ # begin workaround removing dependency on the order of Hash#each
226
+ # assign index number from the preset @index
227
+ indices = @index.to_a
228
+ indices.sort! { |i0, i1| i0[1] <=> i1[1] }
229
+ indices.collect! { |i0| i0[0] }
230
+ @index.clear
231
+ v = 0
232
+ indices.each do |k, i|
233
+ if @graph[k] and !@index[k] then
234
+ @index[k] = v; v += 1
235
+ end
236
+ end
237
+ @graph.each_key do |k|
238
+ unless @index[k] then
239
+ @index[k] = v; v += 1
240
+ end
241
+ end
242
+ # end workaround removing dependency on the order of Hash#each
212
243
  end
213
244
 
214
245
  if @relations.empty? # only used after clear_relations!
@@ -236,6 +267,10 @@ class Pathway
236
267
  # The dump_matrix method accepts the same arguments as to_matrix.
237
268
  # Useful when you want to check the internal state of the matrix
238
269
  # (for debug purpose etc.) easily.
270
+ #
271
+ # This method internally calls to_matrix method.
272
+ # Read the documentation of to_matrix for important information.
273
+ #
239
274
  def dump_matrix(*arg)
240
275
  matrix = self.to_matrix(*arg)
241
276
  sorted = @index.sort {|a,b| a[1] <=> b[1]}
@@ -245,13 +280,43 @@ class Pathway
245
280
 
246
281
  # Pretty printer of the adjacency list.
247
282
  #
248
- # The dump_matrix method accepts the same arguments as to_matrix.
249
283
  # Useful when you want to check the internal state of the adjacency
250
284
  # list (for debug purpose etc.) easily.
285
+ #
286
+ # The result of this method depends on the order of Hash#each
287
+ # (and each_key, etc.), which may be variable with Ruby version
288
+ # and Ruby interpreter variations (JRuby, etc.).
289
+ # For a workaround to remove such dependency, you can use @index
290
+ # to set order of Hash keys. Note that this behavior might be
291
+ # changed in the future.
292
+ #
251
293
  def dump_list
294
+ # begin workaround removing dependency on the order of Hash#each
295
+ if @index.empty? then
296
+ pref = nil
297
+ enum = @graph
298
+ else
299
+ pref = {}.merge(@index)
300
+ i = pref.values.max
301
+ @graph.each_key do |node|
302
+ pref[node] ||= (i += 1)
303
+ end
304
+ graph_to_a = @graph.to_a
305
+ graph_to_a.sort! { |x, y| pref[x[0]] <=> pref[y[0]] }
306
+ enum = graph_to_a
307
+ end
308
+ # end workaround removing dependency on the order of Hash#each
309
+
252
310
  list = ""
253
- @graph.each do |from, hash|
311
+ enum.each do |from, hash|
254
312
  list << "#{from} => "
313
+ # begin workaround removing dependency on the order of Hash#each
314
+ if pref then
315
+ ary = hash.to_a
316
+ ary.sort! { |x,y| pref[x[0]] <=> pref[y[0]] }
317
+ hash = ary
318
+ end
319
+ # end workaround removing dependency on the order of Hash#each
255
320
  a = []
256
321
  hash.each do |to, relation|
257
322
  a.push("#{to} (#{relation})")
@@ -285,6 +350,7 @@ class Pathway
285
350
  sub_graph = Pathway.new([], @undirected)
286
351
  @graph.each do |from, hash|
287
352
  next unless @label[from]
353
+ sub_graph.graph[from] ||= {}
288
354
  hash.each do |to, relation|
289
355
  next unless @label[to]
290
356
  sub_graph.append(Relation.new(from, to, relation))
@@ -310,14 +376,23 @@ class Pathway
310
376
  #
311
377
  # Calculates the value of cliquishness around the 'node'. This value
312
378
  # indicates completeness of the edge density among the surrounded nodes.
379
+ #
380
+ # Note: cliquishness (clustering coefficient) for a directed graph
381
+ # is also calculated.
382
+ # Reference: http://en.wikipedia.org/wiki/Clustering_coefficient
383
+ #
384
+ # Note: Cliquishness (clustering coefficient) for a node that has
385
+ # only one neighbor node is undefined. Currently, it returns NaN,
386
+ # but the behavior may be changed in the future.
387
+ #
313
388
  def cliquishness(node)
314
389
  neighbors = @graph[node].keys
315
390
  sg = subgraph(neighbors)
316
391
  if sg.graph.size != 0
317
- edges = sg.edges / 2.0
318
- nodes = sg.nodes
319
- complete = (nodes * (nodes - 1)) / 2.0
320
- return edges/complete
392
+ edges = sg.edges
393
+ nodes = neighbors.size
394
+ complete = (nodes * (nodes - 1))
395
+ return edges.quo(complete)
321
396
  else
322
397
  return 0.0
323
398
  end
@@ -396,6 +471,13 @@ class Pathway
396
471
  # of the search.
397
472
  #
398
473
  # The weight of the edges are not considered in this method.
474
+ #
475
+ # Note: The result of this method depends on the order of Hash#each
476
+ # (and each_key, etc.), which may be variable with Ruby version
477
+ # and Ruby interpreter variations (JRuby, etc.).
478
+ # For a workaround to remove such dependency, you can use @index
479
+ # to set order of Hash keys. Note that this behavior might be
480
+ # changed in the future.
399
481
  def depth_first_search
400
482
  visited = {}
401
483
  timestamp = {}
@@ -405,10 +487,28 @@ class Pathway
405
487
  forward_edges = {}
406
488
  count = 0
407
489
 
490
+ # begin workaround removing dependency on the order of Hash#each
491
+ if @index.empty? then
492
+ preference_of_nodes = nil
493
+ else
494
+ preference_of_nodes = {}.merge(@index)
495
+ i = preference_of_nodes.values.max
496
+ @graph.each_key do |node0|
497
+ preference_of_nodes[node0] ||= (i += 1)
498
+ end
499
+ end
500
+ # end workaround removing dependency on the order of Hash#each
501
+
408
502
  dfs_visit = Proc.new { |from|
409
503
  visited[from] = true
410
504
  timestamp[from] = [count += 1]
411
- @graph[from].each_key do |to|
505
+ ary = @graph[from].keys
506
+ # begin workaround removing dependency on the order of Hash#each
507
+ if preference_of_nodes then
508
+ ary = ary.sort_by { |node0| preference_of_nodes[node0] }
509
+ end
510
+ # end workaround removing dependency on the order of Hash#each
511
+ ary.each do |to|
412
512
  if visited[to]
413
513
  if timestamp[to].size > 1
414
514
  if timestamp[from].first < timestamp[to].first
@@ -435,7 +535,13 @@ class Pathway
435
535
  timestamp[from].push(count += 1)
436
536
  }
437
537
 
438
- @graph.each_key do |node|
538
+ ary = @graph.keys
539
+ # begin workaround removing dependency on the order of Hash#each
540
+ if preference_of_nodes then
541
+ ary = ary.sort_by { |node0| preference_of_nodes[node0] }
542
+ end
543
+ # end workaround removing dependency on the order of Hash#each
544
+ ary.each do |node|
439
545
  unless visited[node]
440
546
  dfs_visit.call(node)
441
547
  end
@@ -543,8 +649,8 @@ class Pathway
543
649
  end
544
650
  end
545
651
  end
546
- index.sort{|x, y| y<=>x}.each do |i|
547
- rel[i, 1] = []
652
+ index.sort{|x, y| y<=>x}.each do |idx|
653
+ rel[idx, 1] = []
548
654
  end
549
655
  mst = []
550
656
  seen = Hash.new()
@@ -1,14 +1,17 @@
1
1
  #
2
2
  # = bio/reference.rb - Journal reference classes
3
3
  #
4
- # Copyright:: Copyright (C) 2001, 2006
4
+ # Copyright:: Copyright (C) 2001, 2006, 2008
5
5
  # Toshiaki Katayama <k@bioruby.org>,
6
- # Ryan Raaum <ryan@raaum.org>
6
+ # Ryan Raaum <ryan@raaum.org>,
7
+ # Jan Aerts <jandot@bioruby.org>
7
8
  # License:: The Ruby License
8
9
  #
9
- # $Id: reference.rb,v 1.24 2007/04/05 23:35:39 trevor Exp $
10
+ # $Id:$
10
11
  #
11
12
 
13
+ require 'enumerator'
14
+
12
15
  module Bio
13
16
 
14
17
  # = DESCRIPTION
@@ -66,6 +69,9 @@ module Bio
66
69
 
67
70
  # medline identifier (typically Fixnum)
68
71
  attr_reader :medline
72
+
73
+ # DOI identifier (typically String, e.g. "10.1126/science.1110418")
74
+ attr_reader :doi
69
75
 
70
76
  # Abstract text in String.
71
77
  attr_reader :abstract
@@ -78,6 +84,15 @@ module Bio
78
84
 
79
85
  # Affiliations in an Array.
80
86
  attr_reader :affiliations
87
+
88
+ # Sequence number in EMBL/GenBank records
89
+ attr_reader :embl_gb_record_number
90
+
91
+ # Position in a sequence that this reference refers to
92
+ attr_reader :sequence_position
93
+
94
+ # Comments for the reference (typically Array of String, or nil)
95
+ attr_reader :comments
81
96
 
82
97
  # Create a new Bio::Reference object from a Hash of values.
83
98
  # Data is extracted from the values for keys:
@@ -116,23 +131,23 @@ module Bio
116
131
  # * (required) _hash_: Hash
117
132
  # *Returns*:: Bio::Reference object
118
133
  def initialize(hash)
119
- hash.default = ''
120
- @authors = hash['authors'] # [ "Hoge, J.P.", "Fuga, F.B." ]
121
- @title = hash['title'] # "Title of the study."
122
- @journal = hash['journal'] # "Theor. J. Hoge"
123
- @volume = hash['volume'] # 12
124
- @issue = hash['issue'] # 3
125
- @pages = hash['pages'] # 123-145
126
- @year = hash['year'] # 2001
127
- @pubmed = hash['pubmed'] # 12345678
128
- @medline = hash['medline'] # 98765432
129
- @abstract = hash['abstract']
134
+ @authors = hash['authors'] || [] # [ "Hoge, J.P.", "Fuga, F.B." ]
135
+ @title = hash['title'] || '' # "Title of the study."
136
+ @journal = hash['journal'] || '' # "Theor. J. Hoge"
137
+ @volume = hash['volume'] || '' # 12
138
+ @issue = hash['issue'] || '' # 3
139
+ @pages = hash['pages'] || '' # 123-145
140
+ @year = hash['year'] || '' # 2001
141
+ @pubmed = hash['pubmed'] || '' # 12345678
142
+ @medline = hash['medline'] || '' # 98765432
143
+ @doi = hash['doi']
144
+ @abstract = hash['abstract'] || ''
130
145
  @url = hash['url']
131
- @mesh = hash['mesh']
132
- @affiliations = hash['affiliations']
133
- @authors = [] if @authors.empty?
134
- @mesh = [] if @mesh.empty?
135
- @affiliations = [] if @affiliations.empty?
146
+ @mesh = hash['mesh'] || []
147
+ @embl_gb_record_number = hash['embl_gb_record_number'] || nil
148
+ @sequence_position = hash['sequence_position'] || nil
149
+ @comments = hash['comments']
150
+ @affiliations = hash['affiliations'] || []
136
151
  end
137
152
 
138
153
  # Formats the reference in a given style.
@@ -166,20 +181,22 @@ module Bio
166
181
  # ---
167
182
  # *Arguments*:
168
183
  # * (optional) _style_: String with style identifier
169
- # * (optional) _option_: Option for styles accepting one
184
+ # * (optional) _options_: Options for styles accepting one
170
185
  # *Returns*:: String
171
- def format(style = nil, option = nil)
186
+ def format(style = nil, *options)
172
187
  case style
188
+ when 'embl'
189
+ return embl
173
190
  when 'endnote'
174
191
  return endnote
175
192
  when 'bibitem'
176
- return bibitem(option)
193
+ return bibitem(*options)
177
194
  when 'bibtex'
178
- return bibtex(option)
195
+ return bibtex(*options)
179
196
  when 'rd'
180
- return rd(option)
197
+ return rd(*options)
181
198
  when /^nature$/i
182
- return nature(option)
199
+ return nature(*options)
183
200
  when /^science$/i
184
201
  return science
185
202
  when /^genome\s*_*biol/i
@@ -231,12 +248,8 @@ module Bio
231
248
  lines << "%N #{@issue}" unless @issue.to_s.empty?
232
249
  lines << "%P #{@pages}" unless @pages.empty?
233
250
  lines << "%M #{@pubmed}" unless @pubmed.to_s.empty?
234
- if @pubmed
235
- cgi = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi"
236
- opts = "cmd=Retrieve&db=PubMed&dopt=Citation&list_uids"
237
- @url = "#{cgi}?#{opts}=#{@pubmed}"
238
- end
239
- lines << "%U #{@url}" unless @url.empty?
251
+ u = @url.empty? ? pubmed_url : @url
252
+ lines << "%U #{u}" unless u.empty?
240
253
  lines << "%X #{@abstract}" unless @abstract.empty?
241
254
  @mesh.each do |term|
242
255
  lines << "%K #{term}"
@@ -245,6 +258,24 @@ module Bio
245
258
  return lines.join("\n")
246
259
  end
247
260
 
261
+ # Returns reference formatted in the EMBL style.
262
+ #
263
+ # # ref is a Bio::Reference object
264
+ # puts ref.embl
265
+ #
266
+ # RP 1-1859
267
+ # RX PUBMED; 1907511.
268
+ # RA Oxtoby E., Dunn M.A., Pancoro A., Hughes M.A.;
269
+ # RT "Nucleotide and derived amino acid sequence of the cyanogenic
270
+ # RT beta-glucosidase (linamarase) from white clover (Trifolium repens L.)";
271
+ # RL Plant Mol. Biol. 17(2):209-219(1991).
272
+ def embl
273
+ r = self
274
+ Bio::Sequence::Format::NucFormatter::Embl.new('').instance_eval {
275
+ reference_format_embl(r)
276
+ }
277
+ end
278
+
248
279
  # Returns reference formatted in the bibitem style
249
280
  #
250
281
  # # ref is a Bio::Reference object
@@ -255,11 +286,13 @@ module Bio
255
286
  # Title of the study.,
256
287
  # {\em Theor. J. Hoge}, 12(3):123--145, 2001.
257
288
  # ---
289
+ # *Arguments*:
290
+ # * (optional) _item_: label string (default: <tt>"PMID:#{pubmed}"</tt>).
258
291
  # *Returns*:: String
259
292
  def bibitem(item = nil)
260
293
  item = "PMID:#{@pubmed}" unless item
261
294
  pages = @pages.sub('-', '--')
262
- return <<-"END".collect {|line| line.strip}.join("\n")
295
+ return <<-"END".enum_for(:each_line).collect {|line| line.strip}.join("\n")
263
296
  \\bibitem{#{item}}
264
297
  #{@authors.join(', ')}
265
298
  #{@title},
@@ -298,22 +331,48 @@ module Bio
298
331
  # ---
299
332
  # *Arguments*:
300
333
  # * (optional) _section_: BiBTeX section as String
334
+ # * (optional) _label_: Label string cited by LaTeX documents.
335
+ # Default is <tt>"PMID:#{pubmed}"</tt>.
336
+ # * (optional) _keywords_: Hash of additional keywords,
337
+ # e.g. { 'abstract' => 'This is abstract.' }.
338
+ # You can also override default keywords.
339
+ # To disable default keywords, specify false as
340
+ # value, e.g. { 'url' => false, 'year' => false }.
301
341
  # *Returns*:: String
302
- def bibtex(section = nil)
342
+ def bibtex(section = nil, label = nil, keywords = {})
303
343
  section = "article" unless section
304
344
  authors = authors_join(' and ', ' and ')
305
- pages = @pages.sub('-', '--')
306
- return <<-"END".gsub(/\t/, '')
307
- @#{section}{PMID:#{@pubmed},
308
- author = {#{authors}},
309
- title = {#{@title}},
310
- journal = {#{@journal}},
311
- year = {#{@year}},
312
- volume = {#{@volume}},
313
- number = {#{@issue}},
314
- pages = {#{pages}},
315
- }
316
- END
345
+ thepages = pages.to_s.empty? ? nil : pages.sub(/\-/, '--')
346
+ unless label then
347
+ label = "PMID:#{pubmed}"
348
+ end
349
+ theurl = if !(url.to_s.empty?) then
350
+ url
351
+ elsif pmurl = pubmed_url and !(pmurl.to_s.empty?) then
352
+ pmurl
353
+ else
354
+ nil
355
+ end
356
+ hash = {
357
+ 'author' => authors.empty? ? nil : authors,
358
+ 'title' => title.to_s.empty? ? nil : title,
359
+ 'number' => issue.to_s.empty? ? nil : issue,
360
+ 'pages' => thepages,
361
+ 'url' => theurl
362
+ }
363
+ keys = %w( author title journal year volume number pages url )
364
+ keys.each do |k|
365
+ hash[k] = self.__send__(k.intern) unless hash.has_key?(k)
366
+ end
367
+ hash.merge!(keywords) { |k, v1, v2| v2.nil? ? v1 : v2 }
368
+ bib = [ "@#{section}{#{label}," ]
369
+ keys.concat((hash.keys - keys).sort)
370
+ keys.each do |kw|
371
+ ref = hash[kw]
372
+ bib.push " #{kw.ljust(12)} = {#{ref}}," if ref
373
+ end
374
+ bib.push "}\n"
375
+ return bib.join("\n")
317
376
  end
318
377
 
319
378
  # Returns reference formatted in a general/generic style.
@@ -499,6 +558,17 @@ module Bio
499
558
  "#{authors} (#{@year}) #{@title} #{@journal} #{@volume}, #{@pages}"
500
559
  end
501
560
 
561
+ # Returns a valid URL for pubmed records
562
+ #
563
+ # *Returns*:: String
564
+ def pubmed_url
565
+ unless @pubmed.to_s.empty?
566
+ cgi = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi"
567
+ opts = "cmd=Retrieve&db=PubMed&dopt=Citation&list_uids"
568
+ return "#{cgi}?#{opts}=#{@pubmed}"
569
+ end
570
+ ''
571
+ end
502
572
 
503
573
  private
504
574
 
@@ -528,61 +598,5 @@ module Bio
528
598
 
529
599
  end
530
600
 
531
- # = DESCRIPTION
532
- #
533
- # A container class for Bio::Reference objects.
534
- #
535
- # = USAGE
536
- #
537
- # refs = Bio::References.new
538
- # refs.append(Bio::Reference.new(hash))
539
- # refs.each do |reference|
540
- # ...
541
- # end
542
- #
543
- class References
544
-
545
- # Array of Bio::Reference objects
546
- attr_accessor :references
547
-
548
- # Create a new Bio::References object
549
- #
550
- # refs = Bio::References.new
551
- # ---
552
- # *Arguments*:
553
- # * (optional) __: Array of Bio::Reference objects
554
- # *Returns*:: Bio::References object
555
- def initialize(ary = [])
556
- @references = ary
557
- end
558
-
559
-
560
- # Add a Bio::Reference object to the container.
561
- #
562
- # refs.append(reference)
563
- # ---
564
- # *Arguments*:
565
- # * (required) _reference_: Bio::Reference object
566
- # *Returns*:: current Bio::References object
567
- def append(reference)
568
- @references.push(reference) if reference.is_a? Reference
569
- return self
570
- end
571
-
572
- # Iterate through Bio::Reference objects.
573
- #
574
- # refs.each do |reference|
575
- # ...
576
- # end
577
- # ---
578
- # *Block*:: yields each Bio::Reference object
579
- def each
580
- @references.each do |reference|
581
- yield reference
582
- end
583
- end
584
-
585
- end
586
-
587
601
  end
588
602