bio 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -0,0 +1,127 @@
1
+ #
2
+ # test/unit/bio/db/test_medline.rb - Unit test for Bio::MEDLINE
3
+ #
4
+ # Copyright:: Copyright (C) 2008 Collaborative Drug Discovery, Inc. <github@collaborativedrug.com>
5
+ # License:: The Ruby License
6
+ #
7
+
8
+ require 'pathname'
9
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
10
+ $:.unshift(libpath) unless $:.include?(libpath)
11
+
12
+ require 'test/unit'
13
+ require 'bio/db/medline'
14
+
15
+ module Bio
16
+ class TestMEDLINE < Test::Unit::TestCase
17
+ def test_authors
18
+ assert_equal(["Kane, D. W.",
19
+ "Hohman, M. M.",
20
+ "Cerami, E. G.",
21
+ "McCormick, M. W.",
22
+ "Kuhlmann, K. F.",
23
+ "Byrd, J. A."], Bio::MEDLINE.new(AGILE).authors)
24
+ end
25
+
26
+ def test_authors_with_suffix
27
+ assert_equal(["Jenkins, F. A. Jr"], Bio::MEDLINE.new("AU - Jenkins FA Jr").authors)
28
+ end
29
+
30
+ def test_authors_with_last_name_all_caps
31
+ assert_equal(["GARTLER, S. M."], Bio::MEDLINE.new("AU - GARTLER SM").authors)
32
+ end
33
+
34
+ AGILE = <<-EOMED
35
+ PMID- 16734914
36
+ OWN - NLM
37
+ STAT- MEDLINE
38
+ DA - 20060811
39
+ DCOM- 20060928
40
+ LR - 20081120
41
+ IS - 1471-2105 (Electronic)
42
+ VI - 7
43
+ DP - 2006
44
+ TI - Agile methods in biomedical software development: a multi-site experience
45
+ report.
46
+ PG - 273
47
+ AB - BACKGROUND: Agile is an iterative approach to software development that
48
+ relies on strong collaboration and automation to keep pace with dynamic
49
+ environments. We have successfully used agile development approaches to
50
+ create and maintain biomedical software, including software for
51
+ bioinformatics. This paper reports on a qualitative study of our
52
+ experiences using these methods. RESULTS: We have found that agile methods
53
+ are well suited to the exploratory and iterative nature of scientific
54
+ inquiry. They provide a robust framework for reproducing scientific
55
+ results and for developing clinical support systems. The agile development
56
+ approach also provides a model for collaboration between software
57
+ engineers and researchers. We present our experience using agile
58
+ methodologies in projects at six different biomedical software development
59
+ organizations. The organizations include academic, commercial and
60
+ government development teams, and included both bioinformatics and
61
+ clinical support applications. We found that agile practices were a match
62
+ for the needs of our biomedical projects and contributed to the success of
63
+ our organizations. CONCLUSION: We found that the agile development
64
+ approach was a good fit for our organizations, and that these practices
65
+ should be applicable and valuable to other biomedical software development
66
+ efforts. Although we found differences in how agile methods were used, we
67
+ were also able to identify a set of core practices that were common to all
68
+ of the groups, and that could be a focus for others seeking to adopt these
69
+ methods.
70
+ AD - SRA International, 4300 Fair Lakes Court, Fairfax, VA 22033, USA.
71
+ david_kane@sra.com
72
+ FAU - Kane, David W
73
+ AU - Kane DW
74
+ FAU - Hohman, Moses M
75
+ AU - Hohman MM
76
+ FAU - Cerami, Ethan G
77
+ AU - Cerami EG
78
+ FAU - McCormick, Michael W
79
+ AU - McCormick MW
80
+ FAU - Kuhlmann, Karl F
81
+ AU - Kuhlmann KF
82
+ FAU - Byrd, Jeff A
83
+ AU - Byrd JA
84
+ LA - eng
85
+ GR - U01 MH061915-03/MH/NIMH NIH HHS/United States
86
+ GR - U01 MH061915-04/MH/NIMH NIH HHS/United States
87
+ GR - U01 MH61915/MH/NIMH NIH HHS/United States
88
+ PT - Journal Article
89
+ PT - Research Support, N.I.H., Extramural
90
+ PT - Research Support, Non-U.S. Gov't
91
+ DEP - 20060530
92
+ PL - England
93
+ TA - BMC Bioinformatics
94
+ JT - BMC bioinformatics
95
+ JID - 100965194
96
+ SB - IM
97
+ MH - Algorithms
98
+ MH - Automation
99
+ MH - Computational Biology/*methods
100
+ MH - Computers
101
+ MH - Database Management Systems
102
+ MH - Databases, Genetic
103
+ MH - Diffusion of Innovation
104
+ MH - Hospital Information Systems
105
+ MH - Hospitals
106
+ MH - Humans
107
+ MH - Medical Informatics
108
+ MH - Multicenter Studies as Topic
109
+ MH - Programming Languages
110
+ MH - Software
111
+ MH - *Software Design
112
+ MH - Systems Integration
113
+ PMC - PMC1539031
114
+ OID - NLM: PMC1539031
115
+ EDAT- 2006/06/01 09:00
116
+ MHDA- 2006/09/29 09:00
117
+ CRDT- 2006/06/01 09:00
118
+ PHST- 2005/11/17 [received]
119
+ PHST- 2006/05/30 [accepted]
120
+ PHST- 2006/05/30 [aheadofprint]
121
+ AID - 1471-2105-7-273 [pii]
122
+ AID - 10.1186/1471-2105-7-273 [doi]
123
+ PST - epublish
124
+ SO - BMC Bioinformatics. 2006 May 30;7:273.
125
+ EOMED
126
+ end
127
+ end
@@ -5,13 +5,17 @@
5
5
  #
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: test_nexus.rb,v 1.2 2007/04/05 23:35:43 trevor Exp $
8
+ # $Id:$
9
9
  #
10
10
  # == Description
11
11
  #
12
12
  # This file contains unit tests for Bio::Nexus.
13
13
  #
14
14
 
15
+ require 'pathname'
16
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
17
+ $:.unshift(libpath) unless $:.include?(libpath)
18
+
15
19
  require 'test/unit'
16
20
  require 'bio/db/nexus'
17
21
 
@@ -1233,7 +1233,7 @@ module Bio
1233
1233
  "P11616",
1234
1234
  "P23275",
1235
1235
  "Q01338"]
1236
- assert_equal(data, @obj.list_truepositive)
1236
+ assert_equal(data.sort, @obj.list_truepositive.sort)
1237
1237
  end
1238
1238
 
1239
1239
  def test_list_falsenegative
@@ -1301,7 +1301,7 @@ module Bio
1301
1301
  "P77933",
1302
1302
  "P34529",
1303
1303
  "Q00126"]
1304
- assert_equal(data, @obj.list_falsenegative)
1304
+ assert_equal(data.sort, @obj.list_falsenegative.sort)
1305
1305
  end
1306
1306
 
1307
1307
  def test_list_falsepositive
@@ -1353,7 +1353,7 @@ module Bio
1353
1353
  "Q95137",
1354
1354
  "Q62953",
1355
1355
  "Q95195"]
1356
- assert_equal(data, @obj.list_falsepositive)
1356
+ assert_equal(data.sort, @obj.list_falsepositive.sort)
1357
1357
  end
1358
1358
 
1359
1359
  def test_list_potentialhit
@@ -1405,7 +1405,7 @@ module Bio
1405
1405
  "Q95137",
1406
1406
  "Q62953",
1407
1407
  "Q95195"]
1408
- assert_equal(data, @obj.list_potentialhit)
1408
+ assert_equal(data.sort, @obj.list_potentialhit.sort)
1409
1409
  end
1410
1410
 
1411
1411
  def test_list_unknown
@@ -0,0 +1,375 @@
1
+ #
2
+ # = test/unit/bio/io/flatfile/test_autodetection.rb - unit test for Bio::FlatFile::AutoDetect
3
+ #
4
+ # Copyright (C) 2006 Naohisa Goto <ng@bioruby.org>
5
+ #
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id: test_flatfile.rb,v 1.2 2007/04/05 23:35:43 trevor Exp $
9
+ #
10
+
11
+ require 'pathname'
12
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s
13
+ $:.unshift(libpath) unless $:.include?(libpath)
14
+
15
+ require 'test/unit'
16
+ require 'bio'
17
+
18
+ module Bio::TestFlatFile
19
+
20
+ # testing default AutoDetect's behavior
21
+ class TestDefaultAutoDetect < Test::Unit::TestCase
22
+
23
+ bioruby_root = Pathname.new(File.join(File.dirname(__FILE__),
24
+ ['..'] * 5)).cleanpath.to_s
25
+ TestDataPath = Pathname.new(File.join(bioruby_root,
26
+ 'test', 'data')).cleanpath.to_s
27
+
28
+ def setup
29
+ @ad = Bio::FlatFile::AutoDetect.default
30
+ end
31
+
32
+ def test_genbank
33
+ # modified from GenBank AB009803.1
34
+ # (E-mail and telephone/FAX numbers are removed from original entry)
35
+ text = <<__END_OF_TEXT__
36
+ LOCUS AB009803 81 bp DNA linear PRI 14-APR-2000
37
+ DEFINITION Homo sapiens gene for osteonidogen, intron 4.
38
+ ACCESSION AB009803
39
+ VERSION AB009803.1 GI:2749808
40
+ KEYWORDS osteonidogen.
41
+ SOURCE Homo sapiens (human)
42
+ ORGANISM Homo sapiens
43
+ Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
44
+ Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
45
+ Catarrhini; Hominidae; Homo.
46
+ REFERENCE 1
47
+ AUTHORS Ohno,I., Okubo,K. and Matsubara,K.
48
+ TITLE Human osteonidogen gene: intron-exon junctions and chromosomal
49
+ localization
50
+ JOURNAL Published Only in Database (1998)
51
+ REFERENCE 2 (bases 1 to 81)
52
+ AUTHORS Ohno,I., Okubo,K. and Matsubara,K.
53
+ TITLE Direct Submission
54
+ JOURNAL Submitted (13-DEC-1997) Ikko Ohno, Institute for Molecular and
55
+ Cellular Biology, Osaka University, Molecular Genetics; 1-3
56
+ Yamada-oka, Suita, Osaka 565, Japan
57
+ FEATURES Location/Qualifiers
58
+ source 1..81
59
+ /organism="Homo sapiens"
60
+ /mol_type="genomic DNA"
61
+ /db_xref="taxon:9606"
62
+ /chromosome="14"
63
+ /map="14q21-22"
64
+ /clone_lib="Lambda FIX II STRATAGENE"
65
+ intron 1..81
66
+ /number=4
67
+ ORIGIN
68
+ 1 gtaggatctc ccctccagat tctgatctgt cctccccctt gcatccaaca cctacttatt
69
+ 61 ggccattcta tcctgaaaca g
70
+ //
71
+ __END_OF_TEXT__
72
+ assert_equal(Bio::GenBank, @ad.autodetect(text))
73
+ end
74
+
75
+ def test_genpept
76
+ # modified from: NCBI: P04637.2 GI:129369
77
+ # (to shorten data, many elements are omitted)
78
+ text = <<__END_OF_TEXT__
79
+ LOCUS P04637 393 aa linear PRI 01-JUL-2008
80
+ DEFINITION Cellular tumor antigen p53 (Tumor suppressor p53) (Phosphoprotein
81
+ p53) (Antigen NY-CO-13).
82
+ ACCESSION P04637
83
+ VERSION P04637.2 GI:129369
84
+ KEYWORDS 3D-structure; Acetylation; Activator; Alternative splicing;
85
+ Anti-oncogene; Apoptosis; Cell cycle; Covalent protein-RNA linkage;
86
+ Cytoplasm; Disease mutation; DNA-binding; Endoplasmic reticulum;
87
+ Glycoprotein; Host-virus interaction; Li-Fraumeni syndrome;
88
+ Metal-binding; Methylation; Nucleus; Phosphoprotein; Polymorphism;
89
+ Transcription; Transcription regulation; Ubl conjugation; Zinc.
90
+ SOURCE Homo sapiens (human)
91
+ ORGANISM Homo sapiens
92
+ Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
93
+ Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
94
+ Catarrhini; Hominidae; Homo.
95
+ REFERENCE 1 (residues 1 to 393)
96
+ AUTHORS Zakut-Houri,R., Bienz-Tadmor,B., Givol,D. and Oren,M.
97
+ TITLE Human p53 cellular tumor antigen: cDNA sequence and expression in
98
+ COS cells
99
+ JOURNAL EMBO J. 4 (5), 1251-1255 (1985)
100
+ PUBMED 4006916
101
+ REMARK NUCLEOTIDE SEQUENCE [MRNA].
102
+ FEATURES Location/Qualifiers
103
+ source 1..393
104
+ /organism="Homo sapiens"
105
+ /db_xref="taxon:9606"
106
+ gene 1..393
107
+ /gene="TP53"
108
+ /note="synonym: P53"
109
+ Protein 1..393
110
+ /gene="TP53"
111
+ /product="Cellular tumor antigen p53"
112
+ ORIGIN
113
+ 1 meepqsdpsv epplsqetfs dlwkllpenn vlsplpsqam ddlmlspddi eqwftedpgp
114
+ 61 deaprmpeaa ppvapapaap tpaapapaps wplsssvpsq ktyqgsygfr lgflhsgtak
115
+ 121 svtctyspal nkmfcqlakt cpvqlwvdst pppgtrvram aiykqsqhmt evvrrcphhe
116
+ 181 rcsdsdglap pqhlirvegn lrveylddrn tfrhsvvvpy eppevgsdct tihynymcns
117
+ 241 scmggmnrrp iltiitleds sgnllgrnsf evrvcacpgr drrteeenlr kkgephhelp
118
+ 301 pgstkralpn ntssspqpkk kpldgeyftl qirgrerfem frelnealel kdaqagkepg
119
+ 361 gsrahsshlk skkgqstsrh kklmfktegp dsd
120
+ //
121
+ __END_OF_TEXT__
122
+
123
+ assert_equal(Bio::GenPept, @ad.autodetect(text))
124
+ end
125
+
126
+ def test_medline
127
+ # PMID: 13054692
128
+ text = <<__END_OF_TEXT__
129
+ PMID- 13054692
130
+ OWN - NLM
131
+ STAT- MEDLINE
132
+ DA - 19531201
133
+ DCOM- 20030501
134
+ LR - 20061115
135
+ PUBM- Print
136
+ IS - 0028-0836 (Print)
137
+ VI - 171
138
+ IP - 4356
139
+ DP - 1953 Apr 25
140
+ TI - Molecular structure of nucleic acids; a structure for deoxyribose nucleic acid.
141
+ PG - 737-8
142
+ FAU - WATSON, J D
143
+ AU - WATSON JD
144
+ FAU - CRICK, F H
145
+ AU - CRICK FH
146
+ LA - eng
147
+ PT - Journal Article
148
+ PL - Not Available
149
+ TA - Nature
150
+ JT - Nature
151
+ JID - 0410462
152
+ RN - 0 (Nucleic Acids)
153
+ SB - OM
154
+ MH - *Nucleic Acids
155
+ OID - CLML: 5324:25254:447
156
+ OTO - NLM
157
+ OT - *NUCLEIC ACIDS
158
+ EDAT- 1953/04/25
159
+ MHDA- 1953/04/25 00:01
160
+ PST - ppublish
161
+ SO - Nature. 1953 Apr 25;171(4356):737-8.
162
+ __END_OF_TEXT__
163
+
164
+ assert_equal(Bio::MEDLINE, @ad.autodetect(text))
165
+ end
166
+
167
+ def test_embl_oldrelease
168
+ fn = File.join(TestDataPath, 'embl', 'AB090716.embl')
169
+ text = File.read(fn)
170
+ assert_equal(Bio::EMBL, @ad.autodetect(text))
171
+ end
172
+
173
+ def test_embl
174
+ fn = File.join(TestDataPath, 'embl', 'AB090716.embl.rel89')
175
+ text = File.read(fn)
176
+ assert_equal(Bio::EMBL, @ad.autodetect(text))
177
+ end
178
+
179
+ def test_sptr
180
+ fn = File.join(TestDataPath, 'uniprot', 'p53_human.uniprot')
181
+ text = File.read(fn)
182
+ assert_equal(Bio::SPTR, @ad.autodetect(text))
183
+ end
184
+
185
+ def test_prosite
186
+ fn = File.join(TestDataPath, 'prosite', 'prosite.dat')
187
+ text = File.read(fn)
188
+ assert_equal(Bio::PROSITE, @ad.autodetect(text))
189
+ end
190
+
191
+ def test_transfac
192
+ # Dummy data; Generated from random data
193
+ text = <<__END_OF_TEXT__
194
+ AC M99999
195
+ XX
196
+ ID V$XXXX_99
197
+ XX
198
+ DT 13.01.98 (created); ewi.
199
+ DT 31.12.99 (updated); ewi.
200
+ XX
201
+ NA XXXX
202
+ XX
203
+ DE example gene protein
204
+ XX
205
+ BF T99998; XXXX; Species: human, Homo sapiens.
206
+ BF T99999; XXXX; Species: mouse, Mus musculus.
207
+ XX
208
+ P0 A C G T
209
+ 01 1 2 2 2 N
210
+ 02 0 2 2 3 N
211
+ 03 1 1 5 0 G
212
+ 04 3 1 1 2 N
213
+ 05 7 0 0 0 A
214
+ 06 2 0 1 4 W
215
+ 07 0 1 6 0 G
216
+ 08 0 3 0 4 Y
217
+ 09 6 1 0 0 A
218
+ 10 1 1 0 5 T
219
+ XX
220
+ BA 7 functional elements in 3 genes
221
+ XX
222
+ CC compiled sequences
223
+ XX
224
+ RN [1]
225
+ RA Anonymou S., Whoam I.
226
+ RT Example article title for XXXX
227
+ RL J. Example. 99:990-999 (1999).
228
+ __END_OF_TEXT__
229
+
230
+ assert_equal(Bio::TRANSFAC, @ad.autodetect(text))
231
+ end
232
+
233
+ def test_aaindex1
234
+ fn = File.join(TestDataPath, 'aaindex', 'PRAM900102')
235
+ text = File.read(fn)
236
+ assert_equal(Bio::AAindex1, @ad.autodetect(text))
237
+ end
238
+
239
+ def test_aaindex2
240
+ fn = File.join(TestDataPath, 'aaindex', 'DAYM780301')
241
+ text = File.read(fn)
242
+ assert_equal(Bio::AAindex2, @ad.autodetect(text))
243
+ end
244
+
245
+ # def test_litdb
246
+ # end
247
+
248
+ # def test_brite
249
+ # end
250
+
251
+ # def test_orthology
252
+ # end
253
+
254
+ # def test_drug
255
+ # end
256
+
257
+ # def test_glycan
258
+ # end
259
+
260
+ # def test_enzyme
261
+ # end
262
+
263
+ # def test_compound
264
+ # end
265
+
266
+ # def test_reaction
267
+ # end
268
+
269
+ # def test_genes
270
+ # end
271
+
272
+ # def test_genome
273
+ # end
274
+
275
+ def test_maxml_cluster
276
+ # dummy empty data
277
+ text = <<__END_OF_TEXT__
278
+ <?xml version="1.0"?>
279
+ <!DOCTYPE maxml-clusters SYSTEM "http://fantom.gsc.riken.go.jp/maxml/maxml.dtd"><maxml-clusters>
280
+ </maxml-clusters>
281
+ __END_OF_TEXT__
282
+ assert_equal(Bio::FANTOM::MaXML::Cluster, @ad.autodetect(text))
283
+ end
284
+
285
+ def test_maxml_sequence
286
+ # dummy empty data
287
+ text = <<__END_OF_TEXT__
288
+ <?xml version="1.0"?>
289
+ <!DOCTYPE maxml-sequences SYSTEM "http://fantom.gsc.riken.go.jp/maxml/maxml.dtd">
290
+ <maxml-sequences>
291
+ </maxml-sequences>
292
+ __END_OF_TEXT__
293
+ assert_equal(Bio::FANTOM::MaXML::Sequence, @ad.autodetect(text))
294
+ end
295
+
296
+ # def test_pdb
297
+ # end
298
+
299
+ # def test_chemicalcomponent
300
+ # end
301
+
302
+ # def test_clustal
303
+ # end
304
+
305
+ # def test_gcg_msf
306
+ # end
307
+
308
+ # def test_gcg_seq
309
+ # end
310
+
311
+ def test_blastxml
312
+ fn = File.join(TestDataPath, 'blast', '2.2.15.blastp.m7')
313
+ text = File.read(fn)
314
+ assert_equal(Bio::Blast::Report, @ad.autodetect(text))
315
+ end
316
+
317
+ # def test_wublast
318
+ # end
319
+
320
+ # def test_wutblast
321
+ # end
322
+
323
+ def test_blast
324
+ fn = File.join(TestDataPath, 'blast', 'b0002.faa.m0')
325
+ text = File.read(fn)
326
+ assert_equal(Bio::Blast::Default::Report, @ad.autodetect(text))
327
+ end
328
+
329
+ # def test_tblast
330
+ # end
331
+
332
+ # def test_blat
333
+ # end
334
+
335
+ # def test_spidey
336
+ # end
337
+
338
+ def test_hmmer
339
+ fn = File.join(TestDataPath, 'HMMER', 'hmmpfam.out')
340
+ text = File.read(fn)
341
+ assert_equal(Bio::HMMER::Report, @ad.autodetect(text))
342
+
343
+ fn = File.join(TestDataPath, 'HMMER', 'hmmsearch.out')
344
+ text = File.read(fn)
345
+ assert_equal(Bio::HMMER::Report, @ad.autodetect(text))
346
+ end
347
+
348
+ # def test_sim4
349
+ # end
350
+
351
+ def test_fastaformat
352
+ fn = File.join(TestDataPath, 'fasta', 'example1.txt')
353
+ text = File.read(fn)
354
+ assert_equal(Bio::FastaFormat, @ad.autodetect(text))
355
+
356
+ fn = File.join(TestDataPath, 'fasta', 'example2.txt')
357
+ text = File.read(fn)
358
+ assert_equal(Bio::FastaFormat, @ad.autodetect(text))
359
+ end
360
+
361
+ def test_fastanumericformat
362
+ text = <<__END_OF_TEXT__
363
+ >sample
364
+ 30 21 16 11 8 6 3 34 28 34 28 28 35 28 28 37 33 15 27 28 28
365
+ 27 37 33 17 27 27 28 28 33 26 33 26 28 27 37 33 15 27 26 27
366
+ 28 37 33 16 34 26 27 33 26 28 33 25 28 28 38 34 23 13 2
367
+ __END_OF_TEXT__
368
+
369
+ assert_equal(Bio::FastaNumericFormat, @ad.autodetect(text))
370
+ end
371
+
372
+ end #class TestDefaultAutoDetect
373
+
374
+ end #module Bio::TestFlatFile
375
+