bio 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -0,0 +1,88 @@
1
+ = KNOWN_ISSUES.rdoc - Known issues and bugs in BioRuby
2
+ Copyright:: Copyright (C) 2009 Naohisa Goto <ng@bioruby.org>
3
+ License:: The Ruby License
4
+
5
+ = Known issues and bugs in BioRuby
6
+
7
+ Below are known issues and bugs in BioRuby. They will be fixed in the future,
8
+ except items with (WONT_FIX) tags.
9
+
10
+ == 1. Ruby version specific issues
11
+
12
+ === Ruby 1.9.1 or later
13
+
14
+ Some classes/modules/methods still may not work or may return incorrect
15
+ results in Ruby 1.9.1, especially those not covered by the unit tests.
16
+
17
+ ==== String encodings
18
+
19
+ Currently, BioRuby does not care about string encodings. In some cases,
20
+ Encoding::CompatibilityError may be raised.
21
+
22
+ === Ruby 1.9.0
23
+
24
+ (WONT_FIX) Ruby 1.9.0 is NOT supported because it isn't a stable release.
25
+ Use Ruby 1.9.1 or later.
26
+
27
+ === Ruby 1.8.2 or earlier
28
+
29
+ (WONT_FIX) In some cases, temporary files and directories may not be
30
+ removed because of the lack of FileUtils.remove_entry_secure.
31
+
32
+ (WONT_FIX) We will soon end support for Ruby 1.8.2. Note that Ruby
33
+ 1.8.1 or earlier is no longer supported, as described in README.rdoc.
34
+
35
+ === Issues about SOAP/WSDL
36
+
37
+ SOAP4R (SOAP and WSDL implementation) is no longer bundled with Ruby 1.9.
38
+ In addition, because of the API changes in recent SOAP4R, some
39
+ classes/modules using SOAP4R may not work.
40
+
41
+ === Problem with REXML DoS vulnerability patch before 09-Nov-2008
42
+
43
+ (WONT_FIX) If you have applied a patch taken from
44
+ http://www.ruby-lang.org/en/news/2008/08/23/dos-vulnerability-in-rexml/
45
+ before 09 Nov 2008 12:40 +0900, because of the bug in the patch,
46
+ parsing of Blast XML results with REXML parser may fail. The bug is already
47
+ fixed and a new patch is available on the above URL. Note that some Linux
48
+ distributions may have incorporated the patch in their own way, and may
49
+ have the same problem.
50
+
51
+ == 2. OS and/or architecture-dependent issues
52
+
53
+ === Microsoft Windows
54
+
55
+ ==== Text mode issues
56
+
57
+ The following 4 tests failed on mswin32 (and maybe on mingw32 and bccwin32)
58
+ because of the conversion of line feed codes in the text mode.
59
+
60
+ * test_ended_pos and test_start_pos in test/unit/bio/io/test_flatfile.rb
61
+ * test_pos in test/unit/bio/io/flatfile/test_buffer.rb
62
+ * test_entry_pos in test/unit/bio/appl/blast/test_rpsblast.rb
63
+
64
+ This indicates that br_bioflat.rb and Bio::FlatFileIndex may create
65
+ incorrect indexes on mswin32, mingw32, and bccwin32. In addition,
66
+ Bio::FlatFile may return incorrect data.
67
+
68
+ ==== Windows 95/98/98SE/ME
69
+
70
+ (WONT_FIX) Some methods that call external programs may not work in
71
+ Windows 95/98/98SE/ME because of the limitation of COMMAND.COM.
72
+
73
+ === OpenVMS, BeOS, OS/2, djgpp, Windows CE
74
+
75
+ (WONT_FIX) BioRuby may not work on these platforms.
76
+
77
+ == 3. Known issues and bugs in BioRuby
78
+
79
+ === Bio::Ensembl
80
+
81
+ Due to the renewal of Ensembl web site, Bio::Ensembl does not work for
82
+ the latest Ensembl. For a workaround, use an archive server. For example,
83
+ "jul2008.archive.ensembl.org" seems to be the last server before the renewal.
84
+ human = Bio::Ensembl.new("Homo_sapiens", "jul2008.archive.ensembl.org")
85
+
86
+ Alternatively, consider using Ruby Ensembl API.
87
+ * http://github.com/jandot/ruby-ensembl-api
88
+
@@ -0,0 +1,252 @@
1
+ --
2
+ = README.rdoc - README for BioRuby
3
+ Copyright:: Copyright (C) 2001-2007 Toshiaki Katayama <k@bioruby.org>,
4
+ Copyright (C) 2008 Jan Aerts <jandot@bioruby.org>
5
+ License:: The Ruby License
6
+ * The above statement is limited to this file. See below about BioRuby's
7
+ copyright and license.
8
+ ++
9
+
10
+ = BioRuby
11
+
12
+ Copyright (C) 2001-2009 Toshiaki Katayama <k@bioruby.org>
13
+
14
+ BioRuby is an open source Ruby library for developing bioinformatics
15
+ software. Object oriented scripting language Ruby has many features
16
+ suitable for bioinformatics research, for example, clear syntax to
17
+ express complex objects, regular expressions for text handling as
18
+ powerful as Perl's, a wide variety of libraries including web service
19
+ etc. As the syntax of the Ruby language is simple and very clean, we
20
+ believe that it is easy to learn for beginners, easy to use for
21
+ biologists, and also powerful enough for the software developers.
22
+
23
+ In BioRuby, you can retrieve biological database entries from flat
24
+ files, internet web servers and local relational databases. These
25
+ database entries can be parsed to extract information you need.
26
+ Biological sequences can be treated with the fulfilling methods of the
27
+ Ruby's String class and with regular expressions. Daily tools like
28
+ Blast, Fasta, Hmmer and many other software packages for biological
29
+ analysis can be executed within the BioRuby script, and the results
30
+ can be fully parsed to extract the portion you need. BioRuby supports
31
+ major biological database formats and provides many ways for accessing
32
+ them through flatfile indexing, SQL, web services etc. Various web
33
+ services including KEGG API can be easily utilized by BioRuby.
34
+
35
+
36
+ == FOR MORE INFORMATION
37
+
38
+ === Documents in this distribution
39
+
40
+ ==== General information
41
+
42
+ README.rdoc:: This file. General information and installation procedure.
43
+ KNOWN_ISSUES.rdoc:: Known issues and bugs in BioRuby.
44
+ doc/Changes-1.3.rdoc:: News and incompatible changes from 1.2.1 to 1.3.0.
45
+ doc/Changes-0.7.rd:: News and incompatible changes from 0.6.4 to 1.2.1.
46
+
47
+ ==== Tutorials and other useful information
48
+
49
+ doc/Tutorial.rd:: BioRuby Tutorial.
50
+ doc/Tutorial.rd.html:: HTML version of Tutorial.rd.
51
+ doc/KEGG_API.rd:: Documents about KEGG API, including usage of Bio::KEGG::API.
52
+
53
+ ==== BioRuby development
54
+
55
+ ChangeLog:: History of changes.
56
+ README_DEV.rdoc:: Describes ways to contribute to the BioRuby project, including coding styles and documentation guidelines.
57
+
58
+ ==== Documents written in Japanese
59
+
60
+ doc/Tutorial.rd.ja:: BioRuby Tutorial written in Japanese.
61
+ doc/Tutorial.rd.ja.html:: HTML version of Tutorial.rd.ja.
62
+ doc/KEGG_API.rd.ja:: Japanese translation of KEGG_API.rd.
63
+
64
+
65
+ === WWW
66
+
67
+ BioRuby's official website is at http://bioruby.org/.
68
+ You will find links to related resources including downloads,
69
+ mailing lists, Wiki documentation etc. in the top page.
70
+
71
+ * http://bioruby.org/
72
+
73
+
74
+ == WHERE TO OBTAIN
75
+
76
+ === WWW
77
+
78
+ The stable release is freely available from the BioRuby website.
79
+
80
+ * http://bioruby.org/archive/
81
+
82
+ === RubyGems
83
+
84
+ RubyGems[URL:http://rubyforge.org/projects/rubygems/] version of
85
+ the BioRuby package is also available for easy installation.
86
+
87
+ * http://rubyforge.org/projects/bioruby/
88
+
89
+ === git
90
+
91
+ If you need the latest development version, this is provided at
92
+
93
+ * http://github.com/bioruby/bioruby
94
+
95
+ and can be obtained by the following procedure:
96
+
97
+ % git clone git://github.com/bioruby/bioruby.git
98
+
99
+ ==== CVS
100
+
101
+ CVS is now deprecated as source control has moved to github. Please use git
102
+ instead of CVS. For historical purposes: the anonymous CVS was provided at
103
+
104
+ * http://cvs.bioruby.org/
105
+
106
+ and could be obtained by the following procedure.
107
+
108
+ % cvs -d :pserver:cvs@code.open-bio.org:/home/repository/bioruby login
109
+ CVS password: cvs (login with a password 'cvs' for the first time)
110
+ % cvs -d :pserver:cvs@code.open-bio.org:/home/repository/bioruby co bioruby
111
+
112
+
113
+ == REQUIREMENTS
114
+
115
+ * Ruby 1.8.2 or later -- http://www.ruby-lang.org/
116
+ * Ruby 1.8.5 or later is recommended.
117
+ * Not yet fully ready with Ruby 1.9, although many components can now work
118
+ in Ruby 1.9.1.
119
+
120
+ == OPTIONAL REQUIREMENTS
121
+
122
+ Some optional libraries can be utilized to extend BioRuby's functionality.
123
+ If your needs meet the following conditions, install them from the "Ruby
124
+ Application Archive" at http://raa.ruby-lang.org/, RubyForge at
125
+ http://rubyforge.org/, or the following web sites.
126
+
127
+ For faster parsing of the BLAST XML output format:
128
+
129
+ * {RAA:xmlparser}[http://raa.ruby-lang.org/project/xmlparser/]
130
+
131
+ Creating faster flatfile index using Berkeley DB:
132
+
133
+ * {RAA:bdb}[http://raa.ruby-lang.org/project/bdb/]
134
+
135
+ Accessing BioSQL database created by other Open Bio* libraries:
136
+
137
+ * {RubyForge:ActiveRecord}[http://rubyforge.org/projects/activerecord/]
138
+ and at least one driver (or adapter) from
139
+ {RubyForge:MySQL/Ruby}[http://rubyforge.org/projects/mysql-ruby/],
140
+ {RubyForge:postgres-pr}[http://rubyforge.org/projects/postgres-pr], or
141
+ {RubyForge:ActiveRecord Oracle enhanced adapter}[http://rubyforge.org/projects/oracle-enhanced/].
142
+ * For BioRuby 1.2.1 or older version,
143
+ {RubyForge:Ruby/DBI}[http://rubyforge.org/projects/ruby-dbi] and
144
+ at least one driver from MySQL/Ruby, postgres-pr, or
145
+ {RubyForge:ruby-oci8}[http://ruby-oci8.rubyforge.org/]
146
+ (note that ruby-oci8 can only work with ruby-dbi 0.2.2).
147
+
148
+
149
+ == INSTALL
150
+
151
+ In the bioruby source directory (such as bioruby-x.x.x/), run setup.rb
152
+ as follows:
153
+
154
+ % su
155
+ # ruby setup.rb
156
+
157
+ This simple step installs this program under the default location of
158
+ Ruby libraries. You can also install files into your favorite directory
159
+ by supplying setup.rb some options. Try "ruby setup.rb --help".
160
+
161
+ If your operating system supports 'sudo' command (such as Mac OS X),
162
+ try the following procedure instead of the above.
163
+
164
+ % sudo ruby setup.rb
165
+
166
+ For older version users: "install.rb" is now renamed to "setup.rb".
167
+ The options "config", "setup", and "install" are still available.
168
+
169
+ % ruby setup.rb config
170
+ % ruby setup.rb setup
171
+ % su
172
+ # ruby setup.rb install
173
+
174
+ You can run tests by
175
+
176
+ % ruby setup.rb test
177
+
178
+ and run
179
+
180
+ % ruby setup.rb --help
181
+
182
+ for more details.
183
+
184
+ === RubyGems
185
+
186
+ If you are using RubyGems, just type
187
+
188
+ % gem install bio
189
+
190
+
191
+ == SETUP
192
+
193
+ If you want to use the OBDA (Open Bio Database Access) to obtain database
194
+ entries, copy a sample configuration file in the BioRuby distribution
195
+
196
+ bioruby-x.x.x/etc/bioinformatics/seqdatabase.ini
197
+
198
+ to
199
+
200
+ /etc/bioinformatics/seqdatabase.ini (system wide configuration)
201
+
202
+ or
203
+
204
+ ~/.bioinformatics/seqdatabase.ini (personal configuration)
205
+
206
+ and change the contents according to your preference. For more
207
+ information on the OBDA, see http://obda.open-bio.org/ .
208
+
209
+
210
+ == USAGE
211
+
212
+ You can load all BioRuby classes just by requiring 'bio.rb'. All the
213
+ BioRuby classes and modules are located under the module name 'Bio' to
214
+ separate the name space.
215
+
216
+ #!/usr/bin/env ruby
217
+ require 'bio'
218
+
219
+ You can also read other documentation in the 'doc' directory.
220
+
221
+ bioruby-x.x.x/doc/
222
+
223
+ === RubyGems
224
+
225
+ With RubyGems, you need to load 'rubygems' library before using 'bio'.
226
+
227
+ #!/usr/bin/env ruby
228
+ require 'rubygems'
229
+ require 'bio'
230
+
231
+ With an old version of RubyGems, use 'require_gem' which was deprecated in
232
+ RubyGems 0.9.0 and removed in RubyGems 1.0.1.
233
+
234
+ #!/usr/bin/env ruby
235
+ require 'rubygems'
236
+ require_gem 'bio'
237
+
238
+
239
+ == LICENSE
240
+
241
+ BioRuby can be freely distributed under the same terms as Ruby.
242
+
243
+ Note that setup.rb included in the BioRuby package comes from
244
+ {RAA:setup}[http://raa.ruby-lang.org/project/setup/] developed by Minero Aoki
245
+ (http://i.loveruby.net/en/projects/setup/).
246
+
247
+
248
+ == CONTACT
249
+
250
+ Current staff of the BioRuby project can be reached by sending e-mail
251
+ to <staff@bioruby.org>.
252
+
@@ -0,0 +1,285 @@
1
+ = README.DEV
2
+
3
+ Copyright:: Copyright (C) 2005, 2006 Toshiaki Katayama <k@bioruby.org>
4
+ Copyright:: Copyright (C) 2006, 2008 Jan Aerts <jandot@bioruby.org>
5
+
6
+ = HOW TO CONTRIBUTE TO THE BIORUBY PROJECT?
7
+
8
+ There are many possible ways to contribute to the BioRuby project,
9
+ such as:
10
+
11
+ * Join the discussion on the BioRuby mailing list
12
+ * Send a bug report or write a bug fix patch
13
+ * Add and correct documentation
14
+ * Develop code for new features, etc.
15
+
16
+ All of these are welcome! However, this document describes the last option,
17
+ how to contribute your code to the BioRuby distribution.
18
+
19
+ We would like to include your contribution as long as the scope of
20
+ your module meets the field of bioinformatics.
21
+
22
+ == Git
23
+
24
+ Bioruby is now under git source control at http://github.com/bioruby/bioruby.
25
+ There are two basic ways to contribute: with patches or pull requests. Both are
26
+ explained on the bioruby wiki at http://bioruby.open-bio.org/wiki.
27
+
28
+ = LICENSE
29
+
30
+ If you would like your module to be included in the BioRuby distribution,
31
+ you need to give us right to change the license of your module to make it
32
+ compatible with other modules in BioRuby.
33
+
34
+ BioRuby was previously distributed under the LGPL license, but now is
35
+ distributed under the same terms as Ruby.
36
+
37
+ = CODING STYLE
38
+
39
+ You will need to follow the typical coding styles of the BioRuby modules:
40
+
41
+ == Use the following naming conventions
42
+
43
+ * CamelCase for module and class names
44
+ * '_'-separated_lowercase for method names
45
+ * '_'-separated_lowercase for variable names
46
+ * all UPPERCASE for constants
47
+
48
+ == Indentation must not include tabs
49
+
50
+ * Use 2 spaces for indentation.
51
+ * Don't replace spaces with tabs.
52
+
53
+ == Comments
54
+
55
+ Don't use <tt>=begin</tt> and <tt>=end</tt> blocks for comments. If you need to
56
+ add comments, include them in the RDoc documentation.
57
+
58
+ == Documentation should be written in the RDoc format in the source code
59
+
60
+ The RDoc format is becoming the popular standard for Ruby documentation.
61
+ We are now in transition from the previously used RD format to the RDoc
62
+ format in API documentation.
63
+
64
+ Additional tutorial documentation and working examples are encouraged
65
+ with your contribution. You may use the header part of the file for
66
+ this purpose as demonstrated in the following section.
67
+
68
+ == Standard documentation
69
+
70
+ === of files
71
+
72
+ Each file should start with a header, which covers the following topics:
73
+ * copyright
74
+ * license
75
+ * description of the file (_not_ the classes; see below)
76
+ * any references, if appropriate
77
+
78
+ The header should be formatted as follows:
79
+
80
+ #
81
+ # = bio/db/hoge.rb - Hoge database parser classes
82
+ #
83
+ # Copyright:: Copyright (C) 2001, 2003-2005 Bio R. Hacker <brh@example.org>,
84
+ # Copyright:: Copyright (C) 2006 Chem R. Hacker <crh@example.org>
85
+ #
86
+ # License:: The Ruby License
87
+ #
88
+ # == Description
89
+ #
90
+ # This file contains classes that implement an interface to the Hoge database.
91
+ #
92
+ # == References
93
+ #
94
+ # * Hoge F. et al., The Hoge database, Nucleic. Acid. Res. 123:100--123 (2030)
95
+ # * http://hoge.db/
96
+ #
97
+
98
+ require 'foo'
99
+
100
+ module Bio
101
+
102
+ autoload :Bar, 'bio/bar'
103
+
104
+ class Hoge
105
+ :
106
+ end # Hoge
107
+
108
+ end # Bio
109
+
110
+ === of classes and methods within those files
111
+
112
+ Classes and methods should be documented in a standardized format, as in the
113
+ following example (from lib/bio/sequence.rb):
114
+
115
+ # == Description
116
+ #
117
+ # Bio::Sequence objects represent annotated sequences in bioruby.
118
+ # A Bio::Sequence object is a wrapper around the actual sequence,
119
+ # represented as either a Bio::Sequence::NA or a Bio::Sequence::AA object.
120
+ # For most users, this encapsulation will be completely transparent.
121
+ # Bio::Sequence responds to all methods defined for Bio::Sequence::NA/AA
122
+ # objects using the same arguments and returning the same values (even though
123
+ # these methods are not documented specifically for Bio::Sequence).
124
+ #
125
+ # == Usage
126
+ #
127
+ # require 'bio'
128
+ #
129
+ # # Create a nucleic or amino acid sequence
130
+ # dna = Bio::Sequence.auto('atgcatgcATGCATGCAAAA')
131
+ # rna = Bio::Sequence.auto('augcaugcaugcaugcaaaa')
132
+ # aa = Bio::Sequence.auto('ACDEFGHIKLMNPQRSTVWYU')
133
+ #
134
+ # # Print in FASTA format
135
+ # puts dna.output(:fasta)
136
+ #
137
+ # # Print all codons
138
+ # dna.window_search(3,3) do |codon|
139
+ # puts codon
140
+ # end
141
+ #
142
+ class Sequence
143
+
144
+ # Create a new Bio::Sequence object
145
+ #
146
+ # s = Bio::Sequence.new('atgc')
147
+ # puts s # => 'atgc'
148
+ #
149
+ # Note that this method does not initialize the contained sequence
150
+ # as any kind of bioruby object, only as a simple string
151
+ #
152
+ # puts s.seq.class # => String
153
+ #
154
+ # See Bio::Sequence#na, Bio::Sequence#aa, and Bio::Sequence#auto
155
+ # for methods to transform the basic String of a just created
156
+ # Bio::Sequence object to a proper bioruby object
157
+ # ---
158
+ # *Arguments*:
159
+ # * (required) _str_: String or Bio::Sequence::NA/AA object
160
+ # *Returns*:: Bio::Sequence object
161
+ def initialize(str)
162
+ @seq = str
163
+ end
164
+
165
+ # The sequence identifier. For example, for a sequence
166
+ # of Genbank origin, this is the accession number.
167
+ attr_accessor :entry_id
168
+
169
+ # An Array of Bio::Feature objects
170
+ attr_accessor :features
171
+ end # Sequence
172
+
173
+ Preceding the class definition (<tt>class Sequence</tt>), there is at least a
174
+ description and a usage example. Please use the +Description+ and +Usage+
175
+ headings. If appropriate, refer to other classes that interact with or are
176
+ related to the class.
177
+
178
+ The code in the usage example should, if possible, be in a format that a user
179
+ can copy-and-paste into a new script to run. It should illustrate the most
180
+ important uses of the class. If possible and if it would not clutter up the
181
+ example too much, try to provide any input data directly into the usage example,
182
+ instead of referring to ARGV or ARGF for input.
183
+
184
+ dna = Bio::Sequence.auto('atgcatgcATGCATGCAAAA')
185
+
186
+ Otherwise, briefly describe the input, for example:
187
+
188
+ # input should be string consisting of nucleotides
189
+ dna = Bio::Sequence.auto(ARGF.read)
190
+
191
+ Methods should be preceded by a comment that describes what the method does,
192
+ including any relevant usage examples. (In contrast to the documentation for
193
+ the class itself, headings are not required.) In addition, any arguments should
194
+ be listed, as well as the type of thing that is returned by the method. The
195
+ format of this information is as follows:
196
+
197
+ # ---
198
+ # *Arguments*:
199
+ # * (required) _str_: String or Bio::Sequence::NA
200
+ # * (optional) _nr_: a number that means something
201
+ # *Returns*:: true or false
202
+
203
+ Attribute accessors can be preceded by a short description.
204
+
205
+ == Exception handling
206
+
207
+ Don't use
208
+
209
+ $stderr.puts "WARNING"
210
+
211
+ in your code. Instead, try to avoid printing error messages. For fatal errors,
212
+ use +raise+ with an appropriate message.
213
+
214
+ == Testing code should use 'test/unit'
215
+
216
+ Your modules should come with unit tests, which let you confirm that
216
+ each method does what you meant it to do. The test code is useful to make
218
+ maintenance easy and ensure stability. The use of
219
+
220
+ if __FILE__ == $0
221
+
222
+ is deprecated.
223
+
224
+ == Using autoload
225
+
226
+ To speed up the initial load time we have replaced most uses of 'require' with
227
+ 'autoload' since BioRuby version 0.7. During this change, we have found
228
+ some tips:
229
+
230
+ You should not separate the same namespace into several files.
231
+
232
+ * For example, if you have separated definitions of the Bio::Foo
233
+ class into two files (e.g. 'bio/foo.rb' and 'bio/bar.rb'), you
234
+ need to resolve the dependencies (including the load order)
235
+ yourself.
236
+
237
+ * If you have defined Bio::Foo in 'bio/foo.rb' and defined
238
+ Bio::Foo::Bar in 'bio/foo/bar.rb' add the following line in the
239
+ 'bio/foo.rb' file:
240
+
241
+ autoload :Bar, 'bio/foo/bar'
242
+
243
+ You should not put several top level namespaces in one file.
244
+
245
+ * For example, if you have Bio::A, Bio::B and Bio::C in the file
246
+ 'bio/foo.rb', you need
247
+
248
+ autoload :A, 'bio/foo'
249
+ autoload :B, 'bio/foo'
250
+ autoload :C, 'bio/foo'
251
+
252
+ to load the module automatically (instead of require 'bio/foo').
253
+ In this case, you should put them under the new namespace like
254
+ Bio::Foo::A, Bio::Foo::B and Bio::Foo::C in the file 'bio/foo.rb',
255
+ then use
256
+
257
+ autoload :Foo, 'bio/foo'
258
+
259
+ so autoload can be written in 1 line.
260
+
261
+ = NAMESPACE
262
+
263
+ Your module should be located under the top-level module Bio and put under
264
+ the 'bioruby/lib/bio' directory. The class/module names and the
265
+ file names should be short and descriptive.
266
+
267
+ There are already several sub directories in 'bioruby/lib':
268
+
269
+ bio/*.rb -- general and widely used basic classes
270
+ bio/appl/ -- wrapper and parser for the external applications
271
+ bio/data/ -- basic biological data
272
+ bio/db/ -- flatfile database entry parsers
273
+ bio/io/ -- I/O interfaces for files, RDB, web services etc.
274
+ bio/util/ -- utilities and algorithms for bioinformatics
275
+
276
+ If your module doesn't match any of the above, please propose
277
+ an appropriate directory name when you contribute. Please let the staff
278
+ discuss namespaces (class names) and API (method names) before committing
279
+ a new module or making changes on existing modules.
280
+
281
+ = MAINTENANCE
282
+
283
+ Finally, please maintain the code you've contributed. Please let us know (on
284
+ the bioruby list) before you commit, so that users can discuss the change.
285
+