bio 1.2.1 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +3421 -0
- data/KNOWN_ISSUES.rdoc +88 -0
- data/README.rdoc +252 -0
- data/README_DEV.rdoc +285 -0
- data/Rakefile +143 -0
- data/bin/bioruby +0 -0
- data/bin/br_biofetch.rb +0 -0
- data/bin/br_bioflat.rb +12 -1
- data/bin/br_biogetseq.rb +0 -0
- data/bin/br_pmfetch.rb +4 -3
- data/bioruby.gemspec +477 -0
- data/bioruby.gemspec.erb +117 -0
- data/doc/Changes-0.7.rd +7 -0
- data/doc/Changes-1.3.rdoc +239 -0
- data/doc/Tutorial.rd +296 -184
- data/doc/Tutorial.rd.html +1031 -0
- data/doc/Tutorial.rd.ja +111 -45
- data/doc/Tutorial.rd.ja.html +2225 -0
- data/doc/bioruby.css +281 -0
- data/extconf.rb +2 -0
- data/lib/bio.rb +29 -4
- data/lib/bio/appl/blast.rb +306 -121
- data/lib/bio/appl/blast/ddbj.rb +142 -0
- data/lib/bio/appl/blast/format0.rb +35 -25
- data/lib/bio/appl/blast/format8.rb +2 -2
- data/lib/bio/appl/blast/genomenet.rb +263 -0
- data/lib/bio/appl/blast/ncbioptions.rb +220 -0
- data/lib/bio/appl/blast/remote.rb +106 -0
- data/lib/bio/appl/blast/report.rb +260 -9
- data/lib/bio/appl/blast/rexml.rb +12 -5
- data/lib/bio/appl/blast/rpsblast.rb +277 -0
- data/lib/bio/appl/blast/wublast.rb +133 -12
- data/lib/bio/appl/blast/xmlparser.rb +35 -18
- data/lib/bio/appl/blat/report.rb +46 -5
- data/lib/bio/appl/emboss.rb +62 -13
- data/lib/bio/appl/fasta.rb +9 -11
- data/lib/bio/appl/genscan/report.rb +3 -3
- data/lib/bio/appl/hmmer.rb +1 -1
- data/lib/bio/appl/hmmer/report.rb +10 -10
- data/lib/bio/appl/paml/baseml.rb +95 -0
- data/lib/bio/appl/paml/baseml/report.rb +32 -0
- data/lib/bio/appl/paml/codeml.rb +242 -0
- data/lib/bio/appl/paml/codeml/rates.rb +67 -0
- data/lib/bio/appl/paml/codeml/report.rb +67 -0
- data/lib/bio/appl/paml/common.rb +348 -0
- data/lib/bio/appl/paml/common_report.rb +38 -0
- data/lib/bio/appl/paml/yn00.rb +103 -0
- data/lib/bio/appl/paml/yn00/report.rb +32 -0
- data/lib/bio/appl/psort.rb +2 -2
- data/lib/bio/appl/pts1.rb +5 -5
- data/lib/bio/appl/tmhmm/report.rb +10 -1
- data/lib/bio/command.rb +297 -41
- data/lib/bio/compat/features.rb +157 -0
- data/lib/bio/compat/references.rb +128 -0
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
- data/lib/bio/db/biosql/sequence.rb +508 -0
- data/lib/bio/db/embl/common.rb +28 -12
- data/lib/bio/db/embl/embl.rb +107 -9
- data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
- data/lib/bio/db/embl/format_embl.rb +190 -0
- data/lib/bio/db/embl/sptr.rb +15 -16
- data/lib/bio/db/fantom.rb +6 -8
- data/lib/bio/db/fasta.rb +10 -507
- data/lib/bio/db/fasta/defline.rb +532 -0
- data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
- data/lib/bio/db/fasta/format_fasta.rb +97 -0
- data/lib/bio/db/genbank/common.rb +25 -8
- data/lib/bio/db/genbank/format_genbank.rb +187 -0
- data/lib/bio/db/genbank/genbank.rb +36 -1
- data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
- data/lib/bio/db/gff.rb +1791 -119
- data/lib/bio/db/kegg/glycan.rb +2 -6
- data/lib/bio/db/lasergene.rb +3 -3
- data/lib/bio/db/medline.rb +4 -1
- data/lib/bio/db/newick.rb +10 -10
- data/lib/bio/db/pdb/chain.rb +6 -2
- data/lib/bio/db/pdb/pdb.rb +12 -3
- data/lib/bio/db/rebase.rb +7 -8
- data/lib/bio/db/soft.rb +3 -3
- data/lib/bio/feature.rb +1 -88
- data/lib/bio/io/biosql/biodatabase.rb +64 -0
- data/lib/bio/io/biosql/bioentry.rb +29 -0
- data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
- data/lib/bio/io/biosql/bioentry_path.rb +12 -0
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
- data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
- data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
- data/lib/bio/io/biosql/biosequence.rb +11 -0
- data/lib/bio/io/biosql/comment.rb +7 -0
- data/lib/bio/io/biosql/config/database.yml +20 -0
- data/lib/bio/io/biosql/dbxref.rb +13 -0
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
- data/lib/bio/io/biosql/location.rb +32 -0
- data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
- data/lib/bio/io/biosql/ontology.rb +10 -0
- data/lib/bio/io/biosql/reference.rb +9 -0
- data/lib/bio/io/biosql/seqfeature.rb +32 -0
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
- data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
- data/lib/bio/io/biosql/taxon.rb +12 -0
- data/lib/bio/io/biosql/taxon_name.rb +9 -0
- data/lib/bio/io/biosql/term.rb +27 -0
- data/lib/bio/io/biosql/term_dbxref.rb +11 -0
- data/lib/bio/io/biosql/term_path.rb +12 -0
- data/lib/bio/io/biosql/term_relationship.rb +13 -0
- data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
- data/lib/bio/io/biosql/term_synonym.rb +10 -0
- data/lib/bio/io/das.rb +7 -7
- data/lib/bio/io/ddbjxml.rb +57 -0
- data/lib/bio/io/ensembl.rb +2 -2
- data/lib/bio/io/fetch.rb +28 -14
- data/lib/bio/io/flatfile.rb +17 -853
- data/lib/bio/io/flatfile/autodetection.rb +545 -0
- data/lib/bio/io/flatfile/buffer.rb +237 -0
- data/lib/bio/io/flatfile/index.rb +17 -7
- data/lib/bio/io/flatfile/indexer.rb +30 -12
- data/lib/bio/io/flatfile/splitter.rb +297 -0
- data/lib/bio/io/hinv.rb +442 -0
- data/lib/bio/io/keggapi.rb +2 -2
- data/lib/bio/io/ncbirest.rb +733 -0
- data/lib/bio/io/pubmed.rb +34 -80
- data/lib/bio/io/registry.rb +2 -2
- data/lib/bio/io/sql.rb +178 -357
- data/lib/bio/io/togows.rb +458 -0
- data/lib/bio/location.rb +106 -11
- data/lib/bio/pathway.rb +120 -14
- data/lib/bio/reference.rb +115 -101
- data/lib/bio/sequence.rb +164 -183
- data/lib/bio/sequence/adapter.rb +108 -0
- data/lib/bio/sequence/common.rb +22 -45
- data/lib/bio/sequence/compat.rb +2 -2
- data/lib/bio/sequence/dblink.rb +54 -0
- data/lib/bio/sequence/format.rb +254 -77
- data/lib/bio/sequence/format_raw.rb +23 -0
- data/lib/bio/shell.rb +3 -1
- data/lib/bio/shell/core.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +33 -4
- data/lib/bio/shell/plugin/ncbirest.rb +64 -0
- data/lib/bio/shell/plugin/togows.rb +40 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
- data/lib/bio/tree.rb +4 -2
- data/lib/bio/util/color_scheme.rb +2 -2
- data/lib/bio/util/contingency_table.rb +2 -2
- data/lib/bio/util/restriction_enzyme.rb +2 -2
- data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
- data/lib/bio/version.rb +25 -0
- data/rdoc.zsh +8 -0
- data/sample/any2fasta.rb +0 -0
- data/sample/biofetch.rb +0 -0
- data/sample/dbget +0 -0
- data/sample/demo_sequence.rb +158 -0
- data/sample/enzymes.rb +0 -0
- data/sample/fasta2tab.rb +0 -0
- data/sample/fastagrep.rb +72 -0
- data/sample/fastasort.rb +54 -0
- data/sample/fsplit.rb +0 -0
- data/sample/gb2fasta.rb +2 -3
- data/sample/gb2tab.rb +0 -0
- data/sample/gbtab2mysql.rb +0 -0
- data/sample/genes2nuc.rb +0 -0
- data/sample/genes2pep.rb +0 -0
- data/sample/genes2tab.rb +0 -0
- data/sample/genome2rb.rb +0 -0
- data/sample/genome2tab.rb +0 -0
- data/sample/goslim.rb +0 -0
- data/sample/gt2fasta.rb +0 -0
- data/sample/na2aa.rb +34 -0
- data/sample/pmfetch.rb +0 -0
- data/sample/pmsearch.rb +0 -0
- data/sample/ssearch2tab.rb +0 -0
- data/sample/tfastx2tab.rb +0 -0
- data/sample/vs-genes.rb +0 -0
- data/setup.rb +1596 -0
- data/test/data/blast/blastp-multi.m7 +188 -0
- data/test/data/command/echoarg2.bat +1 -0
- data/test/data/paml/codeml/control_file.txt +30 -0
- data/test/data/paml/codeml/output.txt +78 -0
- data/test/data/paml/codeml/rates +217 -0
- data/test/data/rpsblast/misc.rpsblast +193 -0
- data/test/data/soft/GDS100_partial.soft +0 -0
- data/test/data/soft/GSE3457_family_partial.soft +0 -0
- data/test/functional/bio/appl/test_pts1.rb +115 -0
- data/test/functional/bio/io/test_ensembl.rb +123 -80
- data/test/functional/bio/io/test_togows.rb +267 -0
- data/test/functional/bio/sequence/test_output_embl.rb +51 -0
- data/test/functional/bio/test_command.rb +301 -0
- data/test/runner.rb +17 -1
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
- data/test/unit/bio/appl/blast/test_report.rb +753 -35
- data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
- data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
- data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
- data/test/unit/bio/appl/test_blast.rb +135 -4
- data/test/unit/bio/appl/test_fasta.rb +2 -2
- data/test/unit/bio/appl/test_pts1.rb +1 -64
- data/test/unit/bio/db/embl/test_common.rb +15 -15
- data/test/unit/bio/db/embl/test_embl.rb +4 -4
- data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
- data/test/unit/bio/db/embl/test_sptr.rb +38 -1
- data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
- data/test/unit/bio/db/test_gff.rb +1151 -25
- data/test/unit/bio/db/test_medline.rb +127 -0
- data/test/unit/bio/db/test_nexus.rb +5 -1
- data/test/unit/bio/db/test_prosite.rb +4 -4
- data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
- data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
- data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
- data/test/unit/bio/io/test_ddbjxml.rb +8 -3
- data/test/unit/bio/io/test_fastacmd.rb +5 -5
- data/test/unit/bio/io/test_flatfile.rb +357 -106
- data/test/unit/bio/io/test_soapwsdl.rb +2 -2
- data/test/unit/bio/io/test_togows.rb +161 -0
- data/test/unit/bio/sequence/test_common.rb +210 -11
- data/test/unit/bio/sequence/test_compat.rb +3 -3
- data/test/unit/bio/sequence/test_dblink.rb +58 -0
- data/test/unit/bio/sequence/test_na.rb +2 -2
- data/test/unit/bio/test_command.rb +111 -50
- data/test/unit/bio/test_feature.rb +29 -1
- data/test/unit/bio/test_location.rb +566 -6
- data/test/unit/bio/test_pathway.rb +91 -65
- data/test/unit/bio/test_reference.rb +67 -13
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
- metadata +202 -167
- data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
data/bioruby.gemspec.erb
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
Gem::Specification.new do |s|
|
2
|
+
s.name = 'bio'
|
3
|
+
s.version = "<% ###### Below is executed in ERB environment ######
|
4
|
+
# Version can be specified by the environment variable
|
5
|
+
env_ver = ENV['BIORUBY_GEM_VERSION']
|
6
|
+
env_ver = nil if env_ver.to_s.strip.empty?
|
7
|
+
|
8
|
+
# By default, determined from lib/bio/version.rb
|
9
|
+
load "./lib/bio/version.rb" unless defined?(BIO_VERSION_RB_LOADED)
|
10
|
+
case Bio::BIORUBY_EXTRA_VERSION
|
11
|
+
when nil
|
12
|
+
suffix = nil
|
13
|
+
when /\A\.(\d+)\z/
|
14
|
+
suffix = $1
|
15
|
+
when /\-alpha(\d+)/
|
16
|
+
decrement = true
|
17
|
+
suffix = 9000 + $1.to_i
|
18
|
+
when /\-pre(\d+)/
|
19
|
+
decrement = true
|
20
|
+
suffix = 9500 + $1.to_i
|
21
|
+
when /\-rc(\d+)/
|
22
|
+
decrement = true
|
23
|
+
suffix = 9900 + $1.to_i
|
24
|
+
else
|
25
|
+
suffix = "0000"
|
26
|
+
end
|
27
|
+
ver = Bio::BIORUBY_VERSION.reverse.collect do |i|
|
28
|
+
if decrement then
|
29
|
+
i -= 1
|
30
|
+
i < 0 ? (i += 10) : decrement = false
|
31
|
+
end
|
32
|
+
i
|
33
|
+
end.reverse
|
34
|
+
ver.push suffix if suffix
|
35
|
+
%><%=
|
36
|
+
(env_ver || ver.join('.'))
|
37
|
+
###### Above is executed in ERB environment ######
|
38
|
+
%>"
|
39
|
+
|
40
|
+
s.author = "BioRuby project"
|
41
|
+
s.email = "staff@bioruby.org"
|
42
|
+
s.homepage = "http://bioruby.org/"
|
43
|
+
s.rubyforge_project = "bioruby"
|
44
|
+
s.summary = "Bioinformatics library"
|
45
|
+
s.description = "BioRuby is a library for bioinformatics (biology + information science)."
|
46
|
+
|
47
|
+
s.platform = Gem::Platform::RUBY
|
48
|
+
s.files = [
|
49
|
+
<% ###### Below is executed in ERB environment ######
|
50
|
+
# Gets file list from the "git ls-files" command.
|
51
|
+
files = (`git ls-files` rescue nil).to_s.split(/\r?\n/)
|
52
|
+
files.delete_if { |x| x.empty? }
|
53
|
+
# When git-ls-files isn't available, creates a list from current files.
|
54
|
+
if !($?.success?) or files.size <= 0 then
|
55
|
+
files =
|
56
|
+
[ "README.rdoc", "README_DEV.rdoc",
|
57
|
+
"ChangeLog", "KNOWN_ISSUES.rdoc",
|
58
|
+
"Rakefile", "bioruby.gemspec.erb",
|
59
|
+
"bioruby.gemspec", "setup.rb",
|
60
|
+
"extconf.rb", "rdoc.zsh"
|
61
|
+
] + Dir.glob("{bin,doc,etc,lib,sample,test}/**/*").delete_if do |item|
|
62
|
+
case item
|
63
|
+
when /(\A|\/)CVS(\z|\/)/, /(\A|\/)rdoc(\z|\/)/, /\~\z/
|
64
|
+
true
|
65
|
+
else
|
66
|
+
false
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
%><%=
|
71
|
+
files.sort.collect { |x| x.dump }.join(",\n ")
|
72
|
+
###### Above is executed in ERB environment ######
|
73
|
+
%>
|
74
|
+
]
|
75
|
+
|
76
|
+
s.has_rdoc = true
|
77
|
+
s.extra_rdoc_files = [
|
78
|
+
<%= ###### Below is executed in ERB environment ######
|
79
|
+
# Files whose suffix are .rdoc are selected.
|
80
|
+
rdoc_files = files.find_all { |item| /\.rdoc\z/ =~ item }
|
81
|
+
# Fail safe settings
|
82
|
+
if rdoc_files.empty? then
|
83
|
+
rdoc_files = [ 'README.rdoc', 'README_DEV.rdoc',
|
84
|
+
'doc/Changes-1.3.rdoc' ]
|
85
|
+
end
|
86
|
+
rdoc_files.push "ChangeLog" unless rdoc_files.include?("ChangeLog")
|
87
|
+
rdoc_files.sort.collect { |x| x.dump }.join(",\n ")
|
88
|
+
###### Above is executed in ERB environment ######
|
89
|
+
%>
|
90
|
+
]
|
91
|
+
s.rdoc_options << '--main' << 'README.rdoc'
|
92
|
+
s.rdoc_options << '--title' << 'BioRuby API documentation'
|
93
|
+
s.rdoc_options << '--exclude' << '\.yaml\z'
|
94
|
+
s.rdoc_options << '--line-numbers' << '--inline-source'
|
95
|
+
|
96
|
+
s.require_path = 'lib'
|
97
|
+
s.autorequire = 'bio'
|
98
|
+
|
99
|
+
s.bindir = "bin"
|
100
|
+
s.executables = [
|
101
|
+
<%= ###### Below is executed in ERB environment ######
|
102
|
+
# Files in bin/ directory are selected.
|
103
|
+
exec_files = files.find_all { |item| /\Abin\// =~ item }
|
104
|
+
# Non-executable files are removed from the list.
|
105
|
+
exec_files.delete_if { |item| !File.executable?(item) }
|
106
|
+
# strip "bin/"
|
107
|
+
exec_files.collect! { |item| item.sub(/\Abin\//, '') }
|
108
|
+
# Fail safe settings
|
109
|
+
if exec_files.empty? then
|
110
|
+
exec_files = [ "bioruby", "br_biofetch.rb", "br_biogetseq.rb", "br_bioflat.rb", "br_pmfetch.rb" ]
|
111
|
+
end
|
112
|
+
exec_files.sort.collect { |x| x.dump }.join(",\n ")
|
113
|
+
###### Above is executed in ERB environment ######
|
114
|
+
%>
|
115
|
+
]
|
116
|
+
s.default_executable = "bioruby"
|
117
|
+
end
|
data/doc/Changes-0.7.rd
CHANGED
@@ -338,6 +338,13 @@ In 1.1.0:
|
|
338
338
|
instead of a string or nil: score, percent_identity, percent_positive,
|
339
339
|
percent_gaps.
|
340
340
|
|
341
|
+
--- BioRuby Shell
|
342
|
+
|
343
|
+
In 1.1.0:
|
344
|
+
|
345
|
+
* Shell commands seq, ent, obj are renamed to getseq, getent, getobj,
|
346
|
+
respectively.
|
347
|
+
|
341
348
|
=== Deleted files
|
342
349
|
|
343
350
|
: lib/bio/db/genbank.rb
|
@@ -0,0 +1,239 @@
|
|
1
|
+
= Incompatible and important changes since the BioRuby 1.2.1 release
|
2
|
+
|
3
|
+
Many changes have been made to BioRuby since version 1.2.1
|
4
|
+
was released.
|
5
|
+
|
6
|
+
== New features
|
7
|
+
|
8
|
+
=== Support for sequence output with improvements of Bio::Sequence
|
9
|
+
|
10
|
+
The outputting of EMBL and GenBank formatted text is now supported in the
|
11
|
+
Bio::Sequence class. See the document of Bio::Sequence#output for details.
|
12
|
+
You can also create Bio::Sequence objects from many kinds of data such as
|
13
|
+
Bio::GenBank, Bio::EMBL, and Bio::FastaFormat by using the to_biosequence
|
14
|
+
method.
|
15
|
+
|
16
|
+
=== BioSQL support
|
17
|
+
|
18
|
+
BioSQL support is completely rewritten by using ActiveRecord.
|
19
|
+
|
20
|
+
=== Bio::Blast
|
21
|
+
|
22
|
+
Bio::Blast#reports can parse NCBI default (-m 0) format and tabular (-m 8)
|
23
|
+
format, in addition to XML (-m 7) format.
|
24
|
+
|
25
|
+
Bio::Blast::Report now supports XML format with multiple query sequences
|
26
|
+
generated by blastall 2.2.14 or later.
|
27
|
+
|
28
|
+
Bio::Blast.remote supports DDBJ, in addition to GenomeNet.
|
29
|
+
In addition, a list of available blast databases on remote sites
|
30
|
+
can be obtained by using Bio::Blast::Remote::DDBJ.databases and
|
31
|
+
Bio::Blast::Remote::GenomeNet.databases methods. Note that the above
|
32
|
+
remote blast methods may be changed in the future to support NCBI.
|
33
|
+
|
34
|
+
Bio::Blast::RPSBlast::Report is newly added, a parser for NCBI RPS Blast
|
35
|
+
(Reversed Position Specific Blast) default (-m 0 option) results.
|
36
|
+
|
37
|
+
=== Bio::GFF::GFF2 and Bio::GFF::GFF3
|
38
|
+
|
39
|
+
The outputting of GFF2/GFF3-formatted text is now supported. However, many
|
40
|
+
incompatible changes have been made (See below for details).
|
41
|
+
|
42
|
+
=== Bio::Hinv
|
43
|
+
|
44
|
+
H-Invitational Database web service (REST) client class is newly added.
|
45
|
+
|
46
|
+
=== Bio::NCBI::REST
|
47
|
+
|
48
|
+
NCBI E-Utilities client class is newly added.
|
49
|
+
|
50
|
+
=== Bio::PAML::Codeml and Bio::PAML::Codeml::Report
|
51
|
+
|
52
|
+
Bio::PAML::Codeml, wrapper for PAML codeml program, and
|
53
|
+
Bio::PAML::Codeml::Report, parser for codeml result are newly added,
|
54
|
+
though some of them are still under construction and too specific to
|
55
|
+
particular use cases.
|
56
|
+
|
57
|
+
=== Bio::Locations
|
58
|
+
|
59
|
+
New method Bio::Locations#to_s is added to support output of features.
|
60
|
+
|
61
|
+
=== Bio::TogoWS::REST
|
62
|
+
|
63
|
+
TogoWS REST client class is newly added. Information about TogoWS REST service
|
64
|
+
can be found on http://togows.dbcls.jp/site/en/rest.html.
|
65
|
+
|
66
|
+
== Deprecated classes
|
67
|
+
|
68
|
+
=== Bio::Features
|
69
|
+
|
70
|
+
Bio::Features is obsoleted and changed to an array of Bio::Feature objects
|
71
|
+
with some backward compatibility methods. The backward compatibility methods
|
72
|
+
will be removed in the near future.
|
73
|
+
|
74
|
+
=== Bio::References
|
75
|
+
|
76
|
+
Bio::References is obsoleted and changed to an array of Bio::Reference objects
|
77
|
+
with some backward compatibility methods. The backward compatibility methods
|
78
|
+
will be removed in the near future.
|
79
|
+
|
80
|
+
== Incompatible changes
|
81
|
+
|
82
|
+
=== Bio::BIORUBY_VERSION
|
83
|
+
|
84
|
+
Definition of the constant Bio::BIORUBY_VERSION is moved from lib/bio.rb to
|
85
|
+
lib/bio/version.rb. Normally, the autoload mechanism of Ruby correctly loads
|
86
|
+
the version.rb, but special scripts directly using bio.rb may need to
|
87
|
+
be changed.
|
88
|
+
|
89
|
+
Bio::BIORUBY_VERSION is changed to be frozen.
|
90
|
+
|
91
|
+
New constants Bio::BIORUBY_EXTRA_VERSION and Bio::BIORUBY_VERSION_ID are
|
92
|
+
added. See their RDoc for details.
|
93
|
+
|
94
|
+
=== Bio::Sequence
|
95
|
+
|
96
|
+
Bio::Sequence#date is removed. Alternatively, date_created or date_modified
|
97
|
+
can be used.
|
98
|
+
|
99
|
+
Bio::Sequence#taxonomy is changed to be an alias of classification, and
|
100
|
+
the data type is changed to an array of string.
|
101
|
+
|
102
|
+
=== Bio::Locations and Bio::Location
|
103
|
+
|
104
|
+
A carat in a location (e.g. "123^124") is now parsed, instead of being
|
105
|
+
replaced by "..". To distinguish from normal "..", a new attribute
|
106
|
+
Bio::Location#carat is used.
|
107
|
+
|
108
|
+
"order(...)" or "group(...)" are also parsed, instead of being regarded
|
109
|
+
as "join(...)". To distinguish from "join(...)", a new attribute
|
110
|
+
Bio::Locations#operator is used. When "order(...)" or "group(...)",
|
111
|
+
the attribute is set to :order or :group, respectively. Note that
|
112
|
+
"group(...)" is already deprecated in EMBL/GenBank/DDBJ.
|
113
|
+
|
114
|
+
=== Bio::Blast
|
115
|
+
|
116
|
+
Return value of Bio::Blast#exec_* is changed to String instead of Report
|
117
|
+
object. Parsing the string is now processed in Bio::Blast#query method.
|
118
|
+
|
119
|
+
Bio::Blast#exec_genomenet_tab and Bio::Blast#server="genomenet_tab" are
|
120
|
+
deprecated.
|
121
|
+
|
122
|
+
Bio::Blast#options=() can now change the following attributes: program, db,
|
123
|
+
format, matrix, and filter.
|
124
|
+
|
125
|
+
Bio::Blast.reports now supports default (-m 0) and tabular (-m 8) formats.
|
126
|
+
Old implementation (only supports XML) is renamed to Bio::Blast.reports_xml,
|
127
|
+
to keep compatibility for older BLAST XML documents which might not be parsed
|
128
|
+
by the new Bio::Blast.reports nor Bio::FlatFile, although we are not sure
|
129
|
+
whether such documents really exist or not.
|
130
|
+
|
131
|
+
=== Bio::Blast::Default::Report and Bio::Blast::WU::Report
|
132
|
+
|
133
|
+
Iteration#lambda, #kappa, #entropy, #gapped_lambda, #gapped_kappa,
|
134
|
+
and #gapped_entropy, and the same methods in the Report class are
|
135
|
+
changed to return float or nil instead of string or nil.
|
136
|
+
|
137
|
+
=== Bio::Blat
|
138
|
+
|
139
|
+
When reading BLAT psl (or pslx) data by using Bio::FlatFile, it checks
|
140
|
+
each query name and returns a new entry object when the query name is
|
141
|
+
changed from previous queries. That is, data is stored in two or more
|
142
|
+
Bio::Blat::Report objects, instead of previous version's behavior
|
143
|
+
(always reads all data at once and stores to a Bio::Blat::Report object).
|
144
|
+
|
145
|
+
=== Bio::GFF, Bio::GFF::GFF2 and Bio::GFF::GFF3
|
146
|
+
|
147
|
+
Bio::GFF::Record#comments is renamed to #comment, and #comments= is
|
148
|
+
renamed to #comment=, because they only allow a single String (or nil)
|
149
|
+
and the plural form "comments" may be confusing. The "comments" and
|
150
|
+
"comments=" methods can still be used, but warning messages will be
|
151
|
+
shown when they are used in GFF2::Record and GFF3::Record objects.
|
152
|
+
|
153
|
+
See below about GFF2 and/or GFF3 specific changes.
|
154
|
+
|
155
|
+
=== Bio::GFF::GFF2 and Bio::GFF::GFF3
|
156
|
+
|
157
|
+
Bio::GFF::GFF2::Record.new and Bio::GFF::GFF3::Record.new can also
|
158
|
+
get 9 arguments corresponding to GFF columns, which helps to create
|
159
|
+
Record object directly without formatted text.
|
160
|
+
|
161
|
+
Bio::GFF::GFF2::Record#start, #end, and #frame return Integer or nil,
|
162
|
+
and #score returns Float or nil, instead of String or nil.
|
163
|
+
The same changes are also made to Bio::GFF::GFF3::Record.
|
164
|
+
|
165
|
+
Bio::GFF::GFF2::Record#attributes and Bio::GFF::GFF3::Record#attributes
|
166
|
+
are changed to return a nested Array, containing [ tag, value ] pairs,
|
167
|
+
in order to support multiple attributes with the same tag name. If you want
|
168
|
+
to get a Hash, use Record#attributes_to_hash method, though some
|
169
|
+
tag-value pairs with the same tag name may be lost. Note that
|
170
|
+
Bio::GFF::Record#attribute still returns a Hash for compatibility.
|
171
|
+
|
172
|
+
New methods for getting, setting and manipulating attributes are added
|
173
|
+
to Bio::GFF::GFF2::Record and Bio::GFF::GFF3::Record classes:
|
174
|
+
attribute, get_attribute, get_attributes, set_attribute, replace_attributes,
|
175
|
+
add_attribute, delete_attribute, delete_attributes, sort_attributes_by_tag!.
|
176
|
+
It is recommended to use these methods instead of directly manipulating
|
177
|
+
the array returned by Record#attributes.
|
178
|
+
|
179
|
+
Bio::GFF::GFF2#to_s, Bio::GFF::GFF3#to_s, Bio::GFF::GFF2::Record#to_s,
|
180
|
+
and Bio::GFF::GFF3::Record#to_s are added to support output of
|
181
|
+
GFF2/GFF3 data.
|
182
|
+
|
183
|
+
=== Bio::GFF::GFF2
|
184
|
+
|
185
|
+
GFF2 attribute values are now automatically unescaped. In addition,
|
186
|
+
if the value of an attribute consists of two or more tokens delimited
|
187
|
+
by spaces, an object of the new class Bio::GFF::GFF2::Record::Value is
|
188
|
+
returned instead of String. The new class Bio::GFF::GFF2::Record::Value
|
189
|
+
aims to store a parsed value of an attribute. If you really want to get
|
190
|
+
unparsed string, Bio::GFF::GFF2::Record::Value#to_s can be used.
|
191
|
+
|
192
|
+
The metadata (lines beginning with "##") are parsed to
|
193
|
+
Bio::GFF::GFF2::MetaData objects and are stored to Bio::GFF::GFF2#metadata
|
194
|
+
as an array, except the "##gff-version" line. The "##gff-version" version
|
195
|
+
string is stored to the Bio::GFF::GFF2#gff_version as a string.
|
196
|
+
|
197
|
+
=== Bio::GFF::GFF3
|
198
|
+
|
199
|
+
Aliases of columns which are renamed in the GFF3 specification are added
|
200
|
+
to the Bio::GFF::GFF3::Record class: seqid (column 1; alias of "seqname"),
|
201
|
+
feature_type (column 3; alias of "feature"; in the GFF3 spec, it is
|
202
|
+
called "type", but because "type" is already used by Ruby, we use
|
203
|
+
"feature_type"), phase (column 8; formerly "frame"). Original names can
|
204
|
+
still be used because they are only aliases.
|
205
|
+
|
206
|
+
Sequences bundled within GFF3 after "##FASTA" are now supported
|
207
|
+
(Bio::GFF::GFF3#sequences).
|
208
|
+
|
209
|
+
GFF3 attribute keys and values are automatically unescaped. Each attribute
|
210
|
+
value is stored as a string, except for special attributes listed below:
|
211
|
+
* Bio::GFF::GFF3::Record::Target to store a "Target" attribute.
|
212
|
+
* Bio::GFF::GFF3::Record::Gap to store a "Gap" attribute.
|
213
|
+
|
214
|
+
The metadata (lines beginning with "##") are parsed to
|
215
|
+
Bio::GFF::GFF3::MetaData objects and stored to Bio::GFF::GFF3#metadata
|
216
|
+
as an array, except "##gff-version", "##sequence-region", "###",
|
217
|
+
and "##FASTA" lines.
|
218
|
+
* "##gff-version" version string is stored to Bio::GFF::GFF3#gff_version.
|
219
|
+
* "##sequence-region" lines are parsed to Bio::GFF::GFF3::SequenceRegion
|
220
|
+
objects and stored to Bio::GFF::GFF3#sequence_regions as an array.
|
221
|
+
* "###" lines are parsed to Bio::GFF::GFF3::RecordBoundary objects.
|
222
|
+
* "##FASTA" is regarded as the beginning of bundled sequences.
|
223
|
+
|
224
|
+
=== Bio::Pathway
|
225
|
+
|
226
|
+
Bio::Pathway#cliquishness is changed to calculate cliquishness (clustering
|
227
|
+
coefficient) for not only undirected graphs but also directed graphs.
|
228
|
+
|
229
|
+
In Bio::Pathway#to_matrix, dump_matrix, dump_list, and depth_first_search
|
230
|
+
methods, to avoid dependence on the order of objects in Hash#each (and
|
231
|
+
each_keys etc.), Bio::Pathway#index is used to specify preferences of
|
232
|
+
nodes in a graph.
|
233
|
+
|
234
|
+
=== Bio::SQL and BioSQL related classes
|
235
|
+
|
236
|
+
BioSQL support is completely rewritten by using ActiveRecord. See documents
|
237
|
+
in lib/bio/io/sql.rb, lib/bio/io/biosql, and lib/bio/db/biosql for details
|
238
|
+
of changes and usage of the classes/modules.
|
239
|
+
|
data/doc/Tutorial.rd
CHANGED
@@ -1,49 +1,79 @@
|
|
1
|
+
# This document is generated with a version of rd2html (part of Hiki)
|
2
|
+
#
|
3
|
+
# A possible test run could be from rdtool (on Debian package rdtool)
|
4
|
+
#
|
5
|
+
# ruby -I lib ./bin/rd2 ~/cvs/opensource/bioruby/doc/Tutorial.rd
|
6
|
+
#
|
7
|
+
# or with style sheet:
|
8
|
+
#
|
9
|
+
# ruby -I lib ./bin/rd2 -r rd/rd2html-lib.rb --with-c
|
10
|
+
ss=bioruby.css ~/cvs/opensource/bioruby/doc/Tutorial.rd > ~/bioruby.html
|
11
|
+
#
|
12
|
+
# in Debian:
|
13
|
+
#
|
14
|
+
# rd2 -r rd/rd2html-lib --with-css="/home/wrk/izip/cvs/opensource/bioruby/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.css" Tutorial.rd > index.html
|
15
|
+
#
|
16
|
+
# A common problem is tabs in the text file! TABs are not allowed.
|
17
|
+
#
|
18
|
+
# To add tests run Toshiaki's bioruby shell and paste in the query plus
|
19
|
+
# results.
|
20
|
+
#
|
21
|
+
# To run the embedded Ruby doctests you can get the doctest.rb from Pjotr.
|
22
|
+
|
1
23
|
=begin
|
24
|
+
#doctest Testing bioruby
|
2
25
|
|
3
|
-
|
26
|
+
= BioRuby Tutorial
|
4
27
|
|
5
|
-
|
28
|
+
Editor: PjotrPrins <p .at. bioruby.org>
|
6
29
|
|
7
|
-
|
30
|
+
* Copyright (C) 2001-2003 KATAYAMA Toshiaki <k .at. bioruby.org>
|
31
|
+
* Copyright (C) 2005-2008 Pjotr Prins, Naohisa Goto and others
|
8
32
|
|
9
|
-
|
33
|
+
The latest version resides in the CVS repository ./doc/((<Tutorial.rd|URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/*checkout*/bioruby/doc/Tutorial.rd?rev=HEAD&cvsroot=bioruby&content-type=text/plain>)). This one was updated:
|
10
34
|
|
11
|
-
|
35
|
+
$Id: Tutorial.rd,v 1.22 2008/05/19 12:22:05 pjotr Exp $
|
12
36
|
|
13
|
-
|
14
|
-
repository. Please edit the file there otherwise changes may get
|
15
|
-
lost. See ((<BioRuby Developer Information>)) for CVS and mailing list
|
16
|
-
access.
|
17
|
-
|
18
|
-
= BioRuby Tutorial
|
37
|
+
in preparation for the ((<BioHackathon 2008|URL:http://hackathon.dbcls.jp/>))
|
19
38
|
|
20
39
|
== Introduction
|
21
40
|
|
22
|
-
This is a tutorial for using Bioruby.
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
(
|
41
|
+
This is a tutorial for using Bioruby. A basic knowledge of Ruby is required.
|
42
|
+
If you want to know more about the programming language Ruby we recommend the
|
43
|
+
excellent book ((<Programming Ruby|URL:http://www.pragprog.com/titles/ruby>))
|
44
|
+
by Dave Thomas and Andy Hunt - some of it is online
|
45
|
+
((<here|URL:http://www.rubycentral.com/pickaxe/>)).
|
27
46
|
|
28
|
-
For
|
29
|
-
reading' at the end.
|
47
|
+
For BioRuby you need to install Ruby and the BioRuby package on your computer
|
30
48
|
|
31
49
|
You can check whether Ruby is installed on your computer and what
|
32
50
|
version it has with the
|
33
51
|
|
34
|
-
|
52
|
+
% ruby -v
|
35
53
|
|
36
54
|
command. Showing something like:
|
37
55
|
|
38
56
|
ruby 1.8.5 (2006-08-25) [powerpc-linux]
|
39
57
|
|
58
|
+
If you see no such thing you'll have to install Ruby using your installation
|
59
|
+
manager. For more information see the
|
60
|
+
((<Ruby|URL:http://www.ruby-lang.org/en/>)) website.
|
61
|
+
|
62
|
+
Once Ruby works, download and install Bioruby using the links on the
|
63
|
+
((<Bioruby|URL:http://bioruby.org/>)) website.
|
64
|
+
|
65
|
+
A lot of BioRuby's documentation exists in the source code and unit tests. To
|
66
|
+
really dive in you will need the latest source code tree. The embedded rdoc
|
67
|
+
documentation can be viewed online at
|
68
|
+
((<bioruby's rdoc|URL:http://bioruby.org/rdoc/>)). But first let's start!
|
40
69
|
|
41
70
|
== Trying Bioruby
|
42
71
|
|
43
72
|
Bioruby comes with its own shell. After unpacking the sources run the
|
44
73
|
following command
|
45
74
|
|
46
|
-
|
75
|
+
./bin/bioruby or
|
76
|
+
ruby -I lib bin/bioruby
|
47
77
|
|
48
78
|
and you should see a prompt
|
49
79
|
|
@@ -52,10 +82,14 @@ and you should see a prompt
|
|
52
82
|
Now test the following:
|
53
83
|
|
54
84
|
bioruby> seq = Bio::Sequence::NA.new("atgcatgcaaaa")
|
55
|
-
|
56
|
-
|
57
|
-
bioruby>
|
58
|
-
ttttgcatgcat
|
85
|
+
==> "atgcatgcaaaa"
|
86
|
+
|
87
|
+
bioruby> seq.complement
|
88
|
+
==> "ttttgcatgcat"
|
89
|
+
|
90
|
+
See the Bioruby shell section below for more tweaking. If you have trouble running
|
91
|
+
examples also check the section below on trouble shooting. You can also post a
|
92
|
+
question to the mailing list. BioRuby developers usually try to help.
|
59
93
|
|
60
94
|
== Working with nucleic / amino acid sequences (Bio::Sequence class)
|
61
95
|
|
@@ -68,33 +102,48 @@ calculated, and so on. When translating into amino acid sequences the
|
|
68
102
|
frame can be specified and optionally the codon table selected (as
|
69
103
|
defined in codontable.rb).
|
70
104
|
|
105
|
+
bioruby> seq = Bio::Sequence::NA.new("atgcatgcaaaa")
|
106
|
+
==> "atgcatgcaaaa"
|
107
|
+
|
108
|
+
# complemental sequence (Bio::Sequence::NA object)
|
109
|
+
bioruby> seq.complement
|
110
|
+
==> "ttttgcatgcat"
|
111
|
+
|
112
|
+
bioruby> seq.subseq(3,8) # gets subsequence of positions 3 to 8
|
113
|
+
==> "gcatgc"
|
114
|
+
bioruby> seq.gc_percent
|
115
|
+
==> 33
|
116
|
+
bioruby> seq.composition
|
117
|
+
==> {"a"=>6, "c"=>2, "g"=>2, "t"=>2}
|
118
|
+
bioruby> seq.translate
|
119
|
+
==> "MHAK"
|
120
|
+
bioruby> seq.translate(2) # translate from frame 2
|
121
|
+
==> "CMQ"
|
122
|
+
bioruby> seq.translate(1,11) # codon table 11
|
123
|
+
==> "MHAK"
|
124
|
+
bioruby> seq.translate.codes
|
125
|
+
==> ["Met", "His", "Ala", "Lys"]
|
126
|
+
bioruby> seq.translate.names
|
127
|
+
==> ["methionine", "histidine", "alanine", "lysine"]
|
128
|
+
bioruby> seq.translate.composition
|
129
|
+
==> {"K"=>1, "A"=>1, "M"=>1, "H"=>1}
|
130
|
+
bioruby> seq.translate.molecular_weight
|
131
|
+
==> 485.605
|
132
|
+
bioruby> seq.complement.translate
|
133
|
+
==> "FCMH"
|
134
|
+
|
135
|
+
get a random sequence with the same NA count:
|
136
|
+
|
137
|
+
bioruby> counts = {'a'=>seq.count('a'),'c'=>seq.count('c'),'g'=>seq.count('g'),'t'=>seq.count('t')}
|
138
|
+
==> {"a"=>6, "c"=>2, "g"=>2, "t"=>2}
|
139
|
+
bioruby!> randomseq = Bio::Sequence::NA.randomize(counts)
|
140
|
+
==!> "aaacatgaagtc"
|
141
|
+
|
142
|
+
bioruby!> print counts
|
143
|
+
a6c2g2t2
|
144
|
+
bioruby!> p counts
|
145
|
+
{"a"=>6, "c"=>2, "g"=>2, "t"=>2}
|
71
146
|
|
72
|
-
#!/usr/bin/env ruby
|
73
|
-
|
74
|
-
require 'bio'
|
75
|
-
|
76
|
-
seq = Bio::Sequence::NA.new("atgcatgcaaaa")
|
77
|
-
|
78
|
-
puts seq # original sequence
|
79
|
-
puts seq.complement # complemental sequence (Bio::Sequence::NA object)
|
80
|
-
puts seq.subseq(3,8) # gets subsequence of positions 3 to 8
|
81
|
-
|
82
|
-
p seq.gc_percent # GC percent (BioRuby 0.6.X: Float, BioRuby 0.7 or later: Integer)
|
83
|
-
p seq.composition # nucleic acid compositions (Hash)
|
84
|
-
|
85
|
-
puts seq.translate # translation (Bio::Sequence::AA object)
|
86
|
-
puts seq.translate(2) # translation from frame 2 (default is frame 1)
|
87
|
-
puts seq.translate(1,11) # using codon table No.11 (see http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi)
|
88
|
-
|
89
|
-
p seq.translate.codes # shows three-letter codes (Array)
|
90
|
-
p seq.translate.names # shows amino acid names (Array)
|
91
|
-
p seq.translate.composition # amino acid compositions (Hash)
|
92
|
-
p seq.translate.molecular_weight # calculating molecular weight (Float)
|
93
|
-
|
94
|
-
puts seq.complement.translate # translation of complemental strand
|
95
|
-
|
96
|
-
counts = {'a'=>seq.count('a'),'c'=>seq.count('c'),'g'=>seq.count('g'),'t'=>seq.count('t')}
|
97
|
-
p randomseq = Bio::Sequence::NA.randomize(counts) # reshuffle sequence with same freq.
|
98
147
|
|
99
148
|
The p, print and puts methods are standard Ruby ways of outputting to
|
100
149
|
the screen. If you want to know more about standard Ruby commands you
|
@@ -105,9 +154,9 @@ Windows). For example
|
|
105
154
|
% ri p
|
106
155
|
% ri File.open
|
107
156
|
|
108
|
-
Nucleic acid sequence is an object of
|
109
|
-
amino acid sequence is an object of
|
110
|
-
methods are in the parent
|
157
|
+
Nucleic acid sequence is an object of Bio::Sequence::NA class, and
|
158
|
+
amino acid sequence is an object of Bio::Sequence::AA class. Shared
|
159
|
+
methods are in the parent Bio::Sequence class.
|
111
160
|
|
112
161
|
As Bio::Sequence class inherits Ruby's String class, you can use
|
113
162
|
String class methods. For example, to get a subsequence, you can
|
@@ -116,15 +165,12 @@ not only use subseq(from, to) but also String#[].
|
|
116
165
|
Please take note that Ruby's strings are base 0 - i.e. the first letter
|
117
166
|
has index 0, for example:
|
118
167
|
|
119
|
-
s = 'abc'
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
>ab
|
127
|
-
|
168
|
+
bioruby> s = 'abc'
|
169
|
+
==> "abc"
|
170
|
+
bioruby> s[0].chr
|
171
|
+
==> "a"
|
172
|
+
bioruby> s[0..1]
|
173
|
+
==> "ab"
|
128
174
|
|
129
175
|
So when using String methods, you should subtract 1 from positions
|
130
176
|
conventionally used in biology. (subseq method will throw an exception if you
|
@@ -136,55 +182,71 @@ way of writing concise and clear code using 'closures'. Each sliding
|
|
136
182
|
window creates a subsequence which is supplied to the enclosed block
|
137
183
|
through a variable named +s+.
|
138
184
|
|
139
|
-
|
140
|
-
the default one base at a time)
|
185
|
+
Show average percentage of GC content for 20 bases (stepping the default one base at a time)
|
141
186
|
|
142
|
-
|
143
|
-
|
144
|
-
end
|
187
|
+
bioruby> seq = Bio::Sequence::NA.new("atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa")
|
188
|
+
==> "atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa"
|
145
189
|
|
190
|
+
bioruby> a=[]; seq.window_search(20) { |s| a.push s.gc_percent }
|
191
|
+
bioruby> a
|
192
|
+
==> [30, 35, 40, 40, 35, 35, 35, 30, 25, 30, 30, 30, 35, 35, 35, 35, 35, 40, 45, 45, 45, 45, 40, 35, 40, 40, 40, 40, 40, 35, 35, 35, 30, 30, 30]
|
193
|
+
|
194
|
+
|
146
195
|
Since the class of each subsequence is the same as original sequence
|
147
196
|
(Bio::Sequence::NA or Bio::Sequence::AA or Bio::Sequence), you can
|
148
197
|
use all methods on the subsequence. For example,
|
149
198
|
|
150
|
-
|
199
|
+
Shows translation results for 15 bases shifting a codon at a time
|
200
|
+
|
201
|
+
bioruby> a = []
|
202
|
+
bioruby> seq.window_search(15, 3) do |s|
|
203
|
+
bioruby> a.push s.translate
|
204
|
+
bioruby> end
|
205
|
+
bioruby> a
|
206
|
+
==> ["MHAIK", "HAIKL", "AIKLI", "IKLIP", "KLIPI", "LIPIR", "IPIRS", "PIRSS", "IRSSR", "RSSRS", "SSRSS", "SRSSK", "RSSKK", "SSKKK"]
|
151
207
|
|
152
|
-
seq.window_search(15, 3) do |s|
|
153
|
-
puts s.translate
|
154
|
-
end
|
155
208
|
|
156
209
|
Finally, the window_search method returns the last leftover
|
157
210
|
subsequence. This allows for example
|
158
211
|
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
212
|
+
Divide a genome sequence into sections of 10000bp and
|
213
|
+
output FASTA formatted sequences (line width 60 chars). The 1000bp at the
|
214
|
+
start and end of each subsequence overlap. At the 3' end of the sequence
|
215
|
+
the leftover is also added:
|
163
216
|
|
164
217
|
i = 1
|
218
|
+
textwidth=60
|
165
219
|
remainder = seq.window_search(10000, 9000) do |s|
|
166
|
-
puts s.to_fasta("segment #{i}",
|
220
|
+
puts s.to_fasta("segment #{i}", textwidth)
|
167
221
|
i += 1
|
168
222
|
end
|
169
|
-
|
223
|
+
if remainder
|
224
|
+
puts remainder.to_fasta("segment #{i}", textwidth)
|
225
|
+
end
|
170
226
|
|
171
227
|
If you don't want the overlapping window, set window size and stepping
|
172
228
|
size to equal values.
|
173
229
|
|
174
230
|
Other examples
|
175
231
|
|
176
|
-
|
232
|
+
Count the codon usage
|
177
233
|
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
234
|
+
bioruby> codon_usage = Hash.new(0)
|
235
|
+
bioruby> seq.window_search(3, 3) do |s|
|
236
|
+
bioruby> codon_usage[s] += 1
|
237
|
+
bioruby> end
|
238
|
+
bioruby> codon_usage
|
239
|
+
==> {"cat"=>1, "aaa"=>3, "cca"=>1, "att"=>2, "aga"=>1, "atc"=>1, "cta"=>1, "gca"=>1, "cga"=>1, "tca"=>3, "aag"=>1, "tcc"=>1, "atg"=>1}
|
182
240
|
|
183
|
-
* Calculate molecular weight for each 10-aa peptide (or 10-nt nucleic acid)
|
184
241
|
|
185
|
-
|
186
|
-
|
187
|
-
|
242
|
+
Calculate molecular weight for each 10-aa peptide (or 10-nt nucleic acid)
|
243
|
+
|
244
|
+
bioruby> a = []
|
245
|
+
bioruby> seq.window_search(10, 10) do |s|
|
246
|
+
bioruby> a.push s.molecular_weight
|
247
|
+
bioruby> end
|
248
|
+
bioruby> a
|
249
|
+
==> [3096.2062, 3086.1962, 3056.1762, 3023.1262, 3073.2262]
|
188
250
|
|
189
251
|
In most cases, sequences are read from files or retrieved from databases.
|
190
252
|
For example:
|
@@ -210,6 +272,10 @@ For example, translates my_naseq.txt:
|
|
210
272
|
|
211
273
|
% ruby na2aa.rb my_naseq.txt
|
212
274
|
|
275
|
+
or use a pipe!
|
276
|
+
|
277
|
+
% cat my_naseq.txt|ruby na2aa.rb
|
278
|
+
|
213
279
|
Outputs
|
214
280
|
|
215
281
|
VAIFPKAMTGAKNQSSDICLMPHVGLIRRGQRRIRHLVQMSDAA*
|
@@ -218,8 +284,9 @@ You can also write this, a bit fanciful, as a one-liner script.
|
|
218
284
|
|
219
285
|
% ruby -r bio -e 'p Bio::Sequence::NA.new($<.read).translate' my_naseq.txt
|
220
286
|
|
221
|
-
In the next section we will retrieve data from databases instead of
|
222
|
-
|
287
|
+
In the next section we will retrieve data from databases instead of using raw
|
288
|
+
sequence files. One generic example of the above can be found in
|
289
|
+
./sample/na2aa.rb.
|
223
290
|
|
224
291
|
== Parsing GenBank data (Bio::GenBank class)
|
225
292
|
|
@@ -243,7 +310,8 @@ the data:
|
|
243
310
|
|
244
311
|
print ">#{gb.accession} " # Accession
|
245
312
|
puts gb.definition # Definition
|
246
|
-
puts gb.naseq # Nucleic acid sequence
|
313
|
+
puts gb.naseq # Nucleic acid sequence
|
314
|
+
# (Bio::Sequence::NA object)
|
247
315
|
end
|
248
316
|
|
249
317
|
But that has the disadvantage the code is tied to GenBank input. A more
|
@@ -251,9 +319,9 @@ generic method is to use Bio::FlatFile which allows you to use different
|
|
251
319
|
input formats:
|
252
320
|
|
253
321
|
#!/usr/bin/env ruby
|
254
|
-
|
322
|
+
|
255
323
|
require 'bio'
|
256
|
-
|
324
|
+
|
257
325
|
ff = Bio::FlatFile.new(Bio::GenBank, ARGF)
|
258
326
|
ff.each_entry do |gb|
|
259
327
|
definition = "#{gb.accession} #{gb.definition}"
|
@@ -288,9 +356,6 @@ Again another option is to use the Bio::DB.open class:
|
|
288
356
|
puts gb.naseq.to_fasta(definition, 60)
|
289
357
|
end
|
290
358
|
|
291
|
-
(TRANSLATOR'S NOTE: Bio::DB.open have not been used so well.)
|
292
|
-
(EDITOR's NOTE: Test code)
|
293
|
-
|
294
359
|
Next, we are going to parse the GenBank 'features', which is normally
|
295
360
|
very complicated:
|
296
361
|
|
@@ -333,12 +398,12 @@ very complicated:
|
|
333
398
|
end
|
334
399
|
end
|
335
400
|
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
401
|
+
Note: In this example Feature#assoc method makes a Hash from a
|
402
|
+
feature object. It is useful because you can get data from the hash
|
403
|
+
by using qualifiers as keys.
|
404
|
+
(But there is a risk some information is lost when two or more
|
405
|
+
qualifiers are the same. Therefore an Array is returned by
|
406
|
+
Feature#feature)
|
342
407
|
|
343
408
|
Bio::Sequence#splicing splices subsequence from nucleic acid sequence
|
344
409
|
according to location information used in GenBank, EMBL and DDBJ.
|
@@ -352,11 +417,11 @@ feature style location text but also Bio::Locations object. For more
|
|
352
417
|
information about location format and Bio::Locations class, see
|
353
418
|
bio/location.rb.
|
354
419
|
|
355
|
-
|
420
|
+
Splice according to location string used in a GenBank entry
|
356
421
|
|
357
422
|
naseq.splicing('join(2035..2050,complement(1775..1818),13..345)')
|
358
423
|
|
359
|
-
|
424
|
+
Generate Bio::Locations object and pass the splicing method
|
360
425
|
|
361
426
|
locs = Bio::Locations.new('join((8298.8300)..10206,1..855)')
|
362
427
|
naseq.splicing(locs)
|
@@ -364,17 +429,16 @@ bio/location.rb.
|
|
364
429
|
You can also use the splicing method for amino acid sequences
|
365
430
|
(Bio::Sequence::AA objects).
|
366
431
|
|
367
|
-
|
432
|
+
Splicing peptide from a protein (e.g. signal peptide)
|
368
433
|
|
369
434
|
aaseq.splicing('21..119')
|
370
435
|
|
371
|
-
(EDITOR's NOTE: why use STRINGs here?)
|
372
436
|
|
373
437
|
=== More databases
|
374
438
|
|
375
439
|
Databases in BioRuby are essentially accessed like that of GenBank
|
376
|
-
with classes like Bio::GenBank, Bio::KEGG::GENES
|
377
|
-
|
440
|
+
with classes like Bio::GenBank, Bio::KEGG::GENES. A full list can be found in
|
441
|
+
the ./lib/bio/db directory of the BioRuby source tree.
|
378
442
|
|
379
443
|
In many cases the Bio::DatabaseClass acts as a factory pattern
|
380
444
|
and recognises the database type automatically - returning a
|
@@ -401,7 +465,14 @@ database class?
|
|
401
465
|
end
|
402
466
|
|
403
467
|
An example that can take any input, filter using a regular expression to output
|
404
|
-
to a FASTA file can be found in sample/any2fasta.rb.
|
468
|
+
to a FASTA file can be found in sample/any2fasta.rb. With this technique it is
|
469
|
+
possible to write a Unix type grep/sort pipe for sequence information. One
|
470
|
+
example using scripts in the BIORUBY sample folder:
|
471
|
+
|
472
|
+
fastagrep.rb '/At|Dm/' database.seq | fastasort.rb
|
473
|
+
|
474
|
+
greps the database for Arabidopsis and Drosophila entries and sorts the output
|
475
|
+
to FASTA.
|
405
476
|
|
406
477
|
Other methods to extract specific data from database objects can be
|
407
478
|
different between databases, though some methods are common (see the
|
@@ -427,35 +498,30 @@ multiple Bio::Reference objects as an Array. And some classes have a
|
|
427
498
|
Bio::Alignment class in bio/alignment.rb is a container class like Ruby's Hash,
|
428
499
|
Array and BioPerl's Bio::SimpleAlign. A very simple example is:
|
429
500
|
|
430
|
-
|
431
|
-
|
432
|
-
seqs = [ 'atgca', 'aagca', 'acgca', 'acgcg' ]
|
433
|
-
seqs = seqs.collect{ |x| Bio::Sequence::NA.new(x) }
|
434
|
-
|
501
|
+
bioruby> seqs = [ 'atgca', 'aagca', 'acgca', 'acgcg' ]
|
502
|
+
bioruby> seqs = seqs.collect{ |x| Bio::Sequence::NA.new(x) }
|
435
503
|
# creates alignment object
|
436
|
-
a = Bio::Alignment.new(seqs)
|
437
|
-
|
438
|
-
|
439
|
-
p a.consensus # ==> "a?gc?"
|
440
|
-
|
504
|
+
bioruby> a = Bio::Alignment.new(seqs)
|
505
|
+
bioruby> a.consensus
|
506
|
+
==> "a?gc?"
|
441
507
|
# shows IUPAC consensus
|
442
|
-
|
443
|
-
|
508
|
+
a.consensus_iupac
|
509
|
+
==> "ahgcr"
|
444
510
|
# iterates over each seq
|
445
511
|
a.each { |x| p x }
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
512
|
+
# ==>
|
513
|
+
# "atgca"
|
514
|
+
# "aagca"
|
515
|
+
# "acgca"
|
516
|
+
# "acgcg"
|
451
517
|
# iterates over each site
|
452
518
|
a.each_site { |x| p x }
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
519
|
+
# ==>
|
520
|
+
# ["a", "a", "a", "a"]
|
521
|
+
# ["t", "a", "c", "c"]
|
522
|
+
# ["g", "g", "g", "g"]
|
523
|
+
# ["c", "c", "c", "c"]
|
524
|
+
# ["a", "a", "a", "g"]
|
459
525
|
|
460
526
|
# doing alignment by using CLUSTAL W.
|
461
527
|
# clustalw command must be installed.
|
@@ -469,21 +535,22 @@ library of commonly used REs (from REBASE) which can be used to cut single
|
|
469
535
|
stranded RNA or double stranded DNA into fragments. To list all enzymes:
|
470
536
|
|
471
537
|
rebase = Bio::RestrictionEnzyme.rebase
|
472
|
-
|
473
|
-
|
538
|
+
rebase.each do |enzyme_name, info|
|
539
|
+
p enzyme_name
|
474
540
|
end
|
475
541
|
|
476
542
|
and cut a sequence with an enzyme follow up with:
|
477
543
|
|
478
|
-
res = seq.cut_with_enzyme('EcoRII', {:max_permutations => 0},
|
544
|
+
res = seq.cut_with_enzyme('EcoRII', {:max_permutations => 0},
|
545
|
+
{:view_ranges => true})
|
479
546
|
if res.kind_of? Symbol #error
|
480
547
|
err = Err.find_by_code(res.to_s)
|
481
548
|
unless err
|
482
549
|
err = Err.new(:code => res.to_s)
|
483
550
|
end
|
484
551
|
end
|
485
|
-
|
486
|
-
|
552
|
+
res.each do |frag|
|
553
|
+
em = EnzymeMatch.new
|
487
554
|
|
488
555
|
em.p_left = frag.p_left
|
489
556
|
em.p_right = frag.p_right
|
@@ -493,7 +560,7 @@ and cut a sequence with an enzyme follow up with:
|
|
493
560
|
em.err = nil
|
494
561
|
em.enzyme = ar_enz
|
495
562
|
em.sequence = ar_seq
|
496
|
-
|
563
|
+
p em
|
497
564
|
end
|
498
565
|
|
499
566
|
|
@@ -510,21 +577,21 @@ local machine.
|
|
510
577
|
Install the fasta program on your machine (the command name looks like
|
511
578
|
fasta34. FASTA can be downloaded from ftp://ftp.virginia.edu/pub/fasta/).
|
512
579
|
First, you must prepare your FASTA-formatted database sequence file
|
513
|
-
target.pep and FASTA-formatted query.pep.
|
514
|
-
we should provide sample data to readers.)
|
580
|
+
target.pep and FASTA-formatted query.pep.
|
515
581
|
|
516
582
|
#!/usr/bin/env ruby
|
517
583
|
|
518
584
|
require 'bio'
|
519
585
|
|
520
|
-
# Creates FASTA factory object ("ssearch" instead of
|
586
|
+
# Creates FASTA factory object ("ssearch" instead of
|
587
|
+
# "fasta34" can also work)
|
521
588
|
factory = Bio::Fasta.local('fasta34', ARGV.pop)
|
522
589
|
(EDITOR's NOTE: not consistent pop command)
|
523
590
|
|
524
|
-
# Reads FASTA-formatted files (TRANSLATOR'S NOTE: something wrong in Japanese text)
|
525
591
|
ff = Bio::FlatFile.new(Bio::FastaFormat, ARGF)
|
526
592
|
|
527
|
-
# Iterates over each entry. the variable "entry" is a
|
593
|
+
# Iterates over each entry. the variable "entry" is a
|
594
|
+
# Bio::FastaFormat object:
|
528
595
|
ff.each do |entry|
|
529
596
|
# shows definition line (begins with '>') to the standard error output
|
530
597
|
$stderr.puts "Searching ... " + entry.definition
|
@@ -536,7 +603,8 @@ we should provide sample data to readers.)
|
|
536
603
|
report.each do |hit|
|
537
604
|
# If E-value is smaller than 0.0001
|
538
605
|
if hit.evalue < 0.0001
|
539
|
-
# shows identifier of query and hit, E-value, start and
|
606
|
+
# shows identifier of query and hit, E-value, start and
|
607
|
+
# end positions of homologous region
|
540
608
|
print "#{hit.query_id} : evalue #{hit.evalue}\t#{hit.target_id} at "
|
541
609
|
p hit.lap_at
|
542
610
|
end
|
@@ -550,7 +618,6 @@ We named above script as f_search.rb. You can execute as follows:
|
|
550
618
|
In above script, the variable "factory" is a factory object for executing
|
551
619
|
FASTA many times easily. Instead of using Fasta#query method,
|
552
620
|
Bio::Sequence#fasta method can be used.
|
553
|
-
(TRANSLATOR'S NOTE: Bio::Sequence#fasta are not so frequently used.)
|
554
621
|
|
555
622
|
seq = ">test seq\nYQVLEEIGRGSFGSVRKVIHIPTKKLLVRKDIKYGHMNSKE"
|
556
623
|
seq.fasta(factory)
|
@@ -566,7 +633,6 @@ Bio::Fasta#query returns Bio::Fasta::Report object.
|
|
566
633
|
We can get almost all information described in FASTA report text
|
567
634
|
with the Report object. For example, getting information for hits:
|
568
635
|
|
569
|
-
|
570
636
|
report.each do |hit|
|
571
637
|
puts hit.evalue # E-value
|
572
638
|
puts hit.sw # Smith-Waterman score (*)
|
@@ -575,15 +641,19 @@ with the Report object. For example, getting information for hits:
|
|
575
641
|
puts hit.query_id # identifier of query sequence
|
576
642
|
puts hit.query_def # definition(comment line) of query sequence
|
577
643
|
puts hit.query_len # length of query sequence
|
578
|
-
puts hit.query_seq #
|
644
|
+
puts hit.query_seq # sequence of homologous region
|
579
645
|
puts hit.target_id # identifier of hit sequence
|
580
646
|
puts hit.target_def # definition(comment line) of hit sequence
|
581
647
|
puts hit.target_len # length of hit sequence
|
582
|
-
puts hit.target_seq # hit
|
583
|
-
puts hit.query_start # start position of homologous
|
584
|
-
|
585
|
-
puts hit.
|
586
|
-
|
648
|
+
puts hit.target_seq # sequence of homologous region of hit sequence
|
649
|
+
puts hit.query_start # start position of homologous
|
650
|
+
# region in query sequence
|
651
|
+
puts hit.query_end # end position of homologous region
|
652
|
+
# in query sequence
|
653
|
+
puts hit.target_start # start position of homologous region
|
654
|
+
# in hit(target) sequence
|
655
|
+
puts hit.target_end # end position of homologous region
|
656
|
+
# in hit(target) sequence
|
587
657
|
puts hit.lap_at # array of above four numbers
|
588
658
|
end
|
589
659
|
|
@@ -676,25 +746,25 @@ There are some additional BLAST methods, for example, bit_score and
|
|
676
746
|
midline.
|
677
747
|
|
678
748
|
report.each do |hit|
|
679
|
-
puts hit.bit_score
|
680
|
-
puts hit.query_seq
|
681
|
-
puts hit.midline
|
682
|
-
puts hit.target_seq
|
683
|
-
|
684
|
-
puts hit.evalue
|
685
|
-
puts hit.identity
|
686
|
-
puts hit.overlap
|
687
|
-
puts hit.query_id
|
688
|
-
puts hit.query_def
|
689
|
-
puts hit.query_len
|
690
|
-
puts hit.target_id
|
691
|
-
puts hit.target_def
|
692
|
-
puts hit.target_len
|
693
|
-
puts hit.query_start
|
694
|
-
puts hit.query_end
|
695
|
-
puts hit.target_start
|
696
|
-
puts hit.target_end
|
697
|
-
puts hit.lap_at
|
749
|
+
puts hit.bit_score
|
750
|
+
puts hit.query_seq
|
751
|
+
puts hit.midline
|
752
|
+
puts hit.target_seq
|
753
|
+
|
754
|
+
puts hit.evalue
|
755
|
+
puts hit.identity
|
756
|
+
puts hit.overlap
|
757
|
+
puts hit.query_id
|
758
|
+
puts hit.query_def
|
759
|
+
puts hit.query_len
|
760
|
+
puts hit.target_id
|
761
|
+
puts hit.target_def
|
762
|
+
puts hit.target_len
|
763
|
+
puts hit.query_start
|
764
|
+
puts hit.query_end
|
765
|
+
puts hit.target_start
|
766
|
+
puts hit.target_end
|
767
|
+
puts hit.lap_at
|
698
768
|
end
|
699
769
|
|
700
770
|
For simplicity and API compatibility, some information such as score
|
@@ -1131,39 +1201,66 @@ to be written...
|
|
1131
1201
|
|
1132
1202
|
== The BioRuby example programs
|
1133
1203
|
|
1134
|
-
Some sample programs are stored in samples/
|
1135
|
-
Some programs are obsolete. Since samples are not enough,
|
1136
|
-
practical and interesting samples are welcome.
|
1204
|
+
Some sample programs are stored in the ./sample/ directory. Run for example:
|
1137
1205
|
|
1138
|
-
|
1206
|
+
./sample/na2aa.rb test/data/fasta/example1.txt
|
1207
|
+
|
1208
|
+
== Unit testing and doctests
|
1139
1209
|
|
1140
|
-
|
1141
|
-
|
1210
|
+
BioRuby comes with an extensive testing framework with over 1300 tests and 2700
|
1211
|
+
assertions. To run the unit tests:
|
1212
|
+
|
1213
|
+
cd test
|
1214
|
+
ruby runner.rb
|
1215
|
+
|
1216
|
+
We have also started with doctest for Ruby. We are porting the examples
|
1217
|
+
in this tutorial to doctest - more info upcoming.
|
1142
1218
|
|
1143
1219
|
== Further reading
|
1144
1220
|
|
1145
|
-
See the BioRuby in anger Wiki
|
1146
|
-
|
1221
|
+
See the BioRuby in anger Wiki. A lot of BioRuby's documentation exists in the
|
1222
|
+
source code and unit tests. To really dive in you will need the latest source
|
1223
|
+
code tree. The embedded rdoc documentation can be viewed online at
|
1224
|
+
((<URL:http://bioruby.org/rdoc/>)).
|
1225
|
+
|
1226
|
+
== BioRuby Shell
|
1227
|
+
|
1228
|
+
You can find the BioRuby shell implementation in ./lib/bio/shell. It is very interesting
|
1229
|
+
as it uses IRB (the Ruby interpreter) which is a powerful environment described in
|
1230
|
+
((<Programming Ruby's irb chapter|URL:http://ruby-doc.org/docs/ProgrammingRuby/html/irb.html>)). IRB commands can directly be typed in the shell, e.g.
|
1231
|
+
|
1232
|
+
bioruby!> IRB.conf[:PROMPT_MODE]
|
1233
|
+
==!> :PROMPT_C
|
1147
1234
|
|
1148
|
-
|
1149
|
-
|
1150
|
-
|
1235
|
+
Optionally, you may also want to install Ruby readline support -
|
1236
|
+
with Debian libreadline-ruby. To edit a previous line you may have to press
|
1237
|
+
line down (arrow down) first.
|
1238
|
+
|
1239
|
+
= Helpful tools
|
1240
|
+
|
1241
|
+
Apart from rdoc you may also want to use rtags - which allows jumping around
|
1242
|
+
source code by clicking on class and method names.
|
1243
|
+
|
1244
|
+
cd bioruby/lib
|
1245
|
+
rtags -R --vi
|
1246
|
+
|
1247
|
+
For a tutorial see ((<URL:http://rtags.rubyforge.org/>))
|
1151
1248
|
|
1152
1249
|
= APPENDIX
|
1153
1250
|
|
1154
1251
|
== KEGG API
|
1155
1252
|
|
1156
|
-
Please refer to KEGG_API.rd.ja (
|
1253
|
+
Please refer to KEGG_API.rd.ja (English version: ((<URL:http://www.genome.jp/kegg/soap/doc/keggapi_manual.html>)) ) and
|
1157
1254
|
|
1158
1255
|
* ((<URL:http://www.genome.jp/kegg/soap/>))
|
1159
1256
|
|
1160
1257
|
== Comparing BioProjects
|
1161
1258
|
|
1162
|
-
For a quick functional comparison of BioRuby, BioPerl, BioPython and Bioconductor (R) see ((<http://sciruby.codeforpeople.com/sr.cgi/BioProjects>))
|
1259
|
+
For a quick functional comparison of BioRuby, BioPerl, BioPython and Bioconductor (R) see ((<URL:http://sciruby.codeforpeople.com/sr.cgi/BioProjects>))
|
1163
1260
|
|
1164
1261
|
== Using BioRuby with R
|
1165
1262
|
|
1166
|
-
Using Ruby with R Pjotr wrote a section on SciRuby. See ((<
|
1263
|
+
Using Ruby with R Pjotr wrote a section on SciRuby. See ((<URL:http://sciruby.codeforpeople.com/sr.cgi/RubyWithRlang>))
|
1167
1264
|
|
1168
1265
|
== Using BioPerl or BioPython from Ruby
|
1169
1266
|
|
@@ -1180,5 +1277,20 @@ painful, as the gem standard for packages evolved late and some still
|
|
1180
1277
|
force you to copy things by hand. Therefore read the README's
|
1181
1278
|
carefully that come with each package.
|
1182
1279
|
|
1183
|
-
|
1280
|
+
== Trouble shooting
|
1184
1281
|
|
1282
|
+
* Error: in `require': no such file to load -- bio (LoadError)
|
1283
|
+
|
1284
|
+
Ruby fails to find the BioRuby libraries - add it to the RUBYLIB path, or pass
|
1285
|
+
it to the interpreter. For example:
|
1286
|
+
|
1287
|
+
ruby -I~/cvs/bioruby/lib yourprogram.rb
|
1288
|
+
|
1289
|
+
== Modifying this page
|
1290
|
+
|
1291
|
+
IMPORTANT NOTICE: This page is maintained in the BioRuby CVS
|
1292
|
+
repository. Please edit the file there otherwise changes may get
|
1293
|
+
lost. See ((<BioRuby Developer Information>)) for CVS and mailing list
|
1294
|
+
access.
|
1295
|
+
|
1296
|
+
=end
|