bio 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +3421 -0
- data/KNOWN_ISSUES.rdoc +88 -0
- data/README.rdoc +252 -0
- data/README_DEV.rdoc +285 -0
- data/Rakefile +143 -0
- data/bin/bioruby +0 -0
- data/bin/br_biofetch.rb +0 -0
- data/bin/br_bioflat.rb +12 -1
- data/bin/br_biogetseq.rb +0 -0
- data/bin/br_pmfetch.rb +4 -3
- data/bioruby.gemspec +477 -0
- data/bioruby.gemspec.erb +117 -0
- data/doc/Changes-0.7.rd +7 -0
- data/doc/Changes-1.3.rdoc +239 -0
- data/doc/Tutorial.rd +296 -184
- data/doc/Tutorial.rd.html +1031 -0
- data/doc/Tutorial.rd.ja +111 -45
- data/doc/Tutorial.rd.ja.html +2225 -0
- data/doc/bioruby.css +281 -0
- data/extconf.rb +2 -0
- data/lib/bio.rb +29 -4
- data/lib/bio/appl/blast.rb +306 -121
- data/lib/bio/appl/blast/ddbj.rb +142 -0
- data/lib/bio/appl/blast/format0.rb +35 -25
- data/lib/bio/appl/blast/format8.rb +2 -2
- data/lib/bio/appl/blast/genomenet.rb +263 -0
- data/lib/bio/appl/blast/ncbioptions.rb +220 -0
- data/lib/bio/appl/blast/remote.rb +106 -0
- data/lib/bio/appl/blast/report.rb +260 -9
- data/lib/bio/appl/blast/rexml.rb +12 -5
- data/lib/bio/appl/blast/rpsblast.rb +277 -0
- data/lib/bio/appl/blast/wublast.rb +133 -12
- data/lib/bio/appl/blast/xmlparser.rb +35 -18
- data/lib/bio/appl/blat/report.rb +46 -5
- data/lib/bio/appl/emboss.rb +62 -13
- data/lib/bio/appl/fasta.rb +9 -11
- data/lib/bio/appl/genscan/report.rb +3 -3
- data/lib/bio/appl/hmmer.rb +1 -1
- data/lib/bio/appl/hmmer/report.rb +10 -10
- data/lib/bio/appl/paml/baseml.rb +95 -0
- data/lib/bio/appl/paml/baseml/report.rb +32 -0
- data/lib/bio/appl/paml/codeml.rb +242 -0
- data/lib/bio/appl/paml/codeml/rates.rb +67 -0
- data/lib/bio/appl/paml/codeml/report.rb +67 -0
- data/lib/bio/appl/paml/common.rb +348 -0
- data/lib/bio/appl/paml/common_report.rb +38 -0
- data/lib/bio/appl/paml/yn00.rb +103 -0
- data/lib/bio/appl/paml/yn00/report.rb +32 -0
- data/lib/bio/appl/psort.rb +2 -2
- data/lib/bio/appl/pts1.rb +5 -5
- data/lib/bio/appl/tmhmm/report.rb +10 -1
- data/lib/bio/command.rb +297 -41
- data/lib/bio/compat/features.rb +157 -0
- data/lib/bio/compat/references.rb +128 -0
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
- data/lib/bio/db/biosql/sequence.rb +508 -0
- data/lib/bio/db/embl/common.rb +28 -12
- data/lib/bio/db/embl/embl.rb +107 -9
- data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
- data/lib/bio/db/embl/format_embl.rb +190 -0
- data/lib/bio/db/embl/sptr.rb +15 -16
- data/lib/bio/db/fantom.rb +6 -8
- data/lib/bio/db/fasta.rb +10 -507
- data/lib/bio/db/fasta/defline.rb +532 -0
- data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
- data/lib/bio/db/fasta/format_fasta.rb +97 -0
- data/lib/bio/db/genbank/common.rb +25 -8
- data/lib/bio/db/genbank/format_genbank.rb +187 -0
- data/lib/bio/db/genbank/genbank.rb +36 -1
- data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
- data/lib/bio/db/gff.rb +1791 -119
- data/lib/bio/db/kegg/glycan.rb +2 -6
- data/lib/bio/db/lasergene.rb +3 -3
- data/lib/bio/db/medline.rb +4 -1
- data/lib/bio/db/newick.rb +10 -10
- data/lib/bio/db/pdb/chain.rb +6 -2
- data/lib/bio/db/pdb/pdb.rb +12 -3
- data/lib/bio/db/rebase.rb +7 -8
- data/lib/bio/db/soft.rb +3 -3
- data/lib/bio/feature.rb +1 -88
- data/lib/bio/io/biosql/biodatabase.rb +64 -0
- data/lib/bio/io/biosql/bioentry.rb +29 -0
- data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
- data/lib/bio/io/biosql/bioentry_path.rb +12 -0
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
- data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
- data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
- data/lib/bio/io/biosql/biosequence.rb +11 -0
- data/lib/bio/io/biosql/comment.rb +7 -0
- data/lib/bio/io/biosql/config/database.yml +20 -0
- data/lib/bio/io/biosql/dbxref.rb +13 -0
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
- data/lib/bio/io/biosql/location.rb +32 -0
- data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
- data/lib/bio/io/biosql/ontology.rb +10 -0
- data/lib/bio/io/biosql/reference.rb +9 -0
- data/lib/bio/io/biosql/seqfeature.rb +32 -0
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
- data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
- data/lib/bio/io/biosql/taxon.rb +12 -0
- data/lib/bio/io/biosql/taxon_name.rb +9 -0
- data/lib/bio/io/biosql/term.rb +27 -0
- data/lib/bio/io/biosql/term_dbxref.rb +11 -0
- data/lib/bio/io/biosql/term_path.rb +12 -0
- data/lib/bio/io/biosql/term_relationship.rb +13 -0
- data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
- data/lib/bio/io/biosql/term_synonym.rb +10 -0
- data/lib/bio/io/das.rb +7 -7
- data/lib/bio/io/ddbjxml.rb +57 -0
- data/lib/bio/io/ensembl.rb +2 -2
- data/lib/bio/io/fetch.rb +28 -14
- data/lib/bio/io/flatfile.rb +17 -853
- data/lib/bio/io/flatfile/autodetection.rb +545 -0
- data/lib/bio/io/flatfile/buffer.rb +237 -0
- data/lib/bio/io/flatfile/index.rb +17 -7
- data/lib/bio/io/flatfile/indexer.rb +30 -12
- data/lib/bio/io/flatfile/splitter.rb +297 -0
- data/lib/bio/io/hinv.rb +442 -0
- data/lib/bio/io/keggapi.rb +2 -2
- data/lib/bio/io/ncbirest.rb +733 -0
- data/lib/bio/io/pubmed.rb +34 -80
- data/lib/bio/io/registry.rb +2 -2
- data/lib/bio/io/sql.rb +178 -357
- data/lib/bio/io/togows.rb +458 -0
- data/lib/bio/location.rb +106 -11
- data/lib/bio/pathway.rb +120 -14
- data/lib/bio/reference.rb +115 -101
- data/lib/bio/sequence.rb +164 -183
- data/lib/bio/sequence/adapter.rb +108 -0
- data/lib/bio/sequence/common.rb +22 -45
- data/lib/bio/sequence/compat.rb +2 -2
- data/lib/bio/sequence/dblink.rb +54 -0
- data/lib/bio/sequence/format.rb +254 -77
- data/lib/bio/sequence/format_raw.rb +23 -0
- data/lib/bio/shell.rb +3 -1
- data/lib/bio/shell/core.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +33 -4
- data/lib/bio/shell/plugin/ncbirest.rb +64 -0
- data/lib/bio/shell/plugin/togows.rb +40 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
- data/lib/bio/tree.rb +4 -2
- data/lib/bio/util/color_scheme.rb +2 -2
- data/lib/bio/util/contingency_table.rb +2 -2
- data/lib/bio/util/restriction_enzyme.rb +2 -2
- data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
- data/lib/bio/version.rb +25 -0
- data/rdoc.zsh +8 -0
- data/sample/any2fasta.rb +0 -0
- data/sample/biofetch.rb +0 -0
- data/sample/dbget +0 -0
- data/sample/demo_sequence.rb +158 -0
- data/sample/enzymes.rb +0 -0
- data/sample/fasta2tab.rb +0 -0
- data/sample/fastagrep.rb +72 -0
- data/sample/fastasort.rb +54 -0
- data/sample/fsplit.rb +0 -0
- data/sample/gb2fasta.rb +2 -3
- data/sample/gb2tab.rb +0 -0
- data/sample/gbtab2mysql.rb +0 -0
- data/sample/genes2nuc.rb +0 -0
- data/sample/genes2pep.rb +0 -0
- data/sample/genes2tab.rb +0 -0
- data/sample/genome2rb.rb +0 -0
- data/sample/genome2tab.rb +0 -0
- data/sample/goslim.rb +0 -0
- data/sample/gt2fasta.rb +0 -0
- data/sample/na2aa.rb +34 -0
- data/sample/pmfetch.rb +0 -0
- data/sample/pmsearch.rb +0 -0
- data/sample/ssearch2tab.rb +0 -0
- data/sample/tfastx2tab.rb +0 -0
- data/sample/vs-genes.rb +0 -0
- data/setup.rb +1596 -0
- data/test/data/blast/blastp-multi.m7 +188 -0
- data/test/data/command/echoarg2.bat +1 -0
- data/test/data/paml/codeml/control_file.txt +30 -0
- data/test/data/paml/codeml/output.txt +78 -0
- data/test/data/paml/codeml/rates +217 -0
- data/test/data/rpsblast/misc.rpsblast +193 -0
- data/test/data/soft/GDS100_partial.soft +0 -0
- data/test/data/soft/GSE3457_family_partial.soft +0 -0
- data/test/functional/bio/appl/test_pts1.rb +115 -0
- data/test/functional/bio/io/test_ensembl.rb +123 -80
- data/test/functional/bio/io/test_togows.rb +267 -0
- data/test/functional/bio/sequence/test_output_embl.rb +51 -0
- data/test/functional/bio/test_command.rb +301 -0
- data/test/runner.rb +17 -1
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
- data/test/unit/bio/appl/blast/test_report.rb +753 -35
- data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
- data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
- data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
- data/test/unit/bio/appl/test_blast.rb +135 -4
- data/test/unit/bio/appl/test_fasta.rb +2 -2
- data/test/unit/bio/appl/test_pts1.rb +1 -64
- data/test/unit/bio/db/embl/test_common.rb +15 -15
- data/test/unit/bio/db/embl/test_embl.rb +4 -4
- data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
- data/test/unit/bio/db/embl/test_sptr.rb +38 -1
- data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
- data/test/unit/bio/db/test_gff.rb +1151 -25
- data/test/unit/bio/db/test_medline.rb +127 -0
- data/test/unit/bio/db/test_nexus.rb +5 -1
- data/test/unit/bio/db/test_prosite.rb +4 -4
- data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
- data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
- data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
- data/test/unit/bio/io/test_ddbjxml.rb +8 -3
- data/test/unit/bio/io/test_fastacmd.rb +5 -5
- data/test/unit/bio/io/test_flatfile.rb +357 -106
- data/test/unit/bio/io/test_soapwsdl.rb +2 -2
- data/test/unit/bio/io/test_togows.rb +161 -0
- data/test/unit/bio/sequence/test_common.rb +210 -11
- data/test/unit/bio/sequence/test_compat.rb +3 -3
- data/test/unit/bio/sequence/test_dblink.rb +58 -0
- data/test/unit/bio/sequence/test_na.rb +2 -2
- data/test/unit/bio/test_command.rb +111 -50
- data/test/unit/bio/test_feature.rb +29 -1
- data/test/unit/bio/test_location.rb +566 -6
- data/test/unit/bio/test_pathway.rb +91 -65
- data/test/unit/bio/test_reference.rb +67 -13
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
- metadata +202 -167
- data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
data/bioruby.gemspec.erb
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
Gem::Specification.new do |s|
|
|
2
|
+
s.name = 'bio'
|
|
3
|
+
s.version = "<% ###### Below is executed in ERB environment ######
|
|
4
|
+
# Version can be specified by the environment variable
|
|
5
|
+
env_ver = ENV['BIORUBY_GEM_VERSION']
|
|
6
|
+
env_ver = nil if env_ver.to_s.strip.empty?
|
|
7
|
+
|
|
8
|
+
# By default, determined from lib/bio/version.rb
|
|
9
|
+
load "./lib/bio/version.rb" unless defined?(BIO_VERSION_RB_LOADED)
|
|
10
|
+
case Bio::BIORUBY_EXTRA_VERSION
|
|
11
|
+
when nil
|
|
12
|
+
suffix = nil
|
|
13
|
+
when /\A\.(\d+)\z/
|
|
14
|
+
suffix = $1
|
|
15
|
+
when /\-alpha(\d+)/
|
|
16
|
+
decrement = true
|
|
17
|
+
suffix = 9000 + $1.to_i
|
|
18
|
+
when /\-pre(\d+)/
|
|
19
|
+
decrement = true
|
|
20
|
+
suffix = 9500 + $1.to_i
|
|
21
|
+
when /\-rc(\d+)/
|
|
22
|
+
decrement = true
|
|
23
|
+
suffix = 9900 + $1.to_i
|
|
24
|
+
else
|
|
25
|
+
suffix = "0000"
|
|
26
|
+
end
|
|
27
|
+
ver = Bio::BIORUBY_VERSION.reverse.collect do |i|
|
|
28
|
+
if decrement then
|
|
29
|
+
i -= 1
|
|
30
|
+
i < 0 ? (i += 10) : decrement = false
|
|
31
|
+
end
|
|
32
|
+
i
|
|
33
|
+
end.reverse
|
|
34
|
+
ver.push suffix if suffix
|
|
35
|
+
%><%=
|
|
36
|
+
(env_ver || ver.join('.'))
|
|
37
|
+
###### Above is executed in ERB environment ######
|
|
38
|
+
%>"
|
|
39
|
+
|
|
40
|
+
s.author = "BioRuby project"
|
|
41
|
+
s.email = "staff@bioruby.org"
|
|
42
|
+
s.homepage = "http://bioruby.org/"
|
|
43
|
+
s.rubyforge_project = "bioruby"
|
|
44
|
+
s.summary = "Bioinformatics library"
|
|
45
|
+
s.description = "BioRuby is a library for bioinformatics (biology + information science)."
|
|
46
|
+
|
|
47
|
+
s.platform = Gem::Platform::RUBY
|
|
48
|
+
s.files = [
|
|
49
|
+
<% ###### Below is executed in ERB environment ######
|
|
50
|
+
# Gets file list from the "git ls-files" command.
|
|
51
|
+
files = (`git ls-files` rescue nil).to_s.split(/\r?\n/)
|
|
52
|
+
files.delete_if { |x| x.empty? }
|
|
53
|
+
# When git-ls-files isn't available, creates a list from current files.
|
|
54
|
+
if !($?.success?) or files.size <= 0 then
|
|
55
|
+
files =
|
|
56
|
+
[ "README.rdoc", "README_DEV.rdoc",
|
|
57
|
+
"ChangeLog", "KNOWN_ISSUES.rdoc",
|
|
58
|
+
"Rakefile", "bioruby.gemspec.erb",
|
|
59
|
+
"bioruby.gemspec", "setup.rb",
|
|
60
|
+
"extconf.rb", "rdoc.zsh"
|
|
61
|
+
] + Dir.glob("{bin,doc,etc,lib,sample,test}/**/*").delete_if do |item|
|
|
62
|
+
case item
|
|
63
|
+
when /(\A|\/)CVS(\z|\/)/, /(\A|\/)rdoc(\z|\/)/, /\~\z/
|
|
64
|
+
true
|
|
65
|
+
else
|
|
66
|
+
false
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
%><%=
|
|
71
|
+
files.sort.collect { |x| x.dump }.join(",\n ")
|
|
72
|
+
###### Above is executed in ERB environment ######
|
|
73
|
+
%>
|
|
74
|
+
]
|
|
75
|
+
|
|
76
|
+
s.has_rdoc = true
|
|
77
|
+
s.extra_rdoc_files = [
|
|
78
|
+
<%= ###### Below is executed in ERB environment ######
|
|
79
|
+
# Files whose suffix are .rdoc are selected.
|
|
80
|
+
rdoc_files = files.find_all { |item| /\.rdoc\z/ =~ item }
|
|
81
|
+
# Fail safe settings
|
|
82
|
+
if rdoc_files.empty? then
|
|
83
|
+
rdoc_files = [ 'README.rdoc', 'README_DEV.rdoc',
|
|
84
|
+
'doc/Changes-1.3.rdoc' ]
|
|
85
|
+
end
|
|
86
|
+
rdoc_files.push "ChangeLog" unless rdoc_files.include?("ChangeLog")
|
|
87
|
+
rdoc_files.sort.collect { |x| x.dump }.join(",\n ")
|
|
88
|
+
###### Above is executed in ERB environment ######
|
|
89
|
+
%>
|
|
90
|
+
]
|
|
91
|
+
s.rdoc_options << '--main' << 'README.rdoc'
|
|
92
|
+
s.rdoc_options << '--title' << 'BioRuby API documentation'
|
|
93
|
+
s.rdoc_options << '--exclude' << '\.yaml\z'
|
|
94
|
+
s.rdoc_options << '--line-numbers' << '--inline-source'
|
|
95
|
+
|
|
96
|
+
s.require_path = 'lib'
|
|
97
|
+
s.autorequire = 'bio'
|
|
98
|
+
|
|
99
|
+
s.bindir = "bin"
|
|
100
|
+
s.executables = [
|
|
101
|
+
<%= ###### Below is executed in ERB environment ######
|
|
102
|
+
# Files in bin/ directory are selected.
|
|
103
|
+
exec_files = files.find_all { |item| /\Abin\// =~ item }
|
|
104
|
+
# Non-executable files are removed from the list.
|
|
105
|
+
exec_files.delete_if { |item| !File.executable?(item) }
|
|
106
|
+
# strip "bin/"
|
|
107
|
+
exec_files.collect! { |item| item.sub(/\Abin\//, '') }
|
|
108
|
+
# Fail safe settings
|
|
109
|
+
if exec_files.empty? then
|
|
110
|
+
exec_files = [ "bioruby", "br_biofetch.rb", "br_biogetseq.rb", "br_bioflat.rb", "br_pmfetch.rb" ]
|
|
111
|
+
end
|
|
112
|
+
exec_files.sort.collect { |x| x.dump }.join(",\n ")
|
|
113
|
+
###### Above is executed in ERB environment ######
|
|
114
|
+
%>
|
|
115
|
+
]
|
|
116
|
+
s.default_executable = "bioruby"
|
|
117
|
+
end
|
data/doc/Changes-0.7.rd
CHANGED
|
@@ -338,6 +338,13 @@ In 1.1.0:
|
|
|
338
338
|
instead of a string or nil: score, percent_identity, percent_positive,
|
|
339
339
|
percent_gaps.
|
|
340
340
|
|
|
341
|
+
--- BioRuby Shell
|
|
342
|
+
|
|
343
|
+
In 1.1.0:
|
|
344
|
+
|
|
345
|
+
* Shell commands seq, ent, obj are renamed to getseq, getent, getobj,
|
|
346
|
+
respectively.
|
|
347
|
+
|
|
341
348
|
=== Deleted files
|
|
342
349
|
|
|
343
350
|
: lib/bio/db/genbank.rb
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
= Incompatible and important changes since the BioRuby 1.2.1 release
|
|
2
|
+
|
|
3
|
+
A lot of changes have been made to BioRuby after version 1.2.1
|
|
4
|
+
was released.
|
|
5
|
+
|
|
6
|
+
== New features
|
|
7
|
+
|
|
8
|
+
=== Support for sequence output with improvements of Bio::Sequence
|
|
9
|
+
|
|
10
|
+
The outputting of EMBL and GenBank formatted text is now supported in the
|
|
11
|
+
Bio::Sequence class. See the document of Bio::Sequence#output for details.
|
|
12
|
+
You can also create Bio::Sequence objects from many kinds of data such as
|
|
13
|
+
Bio::GenBank, Bio::EMBL, and Bio::FastaFormat by using the to_biosequence
|
|
14
|
+
method.
|
|
15
|
+
|
|
16
|
+
=== BioSQL support
|
|
17
|
+
|
|
18
|
+
BioSQL support is completely rewritten by using ActiveRecord.
|
|
19
|
+
|
|
20
|
+
=== Bio::Blast
|
|
21
|
+
|
|
22
|
+
Bio::Blast#reports can parse NCBI default (-m 0) format and tabular (-m 8)
|
|
23
|
+
format, in addition to XML (-m 7) format.
|
|
24
|
+
|
|
25
|
+
Bio::Blast::Report now supports XML format with multiple query sequences
|
|
26
|
+
generated by blastall 2.2.14 or later.
|
|
27
|
+
|
|
28
|
+
Bio::Blast.remote supports DDBJ, in addition to GenomeNet.
|
|
29
|
+
In addition, a list of available blast databases on remote sites
|
|
30
|
+
can be obtained by using Bio::Blast::Remote::DDBJ.databases and
|
|
31
|
+
Bio::Blast::Remote::GenomeNet.databases methods. Note that the above
|
|
32
|
+
remote blast methods may be changed in the future to support NCBI.
|
|
33
|
+
|
|
34
|
+
Bio::Blast::RPSBlast::Report is newly added, a parser for NCBI RPS Blast
|
|
35
|
+
(Reverse Position Specific Blast) default (-m 0 option) results.
|
|
36
|
+
|
|
37
|
+
=== Bio::GFF::GFF2 and Bio::GFF::GFF3
|
|
38
|
+
|
|
39
|
+
The outputting of GFF2/GFF3-formatted text is now supported. However, many
|
|
40
|
+
incompatible changes have been made (See below for details).
|
|
41
|
+
|
|
42
|
+
=== Bio::Hinv
|
|
43
|
+
|
|
44
|
+
H-Invitational Database web service (REST) client class is newly added.
|
|
45
|
+
|
|
46
|
+
=== Bio::NCBI::REST
|
|
47
|
+
|
|
48
|
+
NCBI E-Utilities client class is newly added.
|
|
49
|
+
|
|
50
|
+
=== Bio::PAML::Codeml and Bio::PAML::Codeml::Report
|
|
51
|
+
|
|
52
|
+
Bio::PAML::Codeml, wrapper for PAML codeml program, and
|
|
53
|
+
Bio::PAML::Codeml::Report, parser for codeml result are newly added,
|
|
54
|
+
though some of them are still under construction and too specific to
|
|
55
|
+
particular use cases.
|
|
56
|
+
|
|
57
|
+
=== Bio::Locations
|
|
58
|
+
|
|
59
|
+
New method Bio::Locations#to_s is added to support output of features.
|
|
60
|
+
|
|
61
|
+
=== Bio::TogoWS::REST
|
|
62
|
+
|
|
63
|
+
TogoWS REST client class is newly added. Information about TogoWS REST service
|
|
64
|
+
can be found on http://togows.dbcls.jp/site/en/rest.html.
|
|
65
|
+
|
|
66
|
+
== Deprecated classes
|
|
67
|
+
|
|
68
|
+
=== Bio::Features
|
|
69
|
+
|
|
70
|
+
Bio::Features is obsoleted and changed to an array of Bio::Feature object
|
|
71
|
+
with some backward compatibility methods. The backward compatibility methods
|
|
72
|
+
will be removed in the near future.
|
|
73
|
+
|
|
74
|
+
=== Bio::References
|
|
75
|
+
|
|
76
|
+
Bio::References is obsoleted and changed to an array of Bio::Reference object
|
|
77
|
+
with some backward compatibility methods. The backward compatibility methods
|
|
78
|
+
will be removed in the near future.
|
|
79
|
+
|
|
80
|
+
== Incompatible changes
|
|
81
|
+
|
|
82
|
+
=== Bio::BIORUBY_VERSION
|
|
83
|
+
|
|
84
|
+
Definition of the constant Bio::BIORUBY_VERSION is moved from lib/bio.rb to
|
|
85
|
+
lib/bio/version.rb. Normally, the autoload mechanism of Ruby correctly loads
|
|
86
|
+
the version.rb, but special scripts directly using bio.rb may need to
|
|
87
|
+
be changed.
|
|
88
|
+
|
|
89
|
+
Bio::BIORUBY_VERSION is changed to be frozen.
|
|
90
|
+
|
|
91
|
+
New constants Bio::BIORUBY_EXTRA_VERSION and Bio::BIORUBY_VERSION_ID are
|
|
92
|
+
added. See their RDoc for details.
|
|
93
|
+
|
|
94
|
+
=== Bio::Sequence
|
|
95
|
+
|
|
96
|
+
Bio::Sequence#date is removed. Alternatively, date_created or date_modified
|
|
97
|
+
can be used.
|
|
98
|
+
|
|
99
|
+
Bio::Sequence#taxonomy is changed to be an alias of classification, and
|
|
100
|
+
the data type is changed to an array of string.
|
|
101
|
+
|
|
102
|
+
=== Bio::Locations and Bio::Location
|
|
103
|
+
|
|
104
|
+
A caret in a location (e.g. "123^124") is now parsed, instead of being
|
|
105
|
+
replaced by "..". To distinguish from normal "..", a new attribute
|
|
106
|
+
Bio::Location#carat is used.
|
|
107
|
+
|
|
108
|
+
"order(...)" or "group(...)" are also parsed, instead of being regarded
|
|
109
|
+
as "join(...)". To distinguish from "join(...)", a new attribute
|
|
110
|
+
Bio::Locations#operator is used. When "order(...)" or "group(...)",
|
|
111
|
+
the attribute is set to :order or :group, respectively. Note that
|
|
112
|
+
"group(...)" is already deprecated in EMBL/GenBank/DDBJ.
|
|
113
|
+
|
|
114
|
+
=== Bio::Blast
|
|
115
|
+
|
|
116
|
+
Return value of Bio::Blast#exec_* is changed to String instead of Report
|
|
117
|
+
object. Parsing the string is now processed in Bio::Blast#query method.
|
|
118
|
+
|
|
119
|
+
Bio::Blast#exec_genomenet_tab and Bio::Blast#server="genomenet_tab" are
|
|
120
|
+
deprecated.
|
|
121
|
+
|
|
122
|
+
Bio::Blast#options=() can now change the following attributes: program, db,
|
|
123
|
+
format, matrix, and filter.
|
|
124
|
+
|
|
125
|
+
Bio::Blast.reports now supports default (-m 0) and tabular (-m 8) formats.
|
|
126
|
+
Old implementation (only supports XML) is renamed to Bio::Blast.reports_xml,
|
|
127
|
+
to keep compatibility for older BLAST XML documents which might not be parsed
|
|
128
|
+
by the new Bio::Blast.reports nor Bio::FlatFile, although we are not sure
|
|
129
|
+
whether such documents really exist or not.
|
|
130
|
+
|
|
131
|
+
=== Bio::Blast::Default::Report and Bio::Blast::WU::Report
|
|
132
|
+
|
|
133
|
+
Iteration#lambda, #kappa, #entropy, #gapped_lambda, #gapped_kappa,
|
|
134
|
+
and #gapped_entropy, and the same methods in the Report class are
|
|
135
|
+
changed to return float or nil instead of string or nil.
|
|
136
|
+
|
|
137
|
+
=== Bio::Blat
|
|
138
|
+
|
|
139
|
+
When reading BLAT psl (or pslx) data by using Bio::FlatFile, it checks
|
|
140
|
+
each query name and returns a new entry object when the query name is
|
|
141
|
+
changed from previous queries. That is, data is stored to two or more
|
|
142
|
+
Bio::Blat::Report objects, instead of previous version's behavior
|
|
143
|
+
(always reads all data at once and stores to a Bio::Blat::Report object).
|
|
144
|
+
|
|
145
|
+
=== Bio::GFF, Bio::GFF::GFF2 and Bio::GFF::GFF3
|
|
146
|
+
|
|
147
|
+
Bio::GFF::Record#comments is renamed to #comment, and #comments= is
|
|
148
|
+
renamed to #comment=, because they only allow a single String (or nil)
|
|
149
|
+
and the plural form "comments" may be confusing. The "comments" and
|
|
150
|
+
"comments=" methods can still be used, but warning messages will be
|
|
151
|
+
shown when they are used in GFF2::Record and GFF3::Record objects.
|
|
152
|
+
|
|
153
|
+
See below about GFF2 and/or GFF3 specific changes.
|
|
154
|
+
|
|
155
|
+
=== Bio::GFF::GFF2 and Bio::GFF::GFF3
|
|
156
|
+
|
|
157
|
+
Bio::GFF::GFF2::Record.new and Bio::GFF::GFF3::Record.new can also
|
|
158
|
+
get 9 arguments corresponding to GFF columns, which helps to create
|
|
159
|
+
Record object directly without formatted text.
|
|
160
|
+
|
|
161
|
+
Bio::GFF::GFF2::Record#start, #end, and #frame return Integer or nil,
|
|
162
|
+
and #score returns Float or nil, instead of String or nil.
|
|
163
|
+
The same changes are also made to Bio::GFF::GFF3::Record.
|
|
164
|
+
|
|
165
|
+
Bio::GFF::GFF2::Record#attributes and Bio::GFF::GFF3::Record#attributes
|
|
166
|
+
are changed to return a nested Array, containing [ tag, value ] pairs,
|
|
167
|
+
in order to support multiple attributes with the same tag name. If you want
|
|
168
|
+
to get a Hash, use Record#attributes_to_hash method, though some
|
|
169
|
+
tag-value pairs with the same tag name may be lost. Note that
|
|
170
|
+
Bio::GFF::Record#attribute still returns a Hash for compatibility.
|
|
171
|
+
|
|
172
|
+
New methods for getting, setting and manipulating attributes are added
|
|
173
|
+
to Bio::GFF::GFF2::Record and Bio::GFF::GFF3::Record classes:
|
|
174
|
+
attribute, get_attribute, get_attributes, set_attribute, replace_attributes,
|
|
175
|
+
add_attribute, delete_attribute, delete_attributes, sort_attributes_by_tag!.
|
|
176
|
+
It is recommended to use these methods instead of directly manipulating
|
|
177
|
+
the array returned by Record#attributes.
|
|
178
|
+
|
|
179
|
+
Bio::GFF::GFF2#to_s, Bio::GFF::GFF3#to_s, Bio::GFF::GFF2::Record#to_s,
|
|
180
|
+
and Bio::GFF::GFF3::Record#to_s are added to support output of
|
|
181
|
+
GFF2/GFF3 data.
|
|
182
|
+
|
|
183
|
+
=== Bio::GFF::GFF2
|
|
184
|
+
|
|
185
|
+
GFF2 attribute values are now automatically unescaped. In addition,
|
|
186
|
+
if a value of an attribute consists of two or more tokens delimited
|
|
187
|
+
by spaces, an object of the new class Bio::GFF::GFF2::Record::Value is
|
|
188
|
+
returned instead of String. The new class Bio::GFF::GFF2::Record::Value
|
|
189
|
+
aims to store a parsed value of an attribute. If you really want to get
|
|
190
|
+
unparsed string, Bio::GFF::GFF2::Record::Value#to_s can be used.
|
|
191
|
+
|
|
192
|
+
The metadata (lines beginning with "##") are parsed to
|
|
193
|
+
Bio::GFF::GFF2::MetaData objects and are stored to Bio::GFF::GFF2#metadata
|
|
194
|
+
as an array, except the "##gff-version" line. The "##gff-version" version
|
|
195
|
+
string is stored to the Bio::GFF::GFF2#gff_version as a string.
|
|
196
|
+
|
|
197
|
+
=== Bio::GFF::GFF3
|
|
198
|
+
|
|
199
|
+
Aliases of columns which are renamed in the GFF3 specification are added
|
|
200
|
+
to the Bio::GFF::GFF3::Record class: seqid (column 1; alias of "seqname"),
|
|
201
|
+
feature_type (column 3; alias of "feature"; in the GFF3 spec, it is
|
|
202
|
+
called "type", but because "type" is already used by Ruby, we use
|
|
203
|
+
"feature_type"), phase (column 8; formerly "frame"). Original names can
|
|
204
|
+
still be used because they are only aliases.
|
|
205
|
+
|
|
206
|
+
Sequences bundled within GFF3 after "##FASTA" are now supported
|
|
207
|
+
(Bio::GFF::GFF3#sequences).
|
|
208
|
+
|
|
209
|
+
GFF3 attribute keys and values are automatically unescaped. Each attribute
|
|
210
|
+
value is stored as a string, except for special attributes listed below:
|
|
211
|
+
* Bio::GFF::GFF3::Record::Target to store a "Target" attribute.
|
|
212
|
+
* Bio::GFF::GFF3::Record::Gap to store a "Gap" attribute.
|
|
213
|
+
|
|
214
|
+
The metadata (lines beginning with "##") are parsed to
|
|
215
|
+
Bio::GFF::GFF3::MetaData objects and stored to Bio::GFF::GFF3#metadata
|
|
216
|
+
as an array, except "##gff-version", "##sequence-region", "###",
|
|
217
|
+
and "##FASTA" lines.
|
|
218
|
+
* "##gff-version" version string is stored to Bio::GFF::GFF3#gff_version.
|
|
219
|
+
* "##sequence-region" lines are parsed to Bio::GFF::GFF3::SequenceRegion
|
|
220
|
+
objects and stored to Bio::GFF::GFF3#sequence_regions as an array.
|
|
221
|
+
* "###" lines are parsed to Bio::GFF::GFF3::RecordBoundary objects.
|
|
222
|
+
* "##FASTA" is regarded as the beginning of bundled sequences.
|
|
223
|
+
|
|
224
|
+
=== Bio::Pathway
|
|
225
|
+
|
|
226
|
+
Bio::Pathway#cliquishness is changed to calculate cliquishness (clustering
|
|
227
|
+
coefficient) for not only undirected graphs but also directed graphs.
|
|
228
|
+
|
|
229
|
+
In Bio::Pathway#to_matrix, dump_matrix, dump_list, and depth_first_search
|
|
230
|
+
methods, to avoid dependency on the order of objects in Hash#each (and
|
|
231
|
+
each_keys etc.), Bio::Pathway#index is used to specify preferences of
|
|
232
|
+
nodes in a graph.
|
|
233
|
+
|
|
234
|
+
=== Bio::SQL and BioSQL related classes
|
|
235
|
+
|
|
236
|
+
BioSQL support is completely rewritten by using ActiveRecord. See documents
|
|
237
|
+
in lib/bio/io/sql.rb, lib/bio/io/biosql, and lib/bio/db/biosql for details
|
|
238
|
+
of changes and usage of the classes/modules.
|
|
239
|
+
|
data/doc/Tutorial.rd
CHANGED
|
@@ -1,49 +1,79 @@
|
|
|
1
|
+
# This document is generated with a version of rd2html (part of Hiki)
|
|
2
|
+
#
|
|
3
|
+
# A possible test run could be from rdtool (on Debian package rdtool)
|
|
4
|
+
#
|
|
5
|
+
# ruby -I lib ./bin/rd2 ~/cvs/opensource/bioruby/doc/Tutorial.rd
|
|
6
|
+
#
|
|
7
|
+
# or with style sheet:
|
|
8
|
+
#
|
|
9
|
+
# ruby -I lib ./bin/rd2 -r rd/rd2html-lib.rb --with-c
|
|
10
|
+
ss=bioruby.css ~/cvs/opensource/bioruby/doc/Tutorial.rd > ~/bioruby.html
|
|
11
|
+
#
|
|
12
|
+
# in Debian:
|
|
13
|
+
#
|
|
14
|
+
# rd2 -r rd/rd2html-lib --with-css="/home/wrk/izip/cvs/opensource/bioruby/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.css" Tutorial.rd > index.html
|
|
15
|
+
#
|
|
16
|
+
# A common problem is tabs in the text file! TABs are not allowed.
|
|
17
|
+
#
|
|
18
|
+
# To add tests run Toshiaki's bioruby shell and paste in the query plus
|
|
19
|
+
# results.
|
|
20
|
+
#
|
|
21
|
+
# To run the embedded Ruby doctests you can get the doctest.rb from Pjotr.
|
|
22
|
+
|
|
1
23
|
=begin
|
|
24
|
+
#doctest Testing bioruby
|
|
2
25
|
|
|
3
|
-
|
|
26
|
+
= BioRuby Tutorial
|
|
4
27
|
|
|
5
|
-
|
|
28
|
+
Editor: PjotrPrins <p .at. bioruby.org>
|
|
6
29
|
|
|
7
|
-
|
|
30
|
+
* Copyright (C) 2001-2003 KATAYAMA Toshiaki <k .at. bioruby.org>
|
|
31
|
+
* Copyright (C) 2005-2008 Pjotr Prins, Naohisa Goto and others
|
|
8
32
|
|
|
9
|
-
|
|
33
|
+
The latest version resides in the CVS repository ./doc/((<Tutorial.rd|URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/*checkout*/bioruby/doc/Tutorial.rd?rev=HEAD&cvsroot=bioruby&content-type=text/plain>)). This one was updated:
|
|
10
34
|
|
|
11
|
-
|
|
35
|
+
$Id: Tutorial.rd,v 1.22 2008/05/19 12:22:05 pjotr Exp $
|
|
12
36
|
|
|
13
|
-
|
|
14
|
-
repository. Please edit the file there otherwise changes may get
|
|
15
|
-
lost. See ((<BioRuby Developer Information>)) for CVS and mailing list
|
|
16
|
-
access.
|
|
17
|
-
|
|
18
|
-
= BioRuby Tutorial
|
|
37
|
+
in preparation for the ((<BioHackathon 2008|URL:http://hackathon.dbcls.jp/>))
|
|
19
38
|
|
|
20
39
|
== Introduction
|
|
21
40
|
|
|
22
|
-
This is a tutorial for using Bioruby.
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
(
|
|
41
|
+
This is a tutorial for using Bioruby. A basic knowledge of Ruby is required.
|
|
42
|
+
If you want to know more about the programming language Ruby we recommend the
|
|
43
|
+
excellent book ((<Programming Ruby|URL:http://www.pragprog.com/titles/ruby>))
|
|
44
|
+
by Dave Thomas and Andy Hunt - some of it is online
|
|
45
|
+
((<here|URL:http://www.rubycentral.com/pickaxe/>)).
|
|
27
46
|
|
|
28
|
-
For
|
|
29
|
-
reading' at the end.
|
|
47
|
+
For BioRuby you need to install Ruby and the BioRuby package on your computer
|
|
30
48
|
|
|
31
49
|
You can check whether Ruby is installed on your computer and what
|
|
32
50
|
version it has with the
|
|
33
51
|
|
|
34
|
-
|
|
52
|
+
% ruby -v
|
|
35
53
|
|
|
36
54
|
command. Showing something like:
|
|
37
55
|
|
|
38
56
|
ruby 1.8.5 (2006-08-25) [powerpc-linux]
|
|
39
57
|
|
|
58
|
+
If you see no such thing you'll have to install Ruby using your installation
|
|
59
|
+
manager. For more information see the
|
|
60
|
+
((<Ruby|URL:http://www.ruby-lang.org/en/>)) website.
|
|
61
|
+
|
|
62
|
+
Once Ruby works, download and install Bioruby using the links on the
|
|
63
|
+
((<Bioruby|URL:http://bioruby.org/>)) website.
|
|
64
|
+
|
|
65
|
+
A lot of BioRuby's documentation exists in the source code and unit tests. To
|
|
66
|
+
really dive in you will need the latest source code tree. The embedded rdoc
|
|
67
|
+
documentation can be viewed online at
|
|
68
|
+
((<bioruby's rdoc|URL:http://bioruby.org/rdoc/>)). But first let's start!
|
|
40
69
|
|
|
41
70
|
== Trying Bioruby
|
|
42
71
|
|
|
43
72
|
Bioruby comes with its own shell. After unpacking the sources run the
|
|
44
73
|
following command
|
|
45
74
|
|
|
46
|
-
|
|
75
|
+
./bin/bioruby or
|
|
76
|
+
ruby -I lib bin/bioruby
|
|
47
77
|
|
|
48
78
|
and you should see a prompt
|
|
49
79
|
|
|
@@ -52,10 +82,14 @@ and you should see a prompt
|
|
|
52
82
|
Now test the following:
|
|
53
83
|
|
|
54
84
|
bioruby> seq = Bio::Sequence::NA.new("atgcatgcaaaa")
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
bioruby>
|
|
58
|
-
ttttgcatgcat
|
|
85
|
+
==> "atgcatgcaaaa"
|
|
86
|
+
|
|
87
|
+
bioruby> seq.complement
|
|
88
|
+
==> "ttttgcatgcat"
|
|
89
|
+
|
|
90
|
+
See the Bioruby shell section below for more tweaking. If you have trouble running
|
|
91
|
+
examples also check the section below on trouble shooting. You can also post a
|
|
92
|
+
question to the mailing list. BioRuby developers usually try to help.
|
|
59
93
|
|
|
60
94
|
== Working with nucleic / amino acid sequences (Bio::Sequence class)
|
|
61
95
|
|
|
@@ -68,33 +102,48 @@ calculated, and so on. When translating into amino acid sequences the
|
|
|
68
102
|
frame can be specified and optionally the codon table selected (as
|
|
69
103
|
defined in codontable.rb).
|
|
70
104
|
|
|
105
|
+
bioruby> seq = Bio::Sequence::NA.new("atgcatgcaaaa")
|
|
106
|
+
==> "atgcatgcaaaa"
|
|
107
|
+
|
|
108
|
+
# complemental sequence (Bio::Sequence::NA object)
|
|
109
|
+
bioruby> seq.complement
|
|
110
|
+
==> "ttttgcatgcat"
|
|
111
|
+
|
|
112
|
+
bioruby> seq.subseq(3,8) # gets subsequence of positions 3 to 8
|
|
113
|
+
==> "gcatgc"
|
|
114
|
+
bioruby> seq.gc_percent
|
|
115
|
+
==> 33
|
|
116
|
+
bioruby> seq.composition
|
|
117
|
+
==> {"a"=>6, "c"=>2, "g"=>2, "t"=>2}
|
|
118
|
+
bioruby> seq.translate
|
|
119
|
+
==> "MHAK"
|
|
120
|
+
bioruby> seq.translate(2) # translate from frame 2
|
|
121
|
+
==> "CMQ"
|
|
122
|
+
bioruby> seq.translate(1,11) # codon table 11
|
|
123
|
+
==> "MHAK"
|
|
124
|
+
bioruby> seq.translate.codes
|
|
125
|
+
==> ["Met", "His", "Ala", "Lys"]
|
|
126
|
+
bioruby> seq.translate.names
|
|
127
|
+
==> ["methionine", "histidine", "alanine", "lysine"]
|
|
128
|
+
bioruby> seq.translate.composition
|
|
129
|
+
==> {"K"=>1, "A"=>1, "M"=>1, "H"=>1}
|
|
130
|
+
bioruby> seq.translate.molecular_weight
|
|
131
|
+
==> 485.605
|
|
132
|
+
bioruby> seq.complement.translate
|
|
133
|
+
==> "FCMH"
|
|
134
|
+
|
|
135
|
+
get a random sequence with the same NA count:
|
|
136
|
+
|
|
137
|
+
bioruby> counts = {'a'=>seq.count('a'),'c'=>seq.count('c'),'g'=>seq.count('g'),'t'=>seq.count('t')}
|
|
138
|
+
==> {"a"=>6, "c"=>2, "g"=>2, "t"=>2}
|
|
139
|
+
bioruby!> randomseq = Bio::Sequence::NA.randomize(counts)
|
|
140
|
+
==!> "aaacatgaagtc"
|
|
141
|
+
|
|
142
|
+
bioruby!> print counts
|
|
143
|
+
a6c2g2t2
|
|
144
|
+
bioruby!> p counts
|
|
145
|
+
{"a"=>6, "c"=>2, "g"=>2, "t"=>2}
|
|
71
146
|
|
|
72
|
-
#!/usr/bin/env ruby
|
|
73
|
-
|
|
74
|
-
require 'bio'
|
|
75
|
-
|
|
76
|
-
seq = Bio::Sequence::NA.new("atgcatgcaaaa")
|
|
77
|
-
|
|
78
|
-
puts seq # original sequence
|
|
79
|
-
puts seq.complement # complemental sequence (Bio::Sequence::NA object)
|
|
80
|
-
puts seq.subseq(3,8) # gets subsequence of positions 3 to 8
|
|
81
|
-
|
|
82
|
-
p seq.gc_percent # GC percent (BioRuby 0.6.X: Float, BioRuby 0.7 or later: Integer)
|
|
83
|
-
p seq.composition # nucleic acid compositions (Hash)
|
|
84
|
-
|
|
85
|
-
puts seq.translate # translation (Bio::Sequence::AA object)
|
|
86
|
-
puts seq.translate(2) # translation from frame 2 (default is frame 1)
|
|
87
|
-
puts seq.translate(1,11) # using codon table No.11 (see http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi)
|
|
88
|
-
|
|
89
|
-
p seq.translate.codes # shows three-letter codes (Array)
|
|
90
|
-
p seq.translate.names # shows amino acid names (Array)
|
|
91
|
-
p seq.translate.composition # amino acid compositions (Hash)
|
|
92
|
-
p seq.translate.molecular_weight # calculating molecular weight (Float)
|
|
93
|
-
|
|
94
|
-
puts seq.complement.translate # translation of complemental strand
|
|
95
|
-
|
|
96
|
-
counts = {'a'=>seq.count('a'),'c'=>seq.count('c'),'g'=>seq.count('g'),'t'=>seq.count('t')}
|
|
97
|
-
p randomseq = Bio::Sequence::NA.randomize(counts) # reshuffle sequence with same freq.
|
|
98
147
|
|
|
99
148
|
The p, print and puts methods are standard Ruby ways of outputting to
|
|
100
149
|
the screen. If you want to know more about standard Ruby commands you
|
|
@@ -105,9 +154,9 @@ Windows). For example
|
|
|
105
154
|
% ri p
|
|
106
155
|
% ri File.open
|
|
107
156
|
|
|
108
|
-
Nucleic acid sequence is an object of
|
|
109
|
-
amino acid sequence is an object of
|
|
110
|
-
methods are in the parent
|
|
157
|
+
Nucleic acid sequence is an object of Bio::Sequence::NA class, and
|
|
158
|
+
amino acid sequence is an object of Bio::Sequence::AA class. Shared
|
|
159
|
+
methods are in the parent Bio::Sequence class.
|
|
111
160
|
|
|
112
161
|
As Bio::Sequence class inherits Ruby's String class, you can use
|
|
113
162
|
String class methods. For example, to get a subsequence, you can
|
|
@@ -116,15 +165,12 @@ not only use subseq(from, to) but also String#[].
|
|
|
116
165
|
Please take note that Ruby's strings are base 0 - i.e. the first letter
|
|
117
166
|
has index 0, for example:
|
|
118
167
|
|
|
119
|
-
s = 'abc'
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
>ab
|
|
127
|
-
|
|
168
|
+
bioruby> s = 'abc'
|
|
169
|
+
==> "abc"
|
|
170
|
+
bioruby> s[0].chr
|
|
171
|
+
==> "a"
|
|
172
|
+
bioruby> s[0..1]
|
|
173
|
+
==> "ab"
|
|
128
174
|
|
|
129
175
|
So when using String methods, you should subtract 1 from positions
|
|
130
176
|
conventionally used in biology. (subseq method will throw an exception if you
|
|
@@ -136,55 +182,71 @@ way of writing concise and clear code using 'closures'. Each sliding
|
|
|
136
182
|
window creates a subsequence which is supplied to the enclosed block
|
|
137
183
|
through a variable named +s+.
|
|
138
184
|
|
|
139
|
-
|
|
140
|
-
the default one base at a time)
|
|
185
|
+
Show average percentage of GC content for 20 bases (stepping the default one base at a time)
|
|
141
186
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
end
|
|
187
|
+
bioruby> seq = Bio::Sequence::NA.new("atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa")
|
|
188
|
+
==> "atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa"
|
|
145
189
|
|
|
190
|
+
bioruby> a=[]; seq.window_search(20) { |s| a.push s.gc_percent }
|
|
191
|
+
bioruby> a
|
|
192
|
+
==> [30, 35, 40, 40, 35, 35, 35, 30, 25, 30, 30, 30, 35, 35, 35, 35, 35, 40, 45, 45, 45, 45, 40, 35, 40, 40, 40, 40, 40, 35, 35, 35, 30, 30, 30]
|
|
193
|
+
|
|
194
|
+
|
|
146
195
|
Since the class of each subsequence is the same as original sequence
|
|
147
196
|
(Bio::Sequence::NA or Bio::Sequence::AA or Bio::Sequence), you can
|
|
148
197
|
use all methods on the subsequence. For example,
|
|
149
198
|
|
|
150
|
-
|
|
199
|
+
Shows translation results for 15 bases shifting a codon at a time
|
|
200
|
+
|
|
201
|
+
bioruby> a = []
|
|
202
|
+
bioruby> seq.window_search(15, 3) do |s|
|
|
203
|
+
bioruby> a.push s.translate
|
|
204
|
+
bioruby> end
|
|
205
|
+
bioruby> a
|
|
206
|
+
==> ["MHAIK", "HAIKL", "AIKLI", "IKLIP", "KLIPI", "LIPIR", "IPIRS", "PIRSS", "IRSSR", "RSSRS", "SSRSS", "SRSSK", "RSSKK", "SSKKK"]
|
|
151
207
|
|
|
152
|
-
seq.window_search(15, 3) do |s|
|
|
153
|
-
puts s.translate
|
|
154
|
-
end
|
|
155
208
|
|
|
156
209
|
Finally, the window_search method returns the last leftover
|
|
157
210
|
subsequence. This allows for example
|
|
158
211
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
212
|
+
Divide a genome sequence into sections of 10000bp and
|
|
213
|
+
output FASTA formatted sequences (line width 60 chars). The 1000bp at the
|
|
214
|
+
start and end of each subsequence overlap. At the 3' end of the sequence
|
|
215
|
+
the leftover is also added:
|
|
163
216
|
|
|
164
217
|
i = 1
|
|
218
|
+
textwidth=60
|
|
165
219
|
remainder = seq.window_search(10000, 9000) do |s|
|
|
166
|
-
puts s.to_fasta("segment #{i}",
|
|
220
|
+
puts s.to_fasta("segment #{i}", textwidth)
|
|
167
221
|
i += 1
|
|
168
222
|
end
|
|
169
|
-
|
|
223
|
+
if remainder
|
|
224
|
+
puts remainder.to_fasta("segment #{i}", textwidth)
|
|
225
|
+
end
|
|
170
226
|
|
|
171
227
|
If you don't want the overlapping window, set window size and stepping
|
|
172
228
|
size to equal values.
|
|
173
229
|
|
|
174
230
|
Other examples
|
|
175
231
|
|
|
176
|
-
|
|
232
|
+
Count the codon usage
|
|
177
233
|
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
234
|
+
bioruby> codon_usage = Hash.new(0)
|
|
235
|
+
bioruby> seq.window_search(3, 3) do |s|
|
|
236
|
+
bioruby> codon_usage[s] += 1
|
|
237
|
+
bioruby> end
|
|
238
|
+
bioruby> codon_usage
|
|
239
|
+
==> {"cat"=>1, "aaa"=>3, "cca"=>1, "att"=>2, "aga"=>1, "atc"=>1, "cta"=>1, "gca"=>1, "cga"=>1, "tca"=>3, "aag"=>1, "tcc"=>1, "atg"=>1}
|
|
182
240
|
|
|
183
|
-
* Calculate molecular weight for each 10-aa peptide (or 10-nt nucleic acid)
|
|
184
241
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
242
|
+
Calculate molecular weight for each 10-aa peptide (or 10-nt nucleic acid)
|
|
243
|
+
|
|
244
|
+
bioruby> a = []
|
|
245
|
+
bioruby> seq.window_search(10, 10) do |s|
|
|
246
|
+
bioruby> a.push s.molecular_weight
|
|
247
|
+
bioruby> end
|
|
248
|
+
bioruby> a
|
|
249
|
+
==> [3096.2062, 3086.1962, 3056.1762, 3023.1262, 3073.2262]
|
|
188
250
|
|
|
189
251
|
In most cases, sequences are read from files or retrieved from databases.
|
|
190
252
|
For example:
|
|
@@ -210,6 +272,10 @@ For example, translates my_naseq.txt:
|
|
|
210
272
|
|
|
211
273
|
% ruby na2aa.rb my_naseq.txt
|
|
212
274
|
|
|
275
|
+
or use a pipe!
|
|
276
|
+
|
|
277
|
+
% cat my_naseq.txt|ruby na2aa.rb
|
|
278
|
+
|
|
213
279
|
Outputs
|
|
214
280
|
|
|
215
281
|
VAIFPKAMTGAKNQSSDICLMPHVGLIRRGQRRIRHLVQMSDAA*
|
|
@@ -218,8 +284,9 @@ You can also write this, a bit fanciful, as a one-liner script.
|
|
|
218
284
|
|
|
219
285
|
% ruby -r bio -e 'p Bio::Sequence::NA.new($<.read).translate' my_naseq.txt
|
|
220
286
|
|
|
221
|
-
In the next section we will retrieve data from databases instead of
|
|
222
|
-
|
|
287
|
+
In the next section we will retrieve data from databases instead of using raw
|
|
288
|
+
sequence files. One generic example of the above can be found in
|
|
289
|
+
./sample/na2aa.rb.
|
|
223
290
|
|
|
224
291
|
== Parsing GenBank data (Bio::GenBank class)
|
|
225
292
|
|
|
@@ -243,7 +310,8 @@ the data:
|
|
|
243
310
|
|
|
244
311
|
print ">#{gb.accession} " # Accession
|
|
245
312
|
puts gb.definition # Definition
|
|
246
|
-
puts gb.naseq # Nucleic acid sequence
|
|
313
|
+
puts gb.naseq # Nucleic acid sequence
|
|
314
|
+
# (Bio::Sequence::NA object)
|
|
247
315
|
end
|
|
248
316
|
|
|
249
317
|
But that has the disadvantage the code is tied to GenBank input. A more
|
|
@@ -251,9 +319,9 @@ generic method is to use Bio::FlatFile which allows you to use different
|
|
|
251
319
|
input formats:
|
|
252
320
|
|
|
253
321
|
#!/usr/bin/env ruby
|
|
254
|
-
|
|
322
|
+
|
|
255
323
|
require 'bio'
|
|
256
|
-
|
|
324
|
+
|
|
257
325
|
ff = Bio::FlatFile.new(Bio::GenBank, ARGF)
|
|
258
326
|
ff.each_entry do |gb|
|
|
259
327
|
definition = "#{gb.accession} #{gb.definition}"
|
|
@@ -288,9 +356,6 @@ Again another option is to use the Bio::DB.open class:
|
|
|
288
356
|
puts gb.naseq.to_fasta(definition, 60)
|
|
289
357
|
end
|
|
290
358
|
|
|
291
|
-
(TRANSLATOR'S NOTE: Bio::DB.open have not been used so well.)
|
|
292
|
-
(EDITOR's NOTE: Test code)
|
|
293
|
-
|
|
294
359
|
Next, we are going to parse the GenBank 'features', which is normally
|
|
295
360
|
very complicated:
|
|
296
361
|
|
|
@@ -333,12 +398,12 @@ very complicated:
|
|
|
333
398
|
end
|
|
334
399
|
end
|
|
335
400
|
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
401
|
+
Note: In this example Feature#assoc method makes a Hash from a
|
|
402
|
+
feature object. It is useful because you can get data from the hash
|
|
403
|
+
by using qualifiers as keys.
|
|
404
|
+
(But there is a risk some information is lost when two or more
|
|
405
|
+
qualifiers are the same. Therefore an Array is returned by
|
|
406
|
+
Feature#feature)
|
|
342
407
|
|
|
343
408
|
Bio::Sequence#splicing splices subsequence from nucleic acid sequence
|
|
344
409
|
according to location information used in GenBank, EMBL and DDBJ.
|
|
@@ -352,11 +417,11 @@ feature style location text but also Bio::Locations object. For more
|
|
|
352
417
|
information about location format and Bio::Locations class, see
|
|
353
418
|
bio/location.rb.
|
|
354
419
|
|
|
355
|
-
|
|
420
|
+
Splice according to location string used in a GenBank entry
|
|
356
421
|
|
|
357
422
|
naseq.splicing('join(2035..2050,complement(1775..1818),13..345')
|
|
358
423
|
|
|
359
|
-
|
|
424
|
+
Generate Bio::Locations object and pass the splicing method
|
|
360
425
|
|
|
361
426
|
locs = Bio::Locations.new('join((8298.8300)..10206,1..855)')
|
|
362
427
|
naseq.splicing(locs)
|
|
@@ -364,17 +429,16 @@ bio/location.rb.
|
|
|
364
429
|
You can also use the splicing method for amino acid sequences
|
|
365
430
|
(Bio::Sequence::AA objects).
|
|
366
431
|
|
|
367
|
-
|
|
432
|
+
Splicing peptide from a protein (e.g. signal peptide)
|
|
368
433
|
|
|
369
434
|
aaseq.splicing('21..119')
|
|
370
435
|
|
|
371
|
-
(EDITOR's NOTE: why use STRINGs here?)
|
|
372
436
|
|
|
373
437
|
=== More databases
|
|
374
438
|
|
|
375
439
|
Databases in BioRuby are essentially accessed like that of GenBank
|
|
376
|
-
with classes like Bio::GenBank, Bio::KEGG::GENES
|
|
377
|
-
|
|
440
|
+
with classes like Bio::GenBank, Bio::KEGG::GENES. A full list can be found in
|
|
441
|
+
the ./lib/bio/db directory of the BioRuby source tree.
|
|
378
442
|
|
|
379
443
|
In many cases the Bio::DatabaseClass acts as a factory pattern
|
|
380
444
|
and recognises the database type automatically - returning a
|
|
@@ -401,7 +465,14 @@ database class?
|
|
|
401
465
|
end
|
|
402
466
|
|
|
403
467
|
An example that can take any input, filter using a regular expression to output
|
|
404
|
-
to a FASTA file can be found in sample/any2fasta.rb.
|
|
468
|
+
to a FASTA file can be found in sample/any2fasta.rb. With this technique it is
|
|
469
|
+
possible to write a Unix type grep/sort pipe for sequence information. One
|
|
470
|
+
example using scripts in the BIORUBY sample folder:
|
|
471
|
+
|
|
472
|
+
fastagrep.rb '/At|Dm/' database.seq | fastasort.rb
|
|
473
|
+
|
|
474
|
+
greps the database for Arabidopsis and Drosophila entries and sorts the output
|
|
475
|
+
to FASTA.
|
|
405
476
|
|
|
406
477
|
Other methods to extract specific data from database objects can be
|
|
407
478
|
different between databases, though some methods are common (see the
|
|
@@ -427,35 +498,30 @@ multiple Bio::Reference objects as an Array. And some classes have a
|
|
|
427
498
|
Bio::Alignment class in bio/alignment.rb is a container class like Ruby's Hash,
|
|
428
499
|
Array and BioPerl's Bio::SimpleAlign. A very simple example is:
|
|
429
500
|
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
seqs = [ 'atgca', 'aagca', 'acgca', 'acgcg' ]
|
|
433
|
-
seqs = seqs.collect{ |x| Bio::Sequence::NA.new(x) }
|
|
434
|
-
|
|
501
|
+
bioruby> seqs = [ 'atgca', 'aagca', 'acgca', 'acgcg' ]
|
|
502
|
+
bioruby> seqs = seqs.collect{ |x| Bio::Sequence::NA.new(x) }
|
|
435
503
|
# creates alignment object
|
|
436
|
-
a = Bio::Alignment.new(seqs)
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
p a.consensus # ==> "a?gc?"
|
|
440
|
-
|
|
504
|
+
bioruby> a = Bio::Alignment.new(seqs)
|
|
505
|
+
bioruby> a.consensus
|
|
506
|
+
==> "a?gc?"
|
|
441
507
|
# shows IUPAC consensus
|
|
442
|
-
|
|
443
|
-
|
|
508
|
+
a.consensus_iupac
|
|
509
|
+
==> "ahgcr"
|
|
444
510
|
# iterates over each seq
|
|
445
511
|
a.each { |x| p x }
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
512
|
+
# ==>
|
|
513
|
+
# "atgca"
|
|
514
|
+
# "aagca"
|
|
515
|
+
# "acgca"
|
|
516
|
+
# "acgcg"
|
|
451
517
|
# iterates over each site
|
|
452
518
|
a.each_site { |x| p x }
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
519
|
+
# ==>
|
|
520
|
+
# ["a", "a", "a", "a"]
|
|
521
|
+
# ["t", "a", "c", "c"]
|
|
522
|
+
# ["g", "g", "g", "g"]
|
|
523
|
+
# ["c", "c", "c", "c"]
|
|
524
|
+
# ["a", "a", "a", "g"]
|
|
459
525
|
|
|
460
526
|
# doing alignment by using CLUSTAL W.
|
|
461
527
|
# clustalw command must be installed.
|
|
@@ -469,21 +535,22 @@ library of commonly used REs (from REBASE) which can be used to cut single
|
|
|
469
535
|
stranded RNA or double stranded DNA into fragments. To list all enzymes:
|
|
470
536
|
|
|
471
537
|
rebase = Bio::RestrictionEnzyme.rebase
|
|
472
|
-
|
|
473
|
-
|
|
538
|
+
rebase.each do |enzyme_name, info|
|
|
539
|
+
p enzyme_name
|
|
474
540
|
end
|
|
475
541
|
|
|
476
542
|
and cut a sequence with an enzyme follow up with:
|
|
477
543
|
|
|
478
|
-
res = seq.cut_with_enzyme('EcoRII', {:max_permutations => 0},
|
|
544
|
+
res = seq.cut_with_enzyme('EcoRII', {:max_permutations => 0},
|
|
545
|
+
{:view_ranges => true})
|
|
479
546
|
if res.kind_of? Symbol #error
|
|
480
547
|
err = Err.find_by_code(res.to_s)
|
|
481
548
|
unless err
|
|
482
549
|
err = Err.new(:code => res.to_s)
|
|
483
550
|
end
|
|
484
551
|
end
|
|
485
|
-
|
|
486
|
-
|
|
552
|
+
res.each do |frag|
|
|
553
|
+
em = EnzymeMatch.new
|
|
487
554
|
|
|
488
555
|
em.p_left = frag.p_left
|
|
489
556
|
em.p_right = frag.p_right
|
|
@@ -493,7 +560,7 @@ and cut a sequence with an enzyme follow up with:
|
|
|
493
560
|
em.err = nil
|
|
494
561
|
em.enzyme = ar_enz
|
|
495
562
|
em.sequence = ar_seq
|
|
496
|
-
|
|
563
|
+
p em
|
|
497
564
|
end
|
|
498
565
|
|
|
499
566
|
|
|
@@ -510,21 +577,21 @@ local machine.
|
|
|
510
577
|
Install the fasta program on your machine (the command name looks like
|
|
511
578
|
fasta34. FASTA can be downloaded from ftp://ftp.virginia.edu/pub/fasta/).
|
|
512
579
|
First, you must prepare your FASTA-formatted database sequence file
|
|
513
|
-
target.pep and FASTA-formatted query.pep.
|
|
514
|
-
we should provide sample data to readers.)
|
|
580
|
+
target.pep and FASTA-formatted query.pep.
|
|
515
581
|
|
|
516
582
|
#!/usr/bin/env ruby
|
|
517
583
|
|
|
518
584
|
require 'bio'
|
|
519
585
|
|
|
520
|
-
# Creates FASTA factory object ("ssearch" instead of
|
|
586
|
+
# Creates FASTA factory object ("ssearch" instead of
|
|
587
|
+
# "fasta34" can also work)
|
|
521
588
|
factory = Bio::Fasta.local('fasta34', ARGV.pop)
|
|
522
589
|
(EDITOR's NOTE: not consistent pop command)
|
|
523
590
|
|
|
524
|
-
# Reads FASTA-formatted files (TRANSLATOR'S NOTE: something wrong in Japanese text)
|
|
525
591
|
ff = Bio::FlatFile.new(Bio::FastaFormat, ARGF)
|
|
526
592
|
|
|
527
|
-
# Iterates over each entry. the variable "entry" is a
|
|
593
|
+
# Iterates over each entry. the variable "entry" is a
|
|
594
|
+
# Bio::FastaFormat object:
|
|
528
595
|
ff.each do |entry|
|
|
529
596
|
# shows definition line (begins with '>') to the standard error output
|
|
530
597
|
$stderr.puts "Searching ... " + entry.definition
|
|
@@ -536,7 +603,8 @@ we should provide sample data to readers.)
|
|
|
536
603
|
report.each do |hit|
|
|
537
604
|
# If E-value is smaller than 0.0001
|
|
538
605
|
if hit.evalue < 0.0001
|
|
539
|
-
# shows identifier of query and hit, E-value, start and
|
|
606
|
+
# shows identifier of query and hit, E-value, start and
|
|
607
|
+
# end positions of homologous region
|
|
540
608
|
print "#{hit.query_id} : evalue #{hit.evalue}\t#{hit.target_id} at "
|
|
541
609
|
p hit.lap_at
|
|
542
610
|
end
|
|
@@ -550,7 +618,6 @@ We named above script as f_search.rb. You can execute as follows:
|
|
|
550
618
|
In above script, the variable "factory" is a factory object for executing
|
|
551
619
|
FASTA many times easily. Instead of using Fasta#query method,
|
|
552
620
|
Bio::Sequence#fasta method can be used.
|
|
553
|
-
(TRANSLATOR'S NOTE: Bio::Sequence#fasta are not so frequently used.)
|
|
554
621
|
|
|
555
622
|
seq = ">test seq\nYQVLEEIGRGSFGSVRKVIHIPTKKLLVRKDIKYGHMNSKE"
|
|
556
623
|
seq.fasta(factory)
|
|
@@ -566,7 +633,6 @@ Bio::Fasta#query returns Bio::Fasta::Report object.
|
|
|
566
633
|
We can get almost all information described in FASTA report text
|
|
567
634
|
with the Report object. For example, getting information for hits:
|
|
568
635
|
|
|
569
|
-
|
|
570
636
|
report.each do |hit|
|
|
571
637
|
puts hit.evalue # E-value
|
|
572
638
|
puts hit.sw # Smith-Waterman score (*)
|
|
@@ -575,15 +641,19 @@ with the Report object. For example, getting information for hits:
|
|
|
575
641
|
puts hit.query_id # identifier of query sequence
|
|
576
642
|
puts hit.query_def # definition(comment line) of query sequence
|
|
577
643
|
puts hit.query_len # length of query sequence
|
|
578
|
-
puts hit.query_seq #
|
|
644
|
+
puts hit.query_seq # sequence of homologous region
|
|
579
645
|
puts hit.target_id # identifier of hit sequence
|
|
580
646
|
puts hit.target_def # definition(comment line) of hit sequence
|
|
581
647
|
puts hit.target_len # length of hit sequence
|
|
582
|
-
puts hit.target_seq # hit
|
|
583
|
-
puts hit.query_start # start position of homologous
|
|
584
|
-
|
|
585
|
-
puts hit.
|
|
586
|
-
|
|
648
|
+
puts hit.target_seq # hit of homologous region of hit sequence
|
|
649
|
+
puts hit.query_start # start position of homologous
|
|
650
|
+
# region in query sequence
|
|
651
|
+
puts hit.query_end # end position of homologous region
|
|
652
|
+
# in query sequence
|
|
653
|
+
puts hit.target_start # start position of homologous region
|
|
654
|
+
# in hit(target) sequence
|
|
655
|
+
puts hit.target_end # end position of homologous region
|
|
656
|
+
# in hit(target) sequence
|
|
587
657
|
puts hit.lap_at # array of above four numbers
|
|
588
658
|
end
|
|
589
659
|
|
|
@@ -676,25 +746,25 @@ There are some additional BLAST methods, for example, bit_score and
|
|
|
676
746
|
midline.
|
|
677
747
|
|
|
678
748
|
report.each do |hit|
|
|
679
|
-
puts hit.bit_score
|
|
680
|
-
puts hit.query_seq
|
|
681
|
-
puts hit.midline
|
|
682
|
-
puts hit.target_seq
|
|
683
|
-
|
|
684
|
-
puts hit.evalue
|
|
685
|
-
puts hit.identity
|
|
686
|
-
puts hit.overlap
|
|
687
|
-
puts hit.query_id
|
|
688
|
-
puts hit.query_def
|
|
689
|
-
puts hit.query_len
|
|
690
|
-
puts hit.target_id
|
|
691
|
-
puts hit.target_def
|
|
692
|
-
puts hit.target_len
|
|
693
|
-
puts hit.query_start
|
|
694
|
-
puts hit.query_end
|
|
695
|
-
puts hit.target_start
|
|
696
|
-
puts hit.target_end
|
|
697
|
-
puts hit.lap_at
|
|
749
|
+
puts hit.bit_score
|
|
750
|
+
puts hit.query_seq
|
|
751
|
+
puts hit.midline
|
|
752
|
+
puts hit.target_seq
|
|
753
|
+
|
|
754
|
+
puts hit.evalue
|
|
755
|
+
puts hit.identity
|
|
756
|
+
puts hit.overlap
|
|
757
|
+
puts hit.query_id
|
|
758
|
+
puts hit.query_def
|
|
759
|
+
puts hit.query_len
|
|
760
|
+
puts hit.target_id
|
|
761
|
+
puts hit.target_def
|
|
762
|
+
puts hit.target_len
|
|
763
|
+
puts hit.query_start
|
|
764
|
+
puts hit.query_end
|
|
765
|
+
puts hit.target_start
|
|
766
|
+
puts hit.target_end
|
|
767
|
+
puts hit.lap_at
|
|
698
768
|
end
|
|
699
769
|
|
|
700
770
|
For simplicity and API compatibility, some information such as score
|
|
@@ -1131,39 +1201,66 @@ to be written...
|
|
|
1131
1201
|
|
|
1132
1202
|
== The BioRuby example programs
|
|
1133
1203
|
|
|
1134
|
-
Some sample programs are stored in samples/
|
|
1135
|
-
Some programs are obsolete. Since samples are not enough,
|
|
1136
|
-
practical and interesting samples are welcome.
|
|
1204
|
+
Some sample programs are stored in the ./sample/ directory. Run for example:
|
|
1137
1205
|
|
|
1138
|
-
|
|
1206
|
+
./sample/na2aa.rb test/data/fasta/example1.txt
|
|
1207
|
+
|
|
1208
|
+
== Unit testing and doctests
|
|
1139
1209
|
|
|
1140
|
-
|
|
1141
|
-
|
|
1210
|
+
BioRuby comes with an extensive testing framework with over 1300 tests and 2700
|
|
1211
|
+
assertions. To run the unit tests:
|
|
1212
|
+
|
|
1213
|
+
cd test
|
|
1214
|
+
ruby runner.rb
|
|
1215
|
+
|
|
1216
|
+
We have also started with doctest for Ruby. We are porting the examples
|
|
1217
|
+
in this tutorial to doctest - more info upcoming.
|
|
1142
1218
|
|
|
1143
1219
|
== Further reading
|
|
1144
1220
|
|
|
1145
|
-
See the BioRuby in anger Wiki
|
|
1146
|
-
|
|
1221
|
+
See the BioRuby in anger Wiki. A lot of BioRuby's documentation exists in the
|
|
1222
|
+
source code and unit tests. To really dive in you will need the latest source
|
|
1223
|
+
code tree. The embedded rdoc documentation can be viewed online at
|
|
1224
|
+
((<URL:http://bioruby.org/rdoc/>)).
|
|
1225
|
+
|
|
1226
|
+
== BioRuby Shell
|
|
1227
|
+
|
|
1228
|
+
You can find the BioRuby shell implementation in ./lib/bio/shell. It is very interesting
|
|
1229
|
+
as it uses IRB (the Ruby interpreter) which is a powerful environment described in
|
|
1230
|
+
((<Programming Ruby's irb chapter|URL:http://ruby-doc.org/docs/ProgrammingRuby/html/irb.html>)). IRB commands can directly be typed in the shell, e.g.
|
|
1231
|
+
|
|
1232
|
+
bioruby!> IRB.conf[:PROMPT_MODE]
|
|
1233
|
+
==!> :PROMPT_C
|
|
1147
1234
|
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1235
|
+
Optionally you may also want to install the Ruby readline support -
|
|
1236
|
+
with Debian libreadline-ruby. To edit a previous line you may have to press
|
|
1237
|
+
line down (arrow down) first.
|
|
1238
|
+
|
|
1239
|
+
= Helpful tools
|
|
1240
|
+
|
|
1241
|
+
Apart from rdoc you may also want to use rtags - which allows jumping around
|
|
1242
|
+
source code by clicking on class and method names.
|
|
1243
|
+
|
|
1244
|
+
cd bioruby/lib
|
|
1245
|
+
rtags -R --vi
|
|
1246
|
+
|
|
1247
|
+
For a tutorial see ((<URL:http://rtags.rubyforge.org/>))
|
|
1151
1248
|
|
|
1152
1249
|
= APPENDIX
|
|
1153
1250
|
|
|
1154
1251
|
== KEGG API
|
|
1155
1252
|
|
|
1156
|
-
Please refer to KEGG_API.rd.ja (
|
|
1253
|
+
Please refer to KEGG_API.rd.ja (English version: ((<URL:http://www.genome.jp/kegg/soap/doc/keggapi_manual.html>)) ) and
|
|
1157
1254
|
|
|
1158
1255
|
* ((<URL:http://www.genome.jp/kegg/soap/>))
|
|
1159
1256
|
|
|
1160
1257
|
== Comparing BioProjects
|
|
1161
1258
|
|
|
1162
|
-
For a quick functional comparison of BioRuby, BioPerl, BioPython and Bioconductor (R) see ((<http://sciruby.codeforpeople.com/sr.cgi/BioProjects>))
|
|
1259
|
+
For a quick functional comparison of BioRuby, BioPerl, BioPython and Bioconductor (R) see ((<URL:http://sciruby.codeforpeople.com/sr.cgi/BioProjects>))
|
|
1163
1260
|
|
|
1164
1261
|
== Using BioRuby with R
|
|
1165
1262
|
|
|
1166
|
-
Using Ruby with R Pjotr wrote a section on SciRuby. See ((<
|
|
1263
|
+
Using Ruby with R Pjotr wrote a section on SciRuby. See ((<URL:http://sciruby.codeforpeople.com/sr.cgi/RubyWithRlang>))
|
|
1167
1264
|
|
|
1168
1265
|
== Using BioPerl or BioPython from Ruby
|
|
1169
1266
|
|
|
@@ -1180,5 +1277,20 @@ painful, as the gem standard for packages evolved late and some still
|
|
|
1180
1277
|
force you to copy things by hand. Therefore read the README's
|
|
1181
1278
|
carefully that come with each package.
|
|
1182
1279
|
|
|
1183
|
-
|
|
1280
|
+
== Trouble shooting
|
|
1184
1281
|
|
|
1282
|
+
* Error: in `require': no such file to load -- bio (LoadError)
|
|
1283
|
+
|
|
1284
|
+
Ruby fails to find the BioRuby libraries - add it to the RUBYLIB path, or pass
|
|
1285
|
+
it to the interpreter. For example:
|
|
1286
|
+
|
|
1287
|
+
ruby -I~/cvs/bioruby/lib yourprogram.rb
|
|
1288
|
+
|
|
1289
|
+
== Modifying this page
|
|
1290
|
+
|
|
1291
|
+
IMPORTANT NOTICE: This page is maintained in the BioRuby CVS
|
|
1292
|
+
repository. Please edit the file there otherwise changes may get
|
|
1293
|
+
lost. See ((<BioRuby Developer Information>)) for CVS and mailing list
|
|
1294
|
+
access.
|
|
1295
|
+
|
|
1296
|
+
=end
|