bio-biosql 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.project +17 -0
- data/BSDL +22 -0
- data/COPYING +56 -0
- data/COPYING.ja +51 -0
- data/Gemfile +4 -0
- data/LEGAL +9 -0
- data/README.md +69 -0
- data/Rakefile +19 -0
- data/bio-biosql.gemspec +36 -0
- data/lib/bio-biosql/version.rb +5 -0
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +78 -0
- data/lib/bio/db/biosql/sequence.rb +444 -0
- data/lib/bio/io/biosql/ar-biosql.rb +257 -0
- data/lib/bio/io/biosql/biosql.rb +39 -0
- data/lib/bio/io/biosql/config/database.yml +21 -0
- data/lib/bio/io/sql.rb +79 -0
- data/test/bioruby_test_helper.rb +86 -0
- data/test/data/00dummy.txt +1 -0
- data/test/unit/bio/db/biosql/tc_biosql.rb +114 -0
- data/test/unit/bio/db/biosql/ts_suite_biosql.rb +8 -0
- metadata +176 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6b56ab2936a2ea661de08ec2fa82104a41c25208
|
4
|
+
data.tar.gz: d9f69c1c27b3c4d1d7a82dd346cf534226c1fda7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 133e9c0940a2443a219eff533dd0ffbf38d929a6f4049ef79e3799ac0789cd5d19121d9c011ceaa236f0d2a04fc833dc2c49506e938402b16dcca0ff1397894e
|
7
|
+
data.tar.gz: 5f5f36f25d48c8997c2360ceef23bcdb62bdf0b77e6f4b85cee0497172d19fe0d41ae0bcb360f6ab1eaca32ac7cc9c57b817d3f53a5dd7ff84c476fe6dc11b0d
|
data/.gitignore
ADDED
data/.project
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<projectDescription>
|
3
|
+
<name>bioruby</name>
|
4
|
+
<comment></comment>
|
5
|
+
<projects>
|
6
|
+
</projects>
|
7
|
+
<buildSpec>
|
8
|
+
<buildCommand>
|
9
|
+
<name>org.rubypeople.rdt.core.rubybuilder</name>
|
10
|
+
<arguments>
|
11
|
+
</arguments>
|
12
|
+
</buildCommand>
|
13
|
+
</buildSpec>
|
14
|
+
<natures>
|
15
|
+
<nature>org.rubypeople.rdt.core.rubynature</nature>
|
16
|
+
</natures>
|
17
|
+
</projectDescription>
|
data/BSDL
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (C) 1993-2013 Yukihiro Matsumoto. All rights reserved.
|
2
|
+
|
3
|
+
Redistribution and use in source and binary forms, with or without
|
4
|
+
modification, are permitted provided that the following conditions
|
5
|
+
are met:
|
6
|
+
1. Redistributions of source code must retain the above copyright
|
7
|
+
notice, this list of conditions and the following disclaimer.
|
8
|
+
2. Redistributions in binary form must reproduce the above copyright
|
9
|
+
notice, this list of conditions and the following disclaimer in the
|
10
|
+
documentation and/or other materials provided with the distribution.
|
11
|
+
|
12
|
+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
13
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
14
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
15
|
+
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
16
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
17
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
18
|
+
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
19
|
+
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
20
|
+
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
21
|
+
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
22
|
+
SUCH DAMAGE.
|
data/COPYING
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
bio-biosql is copyrighted free software by BioRuby project <staff@bioruby.org>.
|
2
|
+
You can redistribute it and/or modify it under either the terms of the
|
3
|
+
2-clause BSDL (see the file BSDL), or the conditions below:
|
4
|
+
|
5
|
+
1. You may make and give away verbatim copies of the source form of the
|
6
|
+
software without restriction, provided that you duplicate all of the
|
7
|
+
original copyright notices and associated disclaimers.
|
8
|
+
|
9
|
+
2. You may modify your copy of the software in any way, provided that
|
10
|
+
you do at least ONE of the following:
|
11
|
+
|
12
|
+
a) place your modifications in the Public Domain or otherwise
|
13
|
+
make them Freely Available, such as by posting said
|
14
|
+
modifications to Usenet or an equivalent medium, or by allowing
|
15
|
+
the author to include your modifications in the software.
|
16
|
+
|
17
|
+
b) use the modified software only within your corporation or
|
18
|
+
organization.
|
19
|
+
|
20
|
+
c) give non-standard binaries non-standard names, with
|
21
|
+
instructions on where to get the original software distribution.
|
22
|
+
|
23
|
+
d) make other distribution arrangements with the author.
|
24
|
+
|
25
|
+
3. You may distribute the software in object code or binary form,
|
26
|
+
provided that you do at least ONE of the following:
|
27
|
+
|
28
|
+
a) distribute the binaries and library files of the software,
|
29
|
+
together with instructions (in the manual page or equivalent)
|
30
|
+
on where to get the original distribution.
|
31
|
+
|
32
|
+
b) accompany the distribution with the machine-readable source of
|
33
|
+
the software.
|
34
|
+
|
35
|
+
c) give non-standard binaries non-standard names, with
|
36
|
+
instructions on where to get the original software distribution.
|
37
|
+
|
38
|
+
d) make other distribution arrangements with the author.
|
39
|
+
|
40
|
+
4. You may modify and include the part of the software into any other
|
41
|
+
software (possibly commercial). But some files in the distribution
|
42
|
+
are not written by the author, so that they are not under these terms.
|
43
|
+
|
44
|
+
For the list of those files and their copying conditions, see the
|
45
|
+
file LEGAL.
|
46
|
+
|
47
|
+
5. The scripts and library files supplied as input to or produced as
|
48
|
+
output from the software do not automatically fall under the
|
49
|
+
copyright of the software, but belong to whomever generated them,
|
50
|
+
and may be sold commercially, and may be aggregated with this
|
51
|
+
software.
|
52
|
+
|
53
|
+
6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
|
54
|
+
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
|
55
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
56
|
+
PURPOSE.
|
data/COPYING.ja
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
本プログラムはフリーソフトウェアです.2-clause BSDL
|
2
|
+
または以下に示す条件で本プログラムを再配布できます
|
3
|
+
2-clause BSDLについてはBSDLファイルを参照して下さい.
|
4
|
+
|
5
|
+
1. 複製は制限なく自由です.
|
6
|
+
|
7
|
+
2. 以下の条件のいずれかを満たす時に本プログラムのソースを
|
8
|
+
自由に変更できます.
|
9
|
+
|
10
|
+
(a) ネットニューズにポストしたり,作者に変更を送付する
|
11
|
+
などの方法で,変更を公開する.
|
12
|
+
|
13
|
+
(b) 変更した本プログラムを自分の所属する組織内部だけで
|
14
|
+
使う.
|
15
|
+
|
16
|
+
(c) 変更点を明示したうえ,ソフトウェアの名前を変更する.
|
17
|
+
そのソフトウェアを配布する時には変更前の本プログラ
|
18
|
+
ムも同時に配布する.または変更前の本プログラムのソー
|
19
|
+
スの入手法を明示する.
|
20
|
+
|
21
|
+
(d) その他の変更条件を作者と合意する.
|
22
|
+
|
23
|
+
3. 以下の条件のいずれかを満たす時に本プログラムをコンパイ
|
24
|
+
ルしたオブジェクトコードや実行形式でも配布できます.
|
25
|
+
|
26
|
+
(a) バイナリを受け取った人がソースを入手できるように,
|
27
|
+
ソースの入手法を明示する.
|
28
|
+
|
29
|
+
(b) 機械可読なソースコードを添付する.
|
30
|
+
|
31
|
+
(c) 変更を行ったバイナリは名前を変更したうえ,オリジナ
|
32
|
+
ルのソースコードの入手法を明示する.
|
33
|
+
|
34
|
+
(d) その他の配布条件を作者と合意する.
|
35
|
+
|
36
|
+
4. 他のプログラムへの引用はいかなる目的であれ自由です.た
|
37
|
+
だし,本プログラムに含まれる他の作者によるコードは,そ
|
38
|
+
れぞれの作者の意向による制限が加えられる場合があります.
|
39
|
+
|
40
|
+
それらファイルの一覧とそれぞれの配布条件などに付いては
|
41
|
+
LEGALファイルを参照してください.
|
42
|
+
|
43
|
+
5. 本プログラムへの入力となるスクリプトおよび,本プログラ
|
44
|
+
ムからの出力の権利は本プログラムの作者ではなく,それぞ
|
45
|
+
れの入出力を生成した人に属します.また,本プログラムに
|
46
|
+
組み込まれるための拡張ライブラリについても同様です.
|
47
|
+
|
48
|
+
6. 本プログラムは無保証です.作者は本プログラムをサポート
|
49
|
+
する意志はありますが,プログラム自身のバグあるいは本プ
|
50
|
+
ログラムの実行などから発生するいかなる損害に対しても責
|
51
|
+
任を持ちません.
|
data/Gemfile
ADDED
data/LEGAL
ADDED
data/README.md
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
# Bio::SQL -- BioRuby BioSQL Plugin
|
2
|
+
|
3
|
+
IMPORTANT NOTICE: This package is in a preliminary, experimental state.
|
4
|
+
It may not work as expected.
|
5
|
+
Please report bugs to us; fixes are welcome.
|
6
|
+
|
7
|
+
bio-biosql is a [BioSQL](http://biosql.org/) plugin for
|
8
|
+
[BioRuby](http://bioruby.org/), an open source bioinformatics
|
9
|
+
library for Ruby.
|
10
|
+
|
11
|
+
BioSQL is a set of SQL schemas for storing biological sequences
|
12
|
+
with meta-data. BioSQL supports major open-source database engines.
|
13
|
+
See [BioSQL Documentation](http://biosql.org/) for details.
|
14
|
+
|
15
|
+
This code written in Ruby has historically been part of the core BioRuby
|
16
|
+
[gem](https://github.com/bioruby/bioruby), but has been split into its
|
17
|
+
own gem as part of an effort to
|
18
|
+
[modularize](http://bioruby.open-bio.org/wiki/Plugins)
|
19
|
+
BioRuby. bio-biosql and many more plugins are available at
|
20
|
+
[biogems.info](http://www.biogems.info/).
|
21
|
+
|
22
|
+
This code was initially written by Toshiaki Katayama.
|
23
|
+
Raoul Jean Pierre Bonnal greatly improved the code by introducing
|
24
|
+
ActiveRecord. For details of development, see
|
25
|
+
[github.com/helios/bioruby](https://github.com/helios/bioruby),
|
26
|
+
[github.com/bioruby/bioruby](https://github.com/bioruby/bioruby),
|
27
|
+
and the BioRuby mailing list archives.
|
28
|
+
|
29
|
+
## Installation
|
30
|
+
|
31
|
+
Add this line to your application's Gemfile:
|
32
|
+
|
33
|
+
```ruby
|
34
|
+
gem 'bio-biosql'
|
35
|
+
```
|
36
|
+
|
37
|
+
And then execute:
|
38
|
+
|
39
|
+
$ bundle
|
40
|
+
|
41
|
+
Or install it yourself as:
|
42
|
+
|
43
|
+
$ gem install bio-biosql
|
44
|
+
|
45
|
+
## Requirements
|
46
|
+
|
47
|
+
In addition to the dependencies listed in the Gemfile,
|
48
|
+
at least one ActiveRecord adapter will be needed.
|
49
|
+
|
50
|
+
* [pg](http://rubygems.org/gems/pg)
|
51
|
+
* [sqlite-ruby](http://rubygems.org/gems/sqlite-ruby)
|
52
|
+
* [sqlite3](http://rubygems.org/gems/sqlite3)
|
53
|
+
* [mysql](http://rubygems.org/gems/mysql)
|
54
|
+
* [mysql2](http://rubygems.org/gems/mysql2)
|
55
|
+
* [activerecord-oracle_enhanced-adapter](http://rubygems.org/gems/activerecord-oracle_enhanced-adapter)
|
56
|
+
|
57
|
+
TODO: Please test Bio::SQL with the above adapters.
|
58
|
+
|
59
|
+
## Usage
|
60
|
+
|
61
|
+
TODO: Write usage instructions here
|
62
|
+
|
63
|
+
## Contributing
|
64
|
+
|
65
|
+
1. Fork it ( https://github.com/bioruby/bioruby-biosql/fork )
|
66
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
67
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
68
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
69
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# Rakefile for the bio-biosql gem: gem packaging tasks (provided by Bundler),
# the unit-test task and the RDoc generation task.
require "bundler/gem_tasks"
require 'rdoc/task'
require 'rake/testtask'

# Running plain `rake` runs the unit tests.
task :default => "test"

Rake::TestTask.new do |t|
  # The unit tests shipped with this gem are named tc_*.rb
  # (test/unit/bio/db/biosql/tc_biosql.rb). The former pattern, test_*.rb,
  # matched none of them, so `rake test` ran no test at all. The old pattern
  # is kept as well so that tests following that naming are still picked up.
  t.test_files = FileList["test/unit/**/tc_*.rb",
                          "test/unit/**/test_*.rb"]
end

# Generates the API documentation into the rdoc/ directory.
Rake::RDocTask.new do |r|
  r.rdoc_dir = "rdoc"
  r.rdoc_files.include("README.md",
                       "COPYING", "COPYING.ja", "BSDL",
                       "lib/**/*.rb")
  r.main = "README.md"
  r.options << '--title' << 'Bio::SQL API documentation'
  r.options << '--line-numbers'
end
|
data/bio-biosql.gemspec
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'bio-biosql/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "bio-biosql"
|
8
|
+
spec.version = Bio::SQL::VERSION
|
9
|
+
spec.authors = ["BioRuby project"]
|
10
|
+
spec.email = ["staff@bioruby.org"]
|
11
|
+
spec.summary = %q{BioSQL plugin for BioRuby}
|
12
|
+
spec.description = %q{Provides BioSQL support for BioRuby.}
|
13
|
+
spec.homepage = "http://github.com/bioruby/bioruby-biosql"
|
14
|
+
spec.license = "Ruby"
|
15
|
+
|
16
|
+
spec.files = `git ls-files -z`.split("\x0")
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.extra_rdoc_files = [ "README.md",
|
22
|
+
"COPYING", "COPYING.ja", "BSDL"
|
23
|
+
]
|
24
|
+
spec.rdoc_options << '--main' << 'README.md'
|
25
|
+
spec.rdoc_options << '--title' << 'Bio::SQL API documentation'
|
26
|
+
spec.rdoc_options << '--line-numbers'
|
27
|
+
|
28
|
+
spec.add_runtime_dependency "bio", "~> 1.5.0"
|
29
|
+
spec.add_runtime_dependency "activerecord", "~> 3.0.10"
|
30
|
+
spec.add_runtime_dependency "composite_primary_keys", "~> 3.1.10"
|
31
|
+
|
32
|
+
spec.add_development_dependency "bundler", "~> 1.7"
|
33
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
34
|
+
spec.add_development_dependency "rdoc", "~> 4"
|
35
|
+
spec.add_development_dependency "test-unit", "~> 3"
|
36
|
+
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
#
|
2
|
+
# = bio/db/biosql/biosql_to_biosequence.rb - Bio::SQL::Sequence to Bio::Sequence adapter module
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2008
|
5
|
+
# Naohisa Goto <ng@bioruby.org>,
|
6
|
+
# Raoul Jean Pierre Bonnal
|
7
|
+
# License:: The Ruby License
|
8
|
+
#
|
9
|
+
# $Id:$
|
10
|
+
#
|
11
|
+
|
12
|
+
require 'bio/sequence'
|
13
|
+
require 'bio/sequence/adapter'
|
14
|
+
require 'bio/sequence/dblink'
|
15
|
+
|
16
|
+
# Internal use only. Normal users should not use this module.
|
17
|
+
#
|
18
|
+
# Bio::SQL::Sequence to Bio::Sequence adapter module.
|
19
|
+
# It is internally used in Bio::SQL::Sequence#to_biosequence.
|
20
|
+
#
|
21
|
+
module Bio::Sequence::Adapter::BioSQL

  extend Bio::Sequence::Adapter

  private

  # Each def_biosequence_adapter call declares one Bio::Sequence attribute
  # that is read from the wrapped Bio::SQL::Sequence object.
  # NOTE(review): the macro itself is defined in bio/sequence/adapter;
  # presumably the one-argument form reads the method of the same name and
  # the two-argument form reads the method named by the second symbol, while
  # the block form computes the value from the original object -- confirm
  # against Bio::Sequence::Adapter.

  def_biosequence_adapter :seq

  def_biosequence_adapter :entry_id

  def_biosequence_adapter :primary_accession

  def_biosequence_adapter :secondary_accessions

  def_biosequence_adapter :molecule_type

  #--
  #TODO: identify where is stored data_class in biosql
  #++

  def_biosequence_adapter :data_class

  def_biosequence_adapter :definition, :description

  def_biosequence_adapter :topology

  def_biosequence_adapter :date_created

  def_biosequence_adapter :date_modified
  #do |bs|
  #  Date.parse(bs.date_modified.to_s).strftime("%d-%b-%Y").upcase
  # end

  def_biosequence_adapter :division

  def_biosequence_adapter :sequence_version

  def_biosequence_adapter :keywords

  def_biosequence_adapter :species

  def_biosequence_adapter :classification, :taxonomy

  def_biosequence_adapter :references

  def_biosequence_adapter :features

  def_biosequence_adapter :comments

  # Rebuilds the Bio::Sequence::DBLink list from the identifier string.
  #
  # Bio::SQL::Sequence#to_biosql writes the cross references as
  # "database:id" items joined with ';'. Splitting on ',' only (as was done
  # before) turned a multi-item identifier into a single, malformed DBLink.
  # Both separators are accepted so that either form can be read back.
  # A missing identifier yields an empty list instead of raising.
  def_biosequence_adapter :other_seqids do |orig|
    orig.identifier.to_s.split(/[;,]/).collect do |dblink|
      # Split on the first ':' only, so an id that itself contains ':'
      # is kept whole.
      database, id = dblink.split(':', 2)
      Bio::Sequence::DBLink.new(database, id)
    end
  end

end #module Bio::Sequence::Adapter::BioSQL
|
78
|
+
|
@@ -0,0 +1,444 @@
|
|
1
|
+
|
2
|
+
#TODO save on db reading from a genbank or embl object
|
3
|
+
module Bio
|
4
|
+
class SQL
|
5
|
+
|
6
|
+
|
7
|
+
|
8
|
+
class Sequence
|
9
|
+
private
|
10
|
+
# Class-level macro binding an attribute name to rows of the entry's
# bioentry_qualifier_values association.
#
# Example:
#   bioentry_qualifier_anchor :molecule_type, :synonym=>'mol_type'
#
# defines three instance methods:
#
# molecule_type::
#   collects row.value for every qualifier row of the term. When the
#   collected data parses as a date, a "DD-MON-YYYY" upper-case string is
#   returned instead of the array (GenBank/EMBL date format).
#   NOTE(review): no explicit ordering is requested here; ordering by rank,
#   if any, must come from the association definition -- confirm.
# molecule_type=(value)::
#   appends a qualifier value; its rank is 1 for the first value, otherwise
#   the successor of the last fetched row's rank.
# molecule_type_update(value, rank)::
#   overwrites the value stored at the given rank, or appends the value when
#   no row with that rank exists.
#
# The term name is the attribute name unless options[:synonym] is given.
# A missing term is created under the 'Annotation Tags' ontology (the reader
# creates it too). Errors are reported on standard output and the generated
# method then returns nil.
#
#TODO: implement setting for more than a qualifier-value.
def self.bioentry_qualifier_anchor(sym, *args)
  options = args.first || Hash.new
  #options.assert_valid_keys(:rank,:synonym,:multi)
  method_reader = sym.to_s.to_sym
  method_writer_operator = (sym.to_s+"=").to_sym
  method_writer_modder = (sym.to_s+"_update").to_sym
  synonym = options[:synonym].nil? ? sym.to_s : options[:synonym]

  # Finds the qualifier term, creating it when it does not exist yet.
  # Shared by the three generated methods.
  find_term = lambda do
    Term.first(:conditions=>["name = ?",synonym]) ||
      Term.create({:name => synonym, :ontology=> Ontology.first(:conditions=>["name = ?",'Annotation Tags'])})
  end

  define_method method_reader do
    begin
      term = find_term.call
      bioentry_qualifier_values = @entry.bioentry_qualifier_values.all(:conditions=>["term_id = ?",term.term_id])
      data = bioentry_qualifier_values.map{|row| row.value} unless bioentry_qualifier_values.nil?
      begin
        # Return a GenBank/EMBL formatted date string when the data is a
        # date, otherwise fall through to the plain data.
        Date.parse(data.to_s).strftime("%d-%b-%Y").upcase
      rescue ArgumentError, TypeError, NoMethodError, NameError
        data
      end
    rescue StandardError => e
      # StandardError rather than Exception: interrupts and exit requests
      # must not be swallowed by this best-effort reader.
      puts "Reader Error: #{synonym} #{e.message}"
    end
  end

  define_method method_writer_operator do |value|
    begin
      term = find_term.call
      datas = @entry.bioentry_qualifier_values.all(:conditions=>["term_id = ?",term.term_id])
      #add an element incrementing the rank or setting the first to 1
      be_qu_va=@entry.bioentry_qualifier_values.build({:term=>term, :rank=>(datas.empty? ? 1 : datas.last.rank.succ), :value=>value})
      be_qu_va.save
    rescue StandardError => e
      puts "WriterOperator= Error: #{synonym} #{e.message}"
    end
  end

  define_method method_writer_modder do |value, rank|
    begin
      term = find_term.call
      # Fetch the single row at this rank. The previous code called
      # .all(:term_id=>..., :rank=>...), whose result is a collection: it is
      # never nil and has no value= method, so an update could never succeed.
      data = @entry.bioentry_qualifier_values.first(:conditions=>["term_id = ? and rank = ?", term.term_id, rank])
      if data.nil?
        send method_writer_operator, value
      else
        data.value=value
        data.save
      end
    rescue StandardError => e
      puts "WriterModder Error: #{synonym} #{e.message}"
    end
  end

end
|
81
|
+
|
82
|
+
public
|
83
|
+
attr_reader :entry
|
84
|
+
|
85
|
+
def delete
|
86
|
+
#TODO: check is references connected to this bioentry are leaf or not.
|
87
|
+
#actually I think it should be more sofisticated, check if there are
|
88
|
+
#other bioentries connected to references; if not delete 'em
|
89
|
+
@entry.references.each { |ref| ref.delete if ref.bioentries.size==1}
|
90
|
+
@entry.destroy
|
91
|
+
end
|
92
|
+
|
93
|
+
def get_seqfeature(sf)
|
94
|
+
|
95
|
+
#in seqfeature BioSQL class
|
96
|
+
locations_str = sf.locations.map{|loc| loc.to_s}.join(',')
|
97
|
+
#pp sf.locations.inspect
|
98
|
+
locations_str = "join(#{locations_str})" if sf.locations.count>1
|
99
|
+
Bio::Feature.new(sf.type_term.name, locations_str,sf.seqfeature_qualifier_values.collect{|sfqv| Bio::Feature::Qualifier.new(sfqv.term.name,sfqv.value)})
|
100
|
+
end
|
101
|
+
|
102
|
+
def length=(len)
|
103
|
+
@entry.biosequence.length=len
|
104
|
+
end
|
105
|
+
|
106
|
+
def initialize(options={})
|
107
|
+
#options.assert_valid_keys(:entry, :biodatabase,:biosequence)
|
108
|
+
return @entry = options[:entry] unless options[:entry].nil?
|
109
|
+
|
110
|
+
return to_biosql(options[:biosequence], options[:biodatabase]) unless options[:biosequence].nil? or options[:biodatabase].nil?
|
111
|
+
|
112
|
+
end
|
113
|
+
|
114
|
+
# Builds a new bioentry in +biodatabase+ from the sequence object +bs+ and
# stores its annotations through the writer methods of this class.
#
# bs::          object answering entry_id, primary_accession, definition,
#               sequence_version, division, other_seqids,
#               secondary_accessions, seq, molecule_type, data_class,
#               topology, date_created, date_modified, keywords, species,
#               features, references and comments.
# biodatabase:: object whose bioentries.build creates the new entry.
#
# Returns self on success. On error the message is printed on standard
# output and nil is returned; rows saved before the error are not rolled
# back here (the transaction wrapper is disabled).
def to_biosql(bs,biodatabase)
  begin
    @entry = biodatabase.bioentries.build({:name=>bs.entry_id})

    puts "primary" if $DEBUG
    self.primary_accession = bs.primary_accession

    puts "def" if $DEBUG
    self.definition = bs.definition unless bs.definition.nil?

    puts "seqver" if $DEBUG
    self.sequence_version = bs.sequence_version || 0

    puts "divi" if $DEBUG
    self.division = bs.division unless bs.division.nil?

    puts "identifier" if $DEBUG
    self.identifier = bs.other_seqids.collect{|dblink| "#{dblink.database}:#{dblink.id}"}.join(';') unless bs.other_seqids.nil?
    # The entry has to exist in the database before qualifier values,
    # sequence, features, references and comments can be attached to it.
    @entry.save
    puts "secacc" if $DEBUG

    bs.secondary_accessions.each do |sa|
      puts "#{sa}" if $DEBUG
      #write as qualifier every secondary accession into the array
      self.secondary_accessions = sa
    end unless bs.secondary_accessions.nil?

    #to create the sequence entry needs to exists
    puts "seq" if $DEBUG
    puts bs.seq if $DEBUG
    self.seq = bs.seq unless bs.seq.nil?
    puts "mol" if $DEBUG

    self.molecule_type = bs.molecule_type unless bs.molecule_type.nil?
    puts "dc" if $DEBUG

    self.data_class = bs.data_class unless bs.data_class.nil?
    puts "top" if $DEBUG
    self.topology = bs.topology unless bs.topology.nil?
    puts "datec" if $DEBUG
    self.date_created = bs.date_created unless bs.date_created.nil?
    puts "datemod" if $DEBUG
    self.date_modified = bs.date_modified unless bs.date_modified.nil?
    puts "key" if $DEBUG

    bs.keywords.each do |kw|
      #write as qualifier every keyword into the array
      self.keywords = kw
    end unless bs.keywords.nil?

    puts "spec" if $DEBUG
    # Guard against nil as well as empty: calling empty? on a nil species
    # raised, which aborted the import before features, references and
    # comments were stored.
    self.species = bs.species unless bs.species.nil? || bs.species.empty?
    puts "Debug: #{bs.species}" if $DEBUG
    puts "Debug: feat..start" if $DEBUG

    bs.features.each do |feat|
      self.feature=feat
    end unless bs.features.nil?

    puts "Debug: feat...end" if $DEBUG
    bs.references.each do |reference|
      self.reference=reference
    end unless bs.references.nil?

    bs.comments.each do |comment|
      self.comment=comment
    end unless bs.comments.nil?

    return self
  rescue StandardError => e
    # StandardError rather than Exception, so that interrupts and exit
    # requests are not swallowed.
    puts "to_biosql exception: #{e}"
    puts $!
  end #rescue
end #to_biosql
|
193
|
+
|
194
|
+
|
195
|
+
def name
|
196
|
+
@entry.name
|
197
|
+
end
|
198
|
+
alias entry_id name
|
199
|
+
|
200
|
+
def name=(value)
|
201
|
+
@entry.name=value
|
202
|
+
end
|
203
|
+
alias entry_id= name=
|
204
|
+
|
205
|
+
def primary_accession
|
206
|
+
@entry.accession
|
207
|
+
end
|
208
|
+
|
209
|
+
def primary_accession=(value)
|
210
|
+
@entry.accession=value
|
211
|
+
end
|
212
|
+
|
213
|
+
#TODO def secondary_accession
|
214
|
+
# @entry.bioentry_qualifier_values
|
215
|
+
# end
|
216
|
+
|
217
|
+
def organism
|
218
|
+
@entry.taxon.nil? ? "" : "#{@entry.taxon.taxon_scientific_name.name}"+ (@entry.taxon.taxon_genbank_common_name ? "(#{@entry.taxon.taxon_genbank_common_name.name})" : '')
|
219
|
+
end
|
220
|
+
alias species organism
|
221
|
+
|
222
|
+
def organism=(value)
|
223
|
+
#FIX there is a shortcut
|
224
|
+
taxon_name=TaxonName.first(:conditions=>["name = ? and name_class = ?",value.gsub(/\s+\(.+\)/,''),'scientific name'])
|
225
|
+
if taxon_name.nil?
|
226
|
+
puts "Error value doesn't exists in taxon_name table with scientific name constraint."
|
227
|
+
else
|
228
|
+
@entry.taxon_id=taxon_name.taxon_id
|
229
|
+
@entry.save
|
230
|
+
end
|
231
|
+
end
|
232
|
+
alias species= organism=
|
233
|
+
|
234
|
+
def database
|
235
|
+
@entry.biodatabase.name
|
236
|
+
end
|
237
|
+
|
238
|
+
def database_desc
|
239
|
+
@entry.biodatabase.description
|
240
|
+
end
|
241
|
+
|
242
|
+
def version
|
243
|
+
@entry.version
|
244
|
+
end
|
245
|
+
alias sequence_version version
|
246
|
+
|
247
|
+
def version=(value)
|
248
|
+
@entry.version=value
|
249
|
+
end
|
250
|
+
alias sequence_version= version=
|
251
|
+
|
252
|
+
def division
|
253
|
+
@entry.division
|
254
|
+
end
|
255
|
+
|
256
|
+
def division=(value)
|
257
|
+
@entry.division=value
|
258
|
+
end
|
259
|
+
|
260
|
+
def description
|
261
|
+
@entry.description
|
262
|
+
end
|
263
|
+
alias definition description
|
264
|
+
|
265
|
+
def description=(value)
|
266
|
+
@entry.description=value
|
267
|
+
end
|
268
|
+
alias definition= description=
|
269
|
+
|
270
|
+
def identifier
|
271
|
+
@entry.identifier
|
272
|
+
end
|
273
|
+
alias other_seqids identifier
|
274
|
+
|
275
|
+
def identifier=(value)
|
276
|
+
@entry.identifier=value
|
277
|
+
end
|
278
|
+
|
279
|
+
bioentry_qualifier_anchor :data_class
|
280
|
+
bioentry_qualifier_anchor :molecule_type, :synonym=>'mol_type'
|
281
|
+
bioentry_qualifier_anchor :topology
|
282
|
+
bioentry_qualifier_anchor :date_created
|
283
|
+
bioentry_qualifier_anchor :date_modified, :synonym=>'date_changed'
|
284
|
+
bioentry_qualifier_anchor :keywords, :synonym=>'keyword'
|
285
|
+
bioentry_qualifier_anchor :secondary_accessions, :synonym=>'secondary_accession'
|
286
|
+
|
287
|
+
def features
|
288
|
+
@entry.seqfeatures.collect do |sf|
|
289
|
+
self.get_seqfeature(sf)
|
290
|
+
end
|
291
|
+
end
|
292
|
+
|
293
|
+
def feature=(feat)
|
294
|
+
#ToDo: avoid Ontology find here, probably more efficient create class variables
|
295
|
+
#DELETE type_term_ontology = Ontology.find_or_create({:name=>'SeqFeature Keys'})
|
296
|
+
puts "feature:type_term = #{feat.feature}" if $DEBUG
|
297
|
+
type_term = Term.first(:conditions=>["name = ?", feat.feature]) || Term.create({:name=>feat.feature, :ontology=>Ontology.first(:conditions=>["name = ?",'SeqFeature Keys'])})
|
298
|
+
#DELETE source_term_ontology = Ontology.find_or_create({:name=>'SeqFeature Sources'})
|
299
|
+
puts "feature:source_term" if $DEBUG
|
300
|
+
source_term = Term.first(:conditions=>["name = ?",'EMBLGenBankSwit'])
|
301
|
+
puts "feature:seqfeature" if $DEBUG
|
302
|
+
seqfeature = @entry.seqfeatures.build({:source_term=>source_term, :type_term=>type_term, :rank=>@entry.seqfeatures.count.succ, :display_name=>''})
|
303
|
+
seqfeature.save
|
304
|
+
puts "feature:location" if $DEBUG
|
305
|
+
feat.locations.each do |loc|
|
306
|
+
location = seqfeature.locations.build({:seqfeature=>seqfeature, :start_pos=>loc.from, :end_pos=>loc.to, :strand=>loc.strand, :rank=>seqfeature.locations.count.succ})
|
307
|
+
location.save
|
308
|
+
end
|
309
|
+
|
310
|
+
#DELETE qual_term_ontology = Ontology.find_or_create({:name=>'Annotation Tags'})
|
311
|
+
|
312
|
+
puts "feature:qualifier" if $DEBUG
|
313
|
+
feat.each do |qualifier|
|
314
|
+
#DELETE qual_term = Term.find_or_create({:name=>qualifier.qualifier}, {:ontology=>qual_term_ontology})
|
315
|
+
qual_term = Term.first(:conditions=>["name = ?", qualifier.qualifier]) || Term.create({:name=>qualifier.qualifier, :ontology=>Ontology.first(:conditions=>["name = ?", 'Annotation Tags'])})
|
316
|
+
qual = seqfeature.seqfeature_qualifier_values.build({:seqfeature=>seqfeature, :term=>qual_term, :value=>qualifier.value.to_s, :rank=>seqfeature.seqfeature_qualifier_values.count.succ})
|
317
|
+
qual.save
|
318
|
+
|
319
|
+
end
|
320
|
+
end
|
321
|
+
|
322
|
+
#return the seqfeature mapped from BioSQL with a type_term like 'CDS'
|
323
|
+
def cdsfeatures
|
324
|
+
@entry.cdsfeatures
|
325
|
+
end
|
326
|
+
|
327
|
+
# Returns the sequence.
|
328
|
+
# Returns a Bio::Sequence::Generic object.
|
329
|
+
|
330
|
+
def seq
|
331
|
+
s = @entry.biosequence
|
332
|
+
Bio::Sequence::Generic.new(s ? s.seq : '')
|
333
|
+
end
|
334
|
+
|
335
|
+
def seq=(value)
|
336
|
+
#TODO: revise this piece of code.
|
337
|
+
#chk which type of alphabet is, NU/NA/nil
|
338
|
+
if @entry.biosequence.nil?
|
339
|
+
#DELETE puts "intoseq1"
|
340
|
+
@entry.biosequence = Biosequence.new(:seq=>value)
|
341
|
+
# biosequence = @entry.biosequence.build({:seq=>value})
|
342
|
+
@entry.biosequence.save
|
343
|
+
# biosequence.save
|
344
|
+
else
|
345
|
+
@entry.biosequence.seq=value
|
346
|
+
end
|
347
|
+
self.length=value.length
|
348
|
+
#DELETE #@entry.biosequence.length=value.length
|
349
|
+
#DELETE #break
|
350
|
+
@entry.save
|
351
|
+
end
|
352
|
+
|
353
|
+
#report parents and exclude info with "no rank". Now I report rank == class but ... Question ? Have to be reported taxonomy with rank=="class"?
|
354
|
+
# Returns the scientific names of the ancestors of the entry's taxon as an
# array ordered from the most distant ancestor to the direct parent.
#
# The walk starts at the parent of the entry's taxon and stops at the first
# ancestor that is missing, is its own parent, or has node_rank 'no rank'.
# Ancestors whose node_rank is 'class' are walked through but not reported.
# An entry without a taxon yields an empty array.
def taxonomy
  tax = []
  # organism already treats a missing taxon as "no information"; do the
  # same here instead of raising on nil.
  return tax if @entry.taxon.nil?

  taxon = Taxon.first(:conditions=>["taxon_id = ?",@entry.taxon.parent_taxon_id])
  while taxon and taxon.taxon_id != taxon.parent_taxon_id and taxon.node_rank!='no rank'
    tax << taxon.taxon_scientific_name.name if taxon.node_rank!='class'
    #Note: I don't like this call very much, correct with a relationship in the ref class.
    taxon = Taxon.first(:conditions=>["taxon_id = ?",taxon.parent_taxon_id])
  end
  tax.reverse
end
|
364
|
+
|
365
|
+
def length
|
366
|
+
@entry.biosequence.length
|
367
|
+
end
|
368
|
+
|
369
|
+
def references
|
370
|
+
#return and array of hash, hash has these keys ["title", "dbxref_id", "reference_id", "authors", "crc", "location"]
|
371
|
+
#probably would be better to d a class refrence to collect these informations
|
372
|
+
@entry.bioentry_references.collect do |bio_ref|
|
373
|
+
hash = Hash.new
|
374
|
+
hash['authors'] = bio_ref.reference.authors.gsub(/\.\s/, "\.\s\|").split(/\|/) if (bio_ref.reference and bio_ref.reference.authors)
|
375
|
+
|
376
|
+
hash['sequence_position'] = "#{bio_ref.start_pos}-#{bio_ref.end_pos}" if (bio_ref.start_pos and bio_ref.end_pos)
|
377
|
+
hash['title'] = bio_ref.reference.title
|
378
|
+
hash['embl_gb_record_number'] = bio_ref.rank
|
379
|
+
#TODO: solve the problem with specific comment per reference.
|
380
|
+
#TODO: get dbxref
|
381
|
+
#take a look when location is build up in def reference=(value)
|
382
|
+
|
383
|
+
bio_ref.reference.location.split('|').each do |element|
|
384
|
+
key,value=element.split('=')
|
385
|
+
hash[key]=value
|
386
|
+
end unless bio_ref.reference.location.nil?
|
387
|
+
|
388
|
+
hash['xrefs'] = bio_ref.reference.dbxref ? "#{bio_ref.reference.dbxref.dbname}; #{bio_ref.reference.dbxref.accession}." : ''
|
389
|
+
Bio::Reference.new(hash)
|
390
|
+
end
|
391
|
+
end
|
392
|
+
|
393
|
+
def comments
|
394
|
+
@entry.comments.map do |comment|
|
395
|
+
comment.comment_text
|
396
|
+
end
|
397
|
+
end
|
398
|
+
|
399
|
+
def reference=(value)
|
400
|
+
locations=Array.new
|
401
|
+
locations << "journal=#{value.journal}" unless value.journal.empty?
|
402
|
+
locations << "volume=#{value.volume}" unless value.volume.empty?
|
403
|
+
locations << "issue=#{value.issue}" unless value.issue.empty?
|
404
|
+
locations << "pages=#{value.pages}" unless value.pages.empty?
|
405
|
+
locations << "year=#{value.year}" unless value.year.empty?
|
406
|
+
locations << "pubmed=#{value.pubmed}" unless value.pubmed.empty?
|
407
|
+
locations << "medline=#{value.medline}" unless value.medline.empty?
|
408
|
+
locations << "doi=#{value.doi}" unless value.doi.nil?
|
409
|
+
locations << "abstract=#{value.abstract}" unless value.abstract.empty?
|
410
|
+
locations << "url=#{value.url}" unless value.url.nil?
|
411
|
+
locations << "mesh=#{value.mesh}" unless value.mesh.empty?
|
412
|
+
locations << "affiliations=#{value.affiliations}" unless value.affiliations.empty?
|
413
|
+
locations << "comments=#{value.comments.join('~')}"unless value.comments.nil?
|
414
|
+
start_pos, end_pos = value.sequence_position ? value.sequence_position.gsub(/\s*/,'').split('-') : [nil,nil]
|
415
|
+
reference= Reference.first(:conditions=>["title = ?",value.title]) || Reference.create({:title=>value.title,:authors=>value.authors.join(' '), :location=>locations.join('|')})
|
416
|
+
bio_reference=@entry.bioentry_references.build({:reference=>reference,:rank=>value.embl_gb_record_number, :start_pos=>start_pos, :end_pos=>end_pos})
|
417
|
+
bio_reference.save
|
418
|
+
end
|
419
|
+
|
420
|
+
def comment=(value)
|
421
|
+
#DELETE comment=Comment.new({:bioentry=>@entry, :comment_text=>value, :rank=>@entry.comments.count.succ})
|
422
|
+
comment = @entry.comments.build({:comment_text=>value, :rank=>@entry.comments.count.succ})
|
423
|
+
comment.save
|
424
|
+
end
|
425
|
+
|
426
|
+
# Saves the biosequence (when the entry has one) and then the entry.
# Returns the result of saving the entry.
def save
  #I should add chks for SQL errors
  # An entry may have no biosequence (seq copes with that case too);
  # skip it instead of raising on nil.
  @entry.biosequence.save unless @entry.biosequence.nil?
  @entry.save
end
|
431
|
+
# Returns the sequence in FASTA format: a ">accession" header line followed
# by the sequence wrapped at 60 characters per line.
def to_fasta
  # The accessor defined by this class is primary_accession; the bare name
  # "accession" used before is not defined here and raised NameError.
  ">#{primary_accession}\n" + seq.gsub(/.{1,60}/, "\\0\n")
end
|
434
|
+
|
435
|
+
# Returns the reverse complement of the sequence in FASTA format: a
# ">accession" header line followed by the sequence wrapped at 60 characters
# per line.
#
# NOTE(review): this relies on the object returned by seq answering
# reverse_complement; seq builds a Bio::Sequence::Generic -- confirm that
# class provides it.
def to_fasta_reverse_complement
  # The accessor defined by this class is primary_accession; the bare name
  # "accession" used before is not defined here and raised NameError.
  ">#{primary_accession}\n" + seq.reverse_complement.gsub(/.{1,60}/, "\\0\n")
end
# Historical, misspelled name kept so existing callers keep working.
alias to_fasta_reverse_complememt to_fasta_reverse_complement
|
438
|
+
|
439
|
+
def to_biosequence
|
440
|
+
Bio::Sequence.adapter(self,Bio::Sequence::Adapter::BioSQL)
|
441
|
+
end
|
442
|
+
end #Sequence
|
443
|
+
end #SQL
|
444
|
+
end #Bio
|