bio-biosql 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.project +17 -0
- data/BSDL +22 -0
- data/COPYING +56 -0
- data/COPYING.ja +51 -0
- data/Gemfile +4 -0
- data/LEGAL +9 -0
- data/README.md +69 -0
- data/Rakefile +19 -0
- data/bio-biosql.gemspec +36 -0
- data/lib/bio-biosql/version.rb +5 -0
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +78 -0
- data/lib/bio/db/biosql/sequence.rb +444 -0
- data/lib/bio/io/biosql/ar-biosql.rb +257 -0
- data/lib/bio/io/biosql/biosql.rb +39 -0
- data/lib/bio/io/biosql/config/database.yml +21 -0
- data/lib/bio/io/sql.rb +79 -0
- data/test/bioruby_test_helper.rb +86 -0
- data/test/data/00dummy.txt +1 -0
- data/test/unit/bio/db/biosql/tc_biosql.rb +114 -0
- data/test/unit/bio/db/biosql/ts_suite_biosql.rb +8 -0
- metadata +176 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6b56ab2936a2ea661de08ec2fa82104a41c25208
|
4
|
+
data.tar.gz: d9f69c1c27b3c4d1d7a82dd346cf534226c1fda7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 133e9c0940a2443a219eff533dd0ffbf38d929a6f4049ef79e3799ac0789cd5d19121d9c011ceaa236f0d2a04fc833dc2c49506e938402b16dcca0ff1397894e
|
7
|
+
data.tar.gz: 5f5f36f25d48c8997c2360ceef23bcdb62bdf0b77e6f4b85cee0497172d19fe0d41ae0bcb360f6ab1eaca32ac7cc9c57b817d3f53a5dd7ff84c476fe6dc11b0d
|
data/.gitignore
ADDED
data/.project
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<projectDescription>
|
3
|
+
<name>bioruby</name>
|
4
|
+
<comment></comment>
|
5
|
+
<projects>
|
6
|
+
</projects>
|
7
|
+
<buildSpec>
|
8
|
+
<buildCommand>
|
9
|
+
<name>org.rubypeople.rdt.core.rubybuilder</name>
|
10
|
+
<arguments>
|
11
|
+
</arguments>
|
12
|
+
</buildCommand>
|
13
|
+
</buildSpec>
|
14
|
+
<natures>
|
15
|
+
<nature>org.rubypeople.rdt.core.rubynature</nature>
|
16
|
+
</natures>
|
17
|
+
</projectDescription>
|
data/BSDL
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (C) 1993-2013 Yukihiro Matsumoto. All rights reserved.
|
2
|
+
|
3
|
+
Redistribution and use in source and binary forms, with or without
|
4
|
+
modification, are permitted provided that the following conditions
|
5
|
+
are met:
|
6
|
+
1. Redistributions of source code must retain the above copyright
|
7
|
+
notice, this list of conditions and the following disclaimer.
|
8
|
+
2. Redistributions in binary form must reproduce the above copyright
|
9
|
+
notice, this list of conditions and the following disclaimer in the
|
10
|
+
documentation and/or other materials provided with the distribution.
|
11
|
+
|
12
|
+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
13
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
14
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
15
|
+
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
16
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
17
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
18
|
+
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
19
|
+
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
20
|
+
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
21
|
+
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
22
|
+
SUCH DAMAGE.
|
data/COPYING
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
bio-biosql is copyrighted free software by BioRuby project <staff@bioruby.org>.
|
2
|
+
You can redistribute it and/or modify it under either the terms of the
|
3
|
+
2-clause BSDL (see the file BSDL), or the conditions below:
|
4
|
+
|
5
|
+
1. You may make and give away verbatim copies of the source form of the
|
6
|
+
software without restriction, provided that you duplicate all of the
|
7
|
+
original copyright notices and associated disclaimers.
|
8
|
+
|
9
|
+
2. You may modify your copy of the software in any way, provided that
|
10
|
+
you do at least ONE of the following:
|
11
|
+
|
12
|
+
a) place your modifications in the Public Domain or otherwise
|
13
|
+
make them Freely Available, such as by posting said
|
14
|
+
modifications to Usenet or an equivalent medium, or by allowing
|
15
|
+
the author to include your modifications in the software.
|
16
|
+
|
17
|
+
b) use the modified software only within your corporation or
|
18
|
+
organization.
|
19
|
+
|
20
|
+
c) give non-standard binaries non-standard names, with
|
21
|
+
instructions on where to get the original software distribution.
|
22
|
+
|
23
|
+
d) make other distribution arrangements with the author.
|
24
|
+
|
25
|
+
3. You may distribute the software in object code or binary form,
|
26
|
+
provided that you do at least ONE of the following:
|
27
|
+
|
28
|
+
a) distribute the binaries and library files of the software,
|
29
|
+
together with instructions (in the manual page or equivalent)
|
30
|
+
on where to get the original distribution.
|
31
|
+
|
32
|
+
b) accompany the distribution with the machine-readable source of
|
33
|
+
the software.
|
34
|
+
|
35
|
+
c) give non-standard binaries non-standard names, with
|
36
|
+
instructions on where to get the original software distribution.
|
37
|
+
|
38
|
+
d) make other distribution arrangements with the author.
|
39
|
+
|
40
|
+
4. You may modify and include the part of the software into any other
|
41
|
+
software (possibly commercial). But some files in the distribution
|
42
|
+
are not written by the author, so that they are not under these terms.
|
43
|
+
|
44
|
+
For the list of those files and their copying conditions, see the
|
45
|
+
file LEGAL.
|
46
|
+
|
47
|
+
5. The scripts and library files supplied as input to or produced as
|
48
|
+
output from the software do not automatically fall under the
|
49
|
+
copyright of the software, but belong to whomever generated them,
|
50
|
+
and may be sold commercially, and may be aggregated with this
|
51
|
+
software.
|
52
|
+
|
53
|
+
6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
|
54
|
+
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
|
55
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
56
|
+
PURPOSE.
|
data/COPYING.ja
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
本プログラムはフリーソフトウェアです.2-clause BSDL
|
2
|
+
または以下に示す条件で本プログラムを再配布できます
|
3
|
+
2-clause BSDLについてはBSDLファイルを参照して下さい.
|
4
|
+
|
5
|
+
1. 複製は制限なく自由です.
|
6
|
+
|
7
|
+
2. 以下の条件のいずれかを満たす時に本プログラムのソースを
|
8
|
+
自由に変更できます.
|
9
|
+
|
10
|
+
(a) ネットニューズにポストしたり,作者に変更を送付する
|
11
|
+
などの方法で,変更を公開する.
|
12
|
+
|
13
|
+
(b) 変更した本プログラムを自分の所属する組織内部だけで
|
14
|
+
使う.
|
15
|
+
|
16
|
+
(c) 変更点を明示したうえ,ソフトウェアの名前を変更する.
|
17
|
+
そのソフトウェアを配布する時には変更前の本プログラ
|
18
|
+
ムも同時に配布する.または変更前の本プログラムのソー
|
19
|
+
スの入手法を明示する.
|
20
|
+
|
21
|
+
(d) その他の変更条件を作者と合意する.
|
22
|
+
|
23
|
+
3. 以下の条件のいずれかを満たす時に本プログラムをコンパイ
|
24
|
+
ルしたオブジェクトコードや実行形式でも配布できます.
|
25
|
+
|
26
|
+
(a) バイナリを受け取った人がソースを入手できるように,
|
27
|
+
ソースの入手法を明示する.
|
28
|
+
|
29
|
+
(b) 機械可読なソースコードを添付する.
|
30
|
+
|
31
|
+
(c) 変更を行ったバイナリは名前を変更したうえ,オリジナ
|
32
|
+
ルのソースコードの入手法を明示する.
|
33
|
+
|
34
|
+
(d) その他の配布条件を作者と合意する.
|
35
|
+
|
36
|
+
4. 他のプログラムへの引用はいかなる目的であれ自由です.た
|
37
|
+
だし,本プログラムに含まれる他の作者によるコードは,そ
|
38
|
+
れぞれの作者の意向による制限が加えられる場合があります.
|
39
|
+
|
40
|
+
それらファイルの一覧とそれぞれの配布条件などに付いては
|
41
|
+
LEGALファイルを参照してください.
|
42
|
+
|
43
|
+
5. 本プログラムへの入力となるスクリプトおよび,本プログラ
|
44
|
+
ムからの出力の権利は本プログラムの作者ではなく,それぞ
|
45
|
+
れの入出力を生成した人に属します.また,本プログラムに
|
46
|
+
組み込まれるための拡張ライブラリについても同様です.
|
47
|
+
|
48
|
+
6. 本プログラムは無保証です.作者は本プログラムをサポート
|
49
|
+
する意志はありますが,プログラム自身のバグあるいは本プ
|
50
|
+
ログラムの実行などから発生するいかなる損害に対しても責
|
51
|
+
任を持ちません.
|
data/Gemfile
ADDED
data/LEGAL
ADDED
data/README.md
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
# Bio::SQL -- BioRuby BioSQL Plugin
|
2
|
+
|
3
|
+
IMPORTANT NOTICE: This package is in a preliminary, experimental state.
|
4
|
+
It may not work as expected.
|
5
|
+
Please fix bugs and report to us.
|
6
|
+
|
7
|
+
bio-biosql is a [BioSQL](http://biosql.org/) plugin for
|
8
|
+
[BioRuby](http://bioruby.org/), an open source bioinformatics
|
9
|
+
library for Ruby.
|
10
|
+
|
11
|
+
BioSQL is a set of SQL schemas for storing biological sequences
|
12
|
+
with meta-data. BioSQL supports major open-source database engines.
|
13
|
+
See [BioSQL Documentation](http://biosql.org/) for details.
|
14
|
+
|
15
|
+
This code written in Ruby has historically been part of the core BioRuby
|
16
|
+
[gem](https://github.com/bioruby/bioruby), but has been split into its
|
17
|
+
own gem as part of an effort to
|
18
|
+
[modularize](http://bioruby.open-bio.org/wiki/Plugins)
|
19
|
+
BioRuby. bio-biosql and many more plugins are available at
|
20
|
+
[biogems.info](http://www.biogems.info/).
|
21
|
+
|
22
|
+
This code was initially written by Toshiaki Katayama.
|
23
|
+
Raoul Jean Pierre Bonnal greatly improved the code by introducing
|
24
|
+
ActiveRecord. For details of development, see
|
25
|
+
[github.com/helios/bioruby](https://github.com/helios/bioruby),
|
26
|
+
[github.com/bioruby/bioruby](https://github.com/bioruby/bioruby),
|
27
|
+
and the BioRuby mailing list archives.
|
28
|
+
|
29
|
+
## Installation
|
30
|
+
|
31
|
+
Add this line to your application's Gemfile:
|
32
|
+
|
33
|
+
```ruby
|
34
|
+
gem 'bio-biosql'
|
35
|
+
```
|
36
|
+
|
37
|
+
And then execute:
|
38
|
+
|
39
|
+
$ bundle
|
40
|
+
|
41
|
+
Or install it yourself as:
|
42
|
+
|
43
|
+
$ gem install bio-biosql
|
44
|
+
|
45
|
+
## Requirements
|
46
|
+
|
47
|
+
In addition to the dependencies listed in the Gemfile,
|
48
|
+
at least one ActiveRecord adapter will be needed.
|
49
|
+
|
50
|
+
* [pg](http://rubygems.org/gems/pg)
|
51
|
+
* [sqlite-ruby](http://rubygems.org/gems/sqlite-ruby)
|
52
|
+
* [sqlite3](http://rubygems.org/gems/sqlite3)
|
53
|
+
* [mysql](http://rubygems.org/gems/mysql)
|
54
|
+
* [mysql2](http://rubygems.org/gems/mysql2)
|
55
|
+
* [activerecord-oracle_enhanced-adapter](http://rubygems.org/gems/activerecord-oracle_enhanced-adapter)
|
56
|
+
|
57
|
+
TODO: Please test Bio::SQL with above adapters.
|
58
|
+
|
59
|
+
## Usage
|
60
|
+
|
61
|
+
TODO: Write usage instructions here
|
62
|
+
|
63
|
+
## Contributing
|
64
|
+
|
65
|
+
1. Fork it ( https://github.com/bioruby/bioruby-biosql/fork )
|
66
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
67
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
68
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
69
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require 'rdoc/task'
|
3
|
+
require 'rake/testtask'
|
4
|
+
|
5
|
+
task :default => "test"
|
6
|
+
|
7
|
+
# Unit test task. The test files shipped with this gem are named tc_*.rb
# (test cases) and ts_*.rb (suites); a "test_*.rb" glob matches none of
# them, which made `rake test` silently run nothing.
Rake::TestTask.new do |t|
  t.test_files = FileList["test/unit/**/tc_*.rb"]
end
|
10
|
+
|
11
|
+
Rake::RDocTask.new do |r|
|
12
|
+
r.rdoc_dir = "rdoc"
|
13
|
+
r.rdoc_files.include("README.md",
|
14
|
+
"COPYING", "COPYING.ja", "BSDL",
|
15
|
+
"lib/**/*.rb")
|
16
|
+
r.main = "README.md"
|
17
|
+
r.options << '--title' << 'Bio::SQL API documentation'
|
18
|
+
r.options << '--line-numbers'
|
19
|
+
end
|
data/bio-biosql.gemspec
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'bio-biosql/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "bio-biosql"
|
8
|
+
spec.version = Bio::SQL::VERSION
|
9
|
+
spec.authors = ["BioRuby project"]
|
10
|
+
spec.email = ["staff@bioruby.org"]
|
11
|
+
spec.summary = %q{BioSQL plugin for BioRuby}
|
12
|
+
spec.description = %q{Provides BioSQL support for BioRuby.}
|
13
|
+
spec.homepage = "http://github.com/bioruby/bioruby-biosql"
|
14
|
+
spec.license = "Ruby"
|
15
|
+
|
16
|
+
spec.files = `git ls-files -z`.split("\x0")
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.extra_rdoc_files = [ "README.md",
|
22
|
+
"COPYING", "COPYING.ja", "BSDL"
|
23
|
+
]
|
24
|
+
spec.rdoc_options << '--main' << 'README.md'
|
25
|
+
spec.rdoc_options << '--title' << 'Bio::SQL API documentation'
|
26
|
+
spec.rdoc_options << '--line-numbers'
|
27
|
+
|
28
|
+
spec.add_runtime_dependency "bio", "~> 1.5.0"
|
29
|
+
spec.add_runtime_dependency "activerecord", "~> 3.0.10"
|
30
|
+
spec.add_runtime_dependency "composite_primary_keys", "~> 3.1.10"
|
31
|
+
|
32
|
+
spec.add_development_dependency "bundler", "~> 1.7"
|
33
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
34
|
+
spec.add_development_dependency "rdoc", "~> 4"
|
35
|
+
spec.add_development_dependency "test-unit", "~> 3"
|
36
|
+
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
#
|
2
|
+
# = bio/db/biosql/biosql_to_biosequence.rb - Bio::SQL::Sequence to Bio::Sequence adapter module
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2008
|
5
|
+
# Naohisa Goto <ng@bioruby.org>,
|
6
|
+
# Raoul Jean Pierre Bonnal
|
7
|
+
# License:: The Ruby License
|
8
|
+
#
|
9
|
+
# $Id:$
|
10
|
+
#
|
11
|
+
|
12
|
+
require 'bio/sequence'
|
13
|
+
require 'bio/sequence/adapter'
|
14
|
+
require 'bio/sequence/dblink'
|
15
|
+
|
16
|
+
# Internal use only. Normal users should not use this module.
|
17
|
+
#
|
18
|
+
# Bio::SQL::Sequence to Bio::Sequence adapter module.
|
19
|
+
# It is internally used in Bio::SQL::Sequence#to_biosequence.
|
20
|
+
#
|
21
|
+
module Bio::Sequence::Adapter::BioSQL
|
22
|
+
|
23
|
+
extend Bio::Sequence::Adapter
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
def_biosequence_adapter :seq
|
28
|
+
|
29
|
+
def_biosequence_adapter :entry_id
|
30
|
+
|
31
|
+
def_biosequence_adapter :primary_accession
|
32
|
+
|
33
|
+
def_biosequence_adapter :secondary_accessions
|
34
|
+
|
35
|
+
def_biosequence_adapter :molecule_type
|
36
|
+
|
37
|
+
#--
|
38
|
+
#TODO: identify where is stored data_class in biosql
|
39
|
+
#++
|
40
|
+
|
41
|
+
def_biosequence_adapter :data_class
|
42
|
+
|
43
|
+
def_biosequence_adapter :definition, :description
|
44
|
+
|
45
|
+
def_biosequence_adapter :topology
|
46
|
+
|
47
|
+
def_biosequence_adapter :date_created
|
48
|
+
|
49
|
+
def_biosequence_adapter :date_modified
|
50
|
+
#do |bs|
|
51
|
+
# Date.parse(bs.date_modified.to_s).strftime("%d-%b-%Y").upcase
|
52
|
+
# end
|
53
|
+
|
54
|
+
def_biosequence_adapter :division
|
55
|
+
|
56
|
+
def_biosequence_adapter :sequence_version
|
57
|
+
|
58
|
+
def_biosequence_adapter :keywords
|
59
|
+
|
60
|
+
def_biosequence_adapter :species
|
61
|
+
|
62
|
+
def_biosequence_adapter :classification, :taxonomy
|
63
|
+
|
64
|
+
def_biosequence_adapter :references
|
65
|
+
|
66
|
+
def_biosequence_adapter :features
|
67
|
+
|
68
|
+
def_biosequence_adapter :comments
|
69
|
+
|
70
|
+
# Rebuilds Bio::Sequence::DBLink objects from the identifier string.
#
# The identifier is a list of "database:id" pairs. Bio::SQL::Sequence#to_biosql
# writes the list joined with ';', while this adapter historically split on
# ',' only, so both separators are accepted here. A nil identifier yields an
# empty list, and only the first ':' separates the database from the id so
# that ids containing ':' are kept intact.
def_biosequence_adapter :other_seqids do |orig|
  orig.identifier.to_s.split(/[;,]/).collect do |dblink|
    database, id = dblink.split(':', 2)
    Bio::Sequence::DBLink.new(database, id)
  end
end
|
76
|
+
|
77
|
+
end #module Bio::Sequence::Adapter::BioSQL
|
78
|
+
|
@@ -0,0 +1,444 @@
|
|
1
|
+
|
2
|
+
#TODO save on db reading from a genbank or embl object
|
3
|
+
module Bio
|
4
|
+
class SQL
|
5
|
+
|
6
|
+
|
7
|
+
|
8
|
+
class Sequence
|
9
|
+
private
|
10
|
+
# Declares accessors backed by the bioentry's qualifier values.
#
# Example:
#   bioentry_qualifier_anchor :molecule_type, :synonym => 'mol_type'
#
# defines three instance methods:
#
# * molecule_type                     - reads every value stored for the
#                                       qualifier term. If the values, taken
#                                       together as a string, parse as a date,
#                                       a "DD-MON-YYYY" upper-case string is
#                                       returned; otherwise the array of values.
# * molecule_type=(value)             - appends a value, ranked one above the
#                                       last existing row (or 1 if none).
# * molecule_type_update(value, rank) - overwrites the value stored at +rank+,
#                                       or appends it when no such row exists.
#
# The qualifier term name is taken from +sym+ unless a :synonym option is
# given. A missing term is created under the 'Annotation Tags' ontology.
#
# Errors raised inside the generated methods are reported on stdout and the
# method then returns nil; non-StandardError exceptions (signals, exit) are
# no longer swallowed.
#
# TODO: implement setting for more than one qualifier value.
def self.bioentry_qualifier_anchor(sym, *args)
  options = args.first || {}
  method_reader = sym.to_s.to_sym
  method_writer_operator = "#{sym}=".to_sym
  method_writer_modder = "#{sym}_update".to_sym
  synonym = options[:synonym].nil? ? sym.to_s : options[:synonym]

  # Finds the qualifier term, creating it on first use.
  find_term = lambda do
    Term.first(:conditions => ["name = ?", synonym]) ||
      Term.create({:name => synonym, :ontology => Ontology.first(:conditions => ["name = ?", 'Annotation Tags'])})
  end

  send :define_method, method_reader do
    begin
      term = find_term.call
      rows = @entry.bioentry_qualifier_values.all(:conditions => ["term_id = ?", term.term_id])
      data = rows.map { |row| row.value } unless rows.nil?
      begin
        # Dates are rendered in GenBank/EMBL style; anything that does not
        # parse as a date falls through to the raw array of values.
        Date.parse(data.to_s).strftime("%d-%b-%Y").upcase
      rescue ArgumentError, TypeError, NoMethodError, NameError
        data
      end
    rescue StandardError => e
      puts "Reader Error: #{synonym} #{e.message}"
    end
  end

  send :define_method, method_writer_operator do |value|
    begin
      term = find_term.call
      existing = @entry.bioentry_qualifier_values.all(:conditions => ["term_id = ?", term.term_id])
      # Append after the last existing row, or start the ranking at 1.
      next_rank = existing.empty? ? 1 : existing.last.rank.succ
      row = @entry.bioentry_qualifier_values.build({:term => term, :rank => next_rank, :value => value})
      row.save
    rescue StandardError => e
      puts "WriterOperator= Error: #{synonym} #{e.message}"
    end
  end

  send :define_method, method_writer_modder do |value, rank|
    begin
      term = find_term.call
      # Fetch the single row at this rank. The previous code asked for a
      # collection, so the nil check never matched and the update could
      # not be applied.
      row = @entry.bioentry_qualifier_values.first(:conditions => ["term_id = ? and rank = ?", term.term_id, rank])
      if row.nil?
        send method_writer_operator, value
      else
        row.value = value
        row.save
      end
    rescue StandardError => e
      puts "WriterModder Error: #{synonym} #{e.message}"
    end
  end
end
|
81
|
+
|
82
|
+
public
|
83
|
+
attr_reader :entry
|
84
|
+
|
85
|
+
def delete
|
86
|
+
#TODO: check is references connected to this bioentry are leaf or not.
|
87
|
+
#actually I think it should be more sofisticated, check if there are
|
88
|
+
#other bioentries connected to references; if not delete 'em
|
89
|
+
@entry.references.each { |ref| ref.delete if ref.bioentries.size==1}
|
90
|
+
@entry.destroy
|
91
|
+
end
|
92
|
+
|
93
|
+
def get_seqfeature(sf)
|
94
|
+
|
95
|
+
#in seqfeature BioSQL class
|
96
|
+
locations_str = sf.locations.map{|loc| loc.to_s}.join(',')
|
97
|
+
#pp sf.locations.inspect
|
98
|
+
locations_str = "join(#{locations_str})" if sf.locations.count>1
|
99
|
+
Bio::Feature.new(sf.type_term.name, locations_str,sf.seqfeature_qualifier_values.collect{|sfqv| Bio::Feature::Qualifier.new(sfqv.term.name,sfqv.value)})
|
100
|
+
end
|
101
|
+
|
102
|
+
def length=(len)
|
103
|
+
@entry.biosequence.length=len
|
104
|
+
end
|
105
|
+
|
106
|
+
def initialize(options={})
|
107
|
+
#options.assert_valid_keys(:entry, :biodatabase,:biosequence)
|
108
|
+
return @entry = options[:entry] unless options[:entry].nil?
|
109
|
+
|
110
|
+
return to_biosql(options[:biosequence], options[:biodatabase]) unless options[:biosequence].nil? or options[:biodatabase].nil?
|
111
|
+
|
112
|
+
end
|
113
|
+
|
114
|
+
# Stores the Bio::Sequence-like object +bs+ as a new bioentry of
# +biodatabase+ and makes it the entry wrapped by this object.
#
# The bioentry row is saved first (name, accession, description, version,
# division, identifier) because the qualifier, sequence, feature, reference
# and comment rows written afterwards are attached to it.
#
# Returns self on success. If a StandardError is raised, the message is
# printed to stdout and nil is returned. Progress markers are printed when
# $DEBUG is set.
def to_biosql(bs, biodatabase)
  begin
    @entry = biodatabase.bioentries.build({:name => bs.entry_id})

    puts "primary" if $DEBUG
    self.primary_accession = bs.primary_accession

    puts "def" if $DEBUG
    self.definition = bs.definition unless bs.definition.nil?

    puts "seqver" if $DEBUG
    self.sequence_version = bs.sequence_version || 0

    puts "divi" if $DEBUG
    self.division = bs.division unless bs.division.nil?

    puts "identifier" if $DEBUG
    unless bs.other_seqids.nil?
      self.identifier = bs.other_seqids.collect { |dblink| "#{dblink.database}:#{dblink.id}" }.join(';')
    end
    @entry.save

    puts "secacc" if $DEBUG
    unless bs.secondary_accessions.nil?
      bs.secondary_accessions.each do |sa|
        puts "#{sa}" if $DEBUG
        # each secondary accession is written as its own qualifier value
        self.secondary_accessions = sa
      end
    end

    # the sequence row needs the entry to exist already
    puts "seq" if $DEBUG
    puts bs.seq if $DEBUG
    self.seq = bs.seq unless bs.seq.nil?

    puts "mol" if $DEBUG
    self.molecule_type = bs.molecule_type unless bs.molecule_type.nil?

    puts "dc" if $DEBUG
    self.data_class = bs.data_class unless bs.data_class.nil?

    puts "top" if $DEBUG
    self.topology = bs.topology unless bs.topology.nil?

    puts "datec" if $DEBUG
    self.date_created = bs.date_created unless bs.date_created.nil?

    puts "datemod" if $DEBUG
    self.date_modified = bs.date_modified unless bs.date_modified.nil?

    puts "key" if $DEBUG
    unless bs.keywords.nil?
      # each keyword is written as its own qualifier value
      bs.keywords.each { |kw| self.keywords = kw }
    end

    puts "spec" if $DEBUG
    # species may be nil as well as empty; calling empty? on nil would abort
    # the whole import.
    self.species = bs.species unless bs.species.nil? || bs.species.empty?
    puts "Debug: #{bs.species}" if $DEBUG

    puts "Debug: feat..start" if $DEBUG
    bs.features.each { |feat| self.feature = feat } unless bs.features.nil?
    puts "Debug: feat...end" if $DEBUG

    bs.references.each { |reference| self.reference = reference } unless bs.references.nil?

    bs.comments.each { |comment| self.comment = comment } unless bs.comments.nil?

    return self
  rescue StandardError => e
    puts "to_biosql exception: #{e}"
    puts $!
  end
end #to_biosql
|
193
|
+
|
194
|
+
|
195
|
+
def name
|
196
|
+
@entry.name
|
197
|
+
end
|
198
|
+
alias entry_id name
|
199
|
+
|
200
|
+
def name=(value)
|
201
|
+
@entry.name=value
|
202
|
+
end
|
203
|
+
alias entry_id= name=
|
204
|
+
|
205
|
+
def primary_accession
|
206
|
+
@entry.accession
|
207
|
+
end
|
208
|
+
|
209
|
+
def primary_accession=(value)
|
210
|
+
@entry.accession=value
|
211
|
+
end
|
212
|
+
|
213
|
+
#TODO def secondary_accession
|
214
|
+
# @entry.bioentry_qualifier_values
|
215
|
+
# end
|
216
|
+
|
217
|
+
def organism
|
218
|
+
@entry.taxon.nil? ? "" : "#{@entry.taxon.taxon_scientific_name.name}"+ (@entry.taxon.taxon_genbank_common_name ? "(#{@entry.taxon.taxon_genbank_common_name.name})" : '')
|
219
|
+
end
|
220
|
+
alias species organism
|
221
|
+
|
222
|
+
def organism=(value)
|
223
|
+
#FIX there is a shortcut
|
224
|
+
taxon_name=TaxonName.first(:conditions=>["name = ? and name_class = ?",value.gsub(/\s+\(.+\)/,''),'scientific name'])
|
225
|
+
if taxon_name.nil?
|
226
|
+
puts "Error value doesn't exists in taxon_name table with scientific name constraint."
|
227
|
+
else
|
228
|
+
@entry.taxon_id=taxon_name.taxon_id
|
229
|
+
@entry.save
|
230
|
+
end
|
231
|
+
end
|
232
|
+
alias species= organism=
|
233
|
+
|
234
|
+
def database
|
235
|
+
@entry.biodatabase.name
|
236
|
+
end
|
237
|
+
|
238
|
+
def database_desc
|
239
|
+
@entry.biodatabase.description
|
240
|
+
end
|
241
|
+
|
242
|
+
def version
|
243
|
+
@entry.version
|
244
|
+
end
|
245
|
+
alias sequence_version version
|
246
|
+
|
247
|
+
def version=(value)
|
248
|
+
@entry.version=value
|
249
|
+
end
|
250
|
+
alias sequence_version= version=
|
251
|
+
|
252
|
+
def division
|
253
|
+
@entry.division
|
254
|
+
end
|
255
|
+
|
256
|
+
def division=(value)
|
257
|
+
@entry.division=value
|
258
|
+
end
|
259
|
+
|
260
|
+
def description
|
261
|
+
@entry.description
|
262
|
+
end
|
263
|
+
alias definition description
|
264
|
+
|
265
|
+
def description=(value)
|
266
|
+
@entry.description=value
|
267
|
+
end
|
268
|
+
alias definition= description=
|
269
|
+
|
270
|
+
def identifier
|
271
|
+
@entry.identifier
|
272
|
+
end
|
273
|
+
alias other_seqids identifier
|
274
|
+
|
275
|
+
def identifier=(value)
|
276
|
+
@entry.identifier=value
|
277
|
+
end
|
278
|
+
|
279
|
+
bioentry_qualifier_anchor :data_class
|
280
|
+
bioentry_qualifier_anchor :molecule_type, :synonym=>'mol_type'
|
281
|
+
bioentry_qualifier_anchor :topology
|
282
|
+
bioentry_qualifier_anchor :date_created
|
283
|
+
bioentry_qualifier_anchor :date_modified, :synonym=>'date_changed'
|
284
|
+
bioentry_qualifier_anchor :keywords, :synonym=>'keyword'
|
285
|
+
bioentry_qualifier_anchor :secondary_accessions, :synonym=>'secondary_accession'
|
286
|
+
|
287
|
+
def features
|
288
|
+
@entry.seqfeatures.collect do |sf|
|
289
|
+
self.get_seqfeature(sf)
|
290
|
+
end
|
291
|
+
end
|
292
|
+
|
293
|
+
def feature=(feat)
|
294
|
+
#ToDo: avoid Ontology find here, probably more efficient create class variables
|
295
|
+
#DELETE type_term_ontology = Ontology.find_or_create({:name=>'SeqFeature Keys'})
|
296
|
+
puts "feature:type_term = #{feat.feature}" if $DEBUG
|
297
|
+
type_term = Term.first(:conditions=>["name = ?", feat.feature]) || Term.create({:name=>feat.feature, :ontology=>Ontology.first(:conditions=>["name = ?",'SeqFeature Keys'])})
|
298
|
+
#DELETE source_term_ontology = Ontology.find_or_create({:name=>'SeqFeature Sources'})
|
299
|
+
puts "feature:source_term" if $DEBUG
|
300
|
+
source_term = Term.first(:conditions=>["name = ?",'EMBLGenBankSwit'])
|
301
|
+
puts "feature:seqfeature" if $DEBUG
|
302
|
+
seqfeature = @entry.seqfeatures.build({:source_term=>source_term, :type_term=>type_term, :rank=>@entry.seqfeatures.count.succ, :display_name=>''})
|
303
|
+
seqfeature.save
|
304
|
+
puts "feature:location" if $DEBUG
|
305
|
+
feat.locations.each do |loc|
|
306
|
+
location = seqfeature.locations.build({:seqfeature=>seqfeature, :start_pos=>loc.from, :end_pos=>loc.to, :strand=>loc.strand, :rank=>seqfeature.locations.count.succ})
|
307
|
+
location.save
|
308
|
+
end
|
309
|
+
|
310
|
+
#DELETE qual_term_ontology = Ontology.find_or_create({:name=>'Annotation Tags'})
|
311
|
+
|
312
|
+
puts "feature:qualifier" if $DEBUG
|
313
|
+
feat.each do |qualifier|
|
314
|
+
#DELETE qual_term = Term.find_or_create({:name=>qualifier.qualifier}, {:ontology=>qual_term_ontology})
|
315
|
+
qual_term = Term.first(:conditions=>["name = ?", qualifier.qualifier]) || Term.create({:name=>qualifier.qualifier, :ontology=>Ontology.first(:conditions=>["name = ?", 'Annotation Tags'])})
|
316
|
+
qual = seqfeature.seqfeature_qualifier_values.build({:seqfeature=>seqfeature, :term=>qual_term, :value=>qualifier.value.to_s, :rank=>seqfeature.seqfeature_qualifier_values.count.succ})
|
317
|
+
qual.save
|
318
|
+
|
319
|
+
end
|
320
|
+
end
|
321
|
+
|
322
|
+
#return the seqfeature mapped from BioSQL with a type_term like 'CDS'
|
323
|
+
def cdsfeatures
|
324
|
+
@entry.cdsfeatures
|
325
|
+
end
|
326
|
+
|
327
|
+
# Returns the sequence.
|
328
|
+
# Returns a Bio::Sequence::Generic object.
|
329
|
+
|
330
|
+
def seq
|
331
|
+
s = @entry.biosequence
|
332
|
+
Bio::Sequence::Generic.new(s ? s.seq : '')
|
333
|
+
end
|
334
|
+
|
335
|
+
# Sets the sequence string of the entry and its length, then saves.
#
# A Biosequence row is created when the entry has none yet; otherwise the
# existing row is updated. The biosequence is saved explicitly *after* both
# the sequence and its length have been assigned, so the stored row always
# carries the new length and an update of an existing row is written too.
# NOTE(review): whether saving the entry alone would also save the
# biosequence depends on the association options declared elsewhere.
#
# TODO: check which alphabet the sequence uses (NA/AA/nil).
def seq=(value)
  if @entry.biosequence.nil?
    @entry.biosequence = Biosequence.new(:seq => value)
  else
    @entry.biosequence.seq = value
  end
  self.length = value.length
  @entry.biosequence.save
  @entry.save
end
|
352
|
+
|
353
|
+
# Returns the scientific names of the entry's ancestor taxa, outermost first.
#
# The walk starts at the parent of the entry's taxon and follows
# parent_taxon_id upwards. It stops at a node that is its own parent, at a
# node ranked 'no rank', or when a parent cannot be found. Nodes ranked
# 'class' are skipped (open question from the original author: should they
# be reported?). Returns an empty array when the entry has no taxon.
def taxonomy
  return [] if @entry.taxon.nil?

  lineage = []
  taxon = Taxon.first(:conditions => ["taxon_id = ?", @entry.taxon.parent_taxon_id])
  while taxon && taxon.taxon_id != taxon.parent_taxon_id && taxon.node_rank != 'no rank'
    lineage << taxon.taxon_scientific_name.name if taxon.node_rank != 'class'
    # Note: a parent relationship on the Taxon class would avoid this query.
    taxon = Taxon.first(:conditions => ["taxon_id = ?", taxon.parent_taxon_id])
  end
  lineage.reverse
end
|
364
|
+
|
365
|
+
def length
|
366
|
+
@entry.biosequence.length
|
367
|
+
end
|
368
|
+
|
369
|
+
# Returns one Bio::Reference per bioentry_reference row of the entry.
#
# Each Bio::Reference is built from a hash with the keys 'authors',
# 'sequence_position', 'title', 'embl_gb_record_number', 'xrefs', plus
# whatever key=value pairs are packed (separated by '|') in the reference's
# location column.
#
# A row whose reference is missing still yields a Bio::Reference (with a nil
# title and empty xrefs) instead of raising. Location values are split on
# the first '=' only, so values that contain '=' themselves, such as URLs
# with a query string, are kept whole.
#
# TODO: solve the problem with specific comment per reference.
# TODO: get dbxref
def references
  @entry.bioentry_references.collect do |bio_ref|
    ref = bio_ref.reference
    hash = {}
    # authors are stored as one string; split after each ". "
    hash['authors'] = ref.authors.gsub(/\.\s/, ". |").split(/\|/) if ref && ref.authors

    hash['sequence_position'] = "#{bio_ref.start_pos}-#{bio_ref.end_pos}" if bio_ref.start_pos && bio_ref.end_pos
    hash['title'] = ref ? ref.title : nil
    hash['embl_gb_record_number'] = bio_ref.rank

    # location holds the pairs assembled in reference=(value)
    unless ref.nil? || ref.location.nil?
      ref.location.split('|').each do |element|
        key, value = element.split('=', 2)
        hash[key] = value
      end
    end

    hash['xrefs'] = (ref && ref.dbxref) ? "#{ref.dbxref.dbname}; #{ref.dbxref.accession}." : ''
    Bio::Reference.new(hash)
  end
end
|
392
|
+
|
393
|
+
def comments
|
394
|
+
@entry.comments.map do |comment|
|
395
|
+
comment.comment_text
|
396
|
+
end
|
397
|
+
end
|
398
|
+
|
399
|
+
def reference=(value)
|
400
|
+
locations=Array.new
|
401
|
+
locations << "journal=#{value.journal}" unless value.journal.empty?
|
402
|
+
locations << "volume=#{value.volume}" unless value.volume.empty?
|
403
|
+
locations << "issue=#{value.issue}" unless value.issue.empty?
|
404
|
+
locations << "pages=#{value.pages}" unless value.pages.empty?
|
405
|
+
locations << "year=#{value.year}" unless value.year.empty?
|
406
|
+
locations << "pubmed=#{value.pubmed}" unless value.pubmed.empty?
|
407
|
+
locations << "medline=#{value.medline}" unless value.medline.empty?
|
408
|
+
locations << "doi=#{value.doi}" unless value.doi.nil?
|
409
|
+
locations << "abstract=#{value.abstract}" unless value.abstract.empty?
|
410
|
+
locations << "url=#{value.url}" unless value.url.nil?
|
411
|
+
locations << "mesh=#{value.mesh}" unless value.mesh.empty?
|
412
|
+
locations << "affiliations=#{value.affiliations}" unless value.affiliations.empty?
|
413
|
+
locations << "comments=#{value.comments.join('~')}"unless value.comments.nil?
|
414
|
+
start_pos, end_pos = value.sequence_position ? value.sequence_position.gsub(/\s*/,'').split('-') : [nil,nil]
|
415
|
+
reference= Reference.first(:conditions=>["title = ?",value.title]) || Reference.create({:title=>value.title,:authors=>value.authors.join(' '), :location=>locations.join('|')})
|
416
|
+
bio_reference=@entry.bioentry_references.build({:reference=>reference,:rank=>value.embl_gb_record_number, :start_pos=>start_pos, :end_pos=>end_pos})
|
417
|
+
bio_reference.save
|
418
|
+
end
|
419
|
+
|
420
|
+
def comment=(value)
|
421
|
+
#DELETE comment=Comment.new({:bioentry=>@entry, :comment_text=>value, :rank=>@entry.comments.count.succ})
|
422
|
+
comment = @entry.comments.build({:comment_text=>value, :rank=>@entry.comments.count.succ})
|
423
|
+
comment.save
|
424
|
+
end
|
425
|
+
|
426
|
+
# Saves the biosequence (when the entry has one) and then the entry itself.
# Returns the result of saving the entry.
#
# An entry without a sequence is a valid state (see #seq, which returns an
# empty sequence for it), so a missing biosequence is skipped rather than
# raising NoMethodError.
#
# TODO: add checks for SQL errors.
def save
  biosequence = @entry.biosequence
  biosequence.save unless biosequence.nil?
  @entry.save
end
|
431
|
+
# Returns the entry as a FASTA-formatted string: a ">accession" header line
# followed by the sequence wrapped at 60 characters per line.
#
# The header uses #primary_accession; the previous code called an
# +accession+ method that this class does not define.
def to_fasta
  ">#{primary_accession}\n" + seq.gsub(/.{1,60}/, "\\0\n")
end
|
434
|
+
|
435
|
+
# Returns the reverse complement of the entry's sequence as a FASTA-formatted
# string (">accession" header, sequence wrapped at 60 characters per line).
#
# The header uses #primary_accession; the previous code called an
# +accession+ method that this class does not define.
# NOTE(review): relies on the object returned by #seq responding to
# reverse_complement - confirm this holds for Bio::Sequence::Generic.
#
# The method name keeps its historical misspelling for compatibility;
# to_fasta_reverse_complement is provided as the correctly spelled alias.
def to_fasta_reverse_complememt
  ">#{primary_accession}\n" + seq.reverse_complement.gsub(/.{1,60}/, "\\0\n")
end
alias to_fasta_reverse_complement to_fasta_reverse_complememt
|
438
|
+
|
439
|
+
def to_biosequence
|
440
|
+
Bio::Sequence.adapter(self,Bio::Sequence::Adapter::BioSQL)
|
441
|
+
end
|
442
|
+
end #Sequence
|
443
|
+
end #SQL
|
444
|
+
end #Bio
|