exodb 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/contributors.txt +1 -0
- data/exodb.gemspec +35 -0
- data/genome/process_genome_seq.rb +22 -0
- data/lib/exodb/addon/string.rb +26 -0
- data/lib/exodb/addon.rb +13 -0
- data/lib/exodb/datamodel/locationfield.rb +103 -0
- data/lib/exodb/datamodel/reference.rb +387 -0
- data/lib/exodb/datamodel/region.rb +51 -0
- data/lib/exodb/datamodel/source.rb +122 -0
- data/lib/exodb/datamodel/variant.rb +89 -0
- data/lib/exodb/datamodel/xrefsfield.rb +42 -0
- data/lib/exodb/datamodel.rb +19 -0
- data/lib/exodb/dbconnection.rb +83 -0
- data/lib/exodb/exception.rb +18 -0
- data/lib/exodb/usermanage.rb +84 -0
- data/lib/exodb/utils/upload_generef.rb +163 -0
- data/lib/exodb/utils/upload_var.rb +60 -0
- data/lib/exodb/utils.rb +42 -0
- data/lib/exodb/vcf.rb +193 -0
- data/lib/exodb/version.rb +15 -0
- data/lib/exodb.rb +44 -0
- data/session.yml +6 -0
- metadata +122 -0
data/lib/exodb/utils.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
#
|
2
|
+
# Exodb
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
require 'exodb/utils/upload_generef.rb'
|
13
|
+
require 'exodb/utils/upload_var.rb'
|
14
|
+
|
15
|
+
module Exodb

  module Utils

    module_function

    # Guess which MIRIAM registry an identifier belongs to.
    #
    # Patterns are tried from the most specific to the least specific. The
    # Ensembl pattern ends with a catch-all alternative that accepts almost
    # any alphanumeric token (including bare numbers and CCDS ids), so it has
    # to be tested last; otherwise the HGNC and CCDS branches can never match.
    #
    # @param [String] str identifier to classify
    # @return [String] "urn:miriam:<registry>:<str>", or str itself when no
    #   pattern matches
    def guess_miriam(str)
      case str
      when /\A((AC|AP|NC|NG|NM|NP|NR|NT|NW|XM|XP|XR|YP|ZP)_\d+|(NZ\_[A-Z]{4}\d+))(\.\d+)?\z/
        "urn:miriam:refseq:#{str}"
      when /\ACCDS\d+\.\d+\z/
        "urn:miriam:ccds:#{str}"
      when /\A((HGNC|hgnc):)?\d{1,5}\z/
        "urn:miriam:hgnc:#{str}"
      when /\A((ENS[A-Z]*[FPTG]\d{11}(\.\d+)?)|(FB\w{2}\d{7})|(Y[A-Z]{2}\d{3}[a-zA-Z](\-[A-Z])?)|([A-Z_a-z0-9]+(\.)?(t)?(\d+)?([a-z])?))\z/
        "urn:miriam:ensembl:#{str}"
      else
        str
      end
    end

  end

end
|
data/lib/exodb/vcf.rb
ADDED
@@ -0,0 +1,193 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
# Reader for Variant Call Format (VCF) text files.
#
# Header lines (those starting with "##") are collected into attribute tables
# while the file is read by #open. Data lines are stored unparsed as Variant
# objects and are only split into fields when #parse is called.
class VCF

  attr_reader :info_attr, :filter_attr, :format_attr, :meta, :contig, :samples, :variants, :alt_attr

  def initialize()
    @info_attr = {}
    @filter_attr = {}
    @format_attr = {}
    @alt_attr = {}
    @meta = {}
    @contig = {}
    # Start with an empty list so sample_num works before a header is read.
    @samples = []
    @variants = []
  end

  # Read a VCF file, recording its header declarations and raw data lines.
  #
  # The block form of File.open is used so the handle is closed even when a
  # header line fails to parse. Blank lines are ignored.
  #
  # @param [String] filename path of the VCF file
  # @return [File] the file handle, already closed
  def open(filename)
    File.open(filename) do |file|
      file.each_line do |raw|
        line = raw.chomp
        case line
        when /^##INFO/
          parse_info(line)
        when /^##FORMAT/
          parse_format(line)
        when /^##FILTER/
          parse_filter(line)
        when /^##contig/
          parse_contig(line)
        when /^##ALT/
          parse_alt(line)
        when /^##/
          parse_meta(line)
        when /^#/
          # Columns 0-8 are the fixed VCF columns; anything after is a sample.
          # A header without sample columns slices to nil, hence the fallback.
          @samples = line.split(/\s+/)[9..-1] || []
        else
          @variants.push(Variant.new(line)) unless line.strip.empty?
        end
      end
    end
  end

  def fileformat
    @meta[:fileformat]
  end

  # Split the stored data lines into fields.
  #
  # Variants are parsed sequentially in the calling thread, so every selected
  # variant is fully parsed when this method returns and parse errors
  # propagate to the caller.
  #
  # @param [Integer, nil] line maximum number of variants to parse, counted
  #   from the start of the file; nil parses all of them
  # @return [Array<Variant>] the variants that were selected for parsing
  def parse(line = 4000)
    selected = line.nil? ? @variants : @variants.first([line, 0].max)
    selected.each { |variant| variant.parse!(@info_attr, @format_attr) }
  end

  def sample_num
    @samples.length
  end

  def variant_num
    @variants.length
  end

  # One data line of a VCF file. Holds the raw text until parse! is called.
  class Variant

    attr_reader :chrom, :pos, :id, :ref, :alt, :qual, :filter, :info, :sample_data, :str

    # Convert a raw string according to a header "Type" value. Anything other
    # than 'Integer' or 'Float' is returned as a String.
    CASTING = lambda do |string, type|
      case type
      when 'Integer'
        string.to_i
      when 'Float'
        string.to_f
      else
        string.to_s
      end
    end

    # @param [String] str raw data line, without the trailing newline
    def initialize(str)
      @str = str
    end

    # Split the raw line into its columns. Does nothing when the line has
    # already been parsed; the raw text is dropped afterwards.
    #
    # @param [Hash] info_attr INFO declarations keyed by ID
    # @param [Hash] format_attr FORMAT declarations keyed by ID
    # @return [nil]
    def parse!(info_attr, format_attr)
      return unless @str

      fields = @str.split(/\s+/)
      @chrom = fields[0]
      @pos = fields[1].to_i
      @id = fields[2]
      @ref = fields[3]
      @alt = fields[4]
      @qual = fields[5]
      @filter = fields[6]
      @info = {}
      parse_info(fields[7], info_attr)

      # FORMAT and sample columns are absent in sites-only files.
      @formats = (fields[8] || '').split(/:/)
      @sample_data = (fields[9..-1] || []).map { |data| parse_format(data, @formats, format_attr) }
      @str = nil
    end

    private

    # Fill @info from the INFO column.
    #
    # Entries without "=" are stored as true. Keys that the header does not
    # declare are kept as lists of strings instead of raising.
    def parse_info(str, info_attr)
      return if str.nil? || str == '.'

      str.split(/;/).each do |entry|
        # Limit of 2 keeps any "=" inside the value intact.
        key, value = entry.split('=', 2)
        next if key.nil? || key.empty?

        declared = info_attr[key] || {}
        @info[key] =
          if value.nil? || declared[:Number] == '0'
            true
          elsif declared[:Number] == '1'
            CASTING.call(value, declared[:Type])
          else
            value.split(/,/).map { |e| CASTING.call(e, declared[:Type]) }
          end
      end
    end

    # Build a hash for one sample column, pairing each FORMAT key with the
    # value found at the same position in the sample string.
    #
    # @return [Hash] FORMAT key => cast value (or list of cast values)
    def parse_format(str, formats, format_attr)
      values = str.split(/:/)
      result = {}

      formats.each_with_index do |key, ind|
        declared = format_attr[key] || {}
        raw = values[ind]
        result[key] =
          case declared[:Number]
          when '1'
            # Cast with the type declared for this key, not the first key.
            CASTING.call(raw, declared[:Type])
          when '0'
            true
          else
            # Split the sample value (not the key name) into its elements.
            raw.to_s.split(/,/).map { |e| CASTING.call(e, declared[:Type]) }
          end
      end

      result
    end
  end

  private

  # Store a generic "##key=value" header line in @meta.
  def parse_meta(str)
    key, value = str[2..-1].split('=', 2)
    return if key.nil? || key.empty?

    @meta[key.to_sym] = value
  end

  # Store a "##contig=<...>" declaration, keyed by its ID like the other
  # declaration tables, so several contig lines can coexist.
  def parse_contig(str)
    data = parse_data(str[10..-1])
    @contig[data[:ID]] = data
  end

  def parse_filter(str)
    data = parse_data(str[10..-1])
    @filter_attr[data[:ID]] = data
  end

  def parse_info(str)
    data = parse_data(str[8..-1])
    @info_attr[data[:ID]] = data
  end

  def parse_format(str)
    data = parse_data(str[10..-1])
    @format_attr[data[:ID]] = data
  end

  def parse_alt(str)
    data = parse_data(str[7..-1])
    @alt_attr[data[:ID]] = data
  end

  # Turn the inside of a "<key=value,...>" declaration into a hash with
  # symbol keys. Quoted values keep their surrounding double quotes and may
  # contain commas and "="; unquoted values run up to the next "," or ">".
  #
  # @param [String, nil] str declaration text after the opening "<"
  # @return [Hash] attribute name (Symbol) => raw value (String)
  def parse_data(str)
    result = {}
    str.to_s.scan(/(\w+)=("(?:[^"\\]|\\.)*"|[^,>]+)/).each do |key, value|
      result[key.to_sym] = value
    end
    result
  end

end
|
185
|
+
|
186
|
+
|
187
|
+
|
188
|
+
# Demo driver: parse ./sample.vcf and dump the resulting object.
# Guarded so it only runs when this file is executed directly; without the
# guard every `require` of this file would try to open ./sample.vcf.
if __FILE__ == $PROGRAM_NAME
  vcf = VCF.new()
  vcf.open('./sample.vcf')
  #p vcf.sample_num
  #p vcf.variant_num
  vcf.parse
  p vcf
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
#
|
2
|
+
# Exodus
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
|
13
|
+
module Exodb
  # Version string of the exodb library. Frozen so the shared constant cannot
  # be mutated in place by code that reads it.
  VERSION = "0.0.2".freeze
end
|
data/lib/exodb.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
#
|
2
|
+
# Exodus
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
|
13
|
+
$: << File.join(File.expand_path(File.dirname(__FILE__)))
|
14
|
+
|
15
|
+
#specify gem version
|
16
|
+
gem 'mongoid', '~> 3.1'
|
17
|
+
|
18
|
+
#Internal library
|
19
|
+
require 'open-uri'
|
20
|
+
require 'json'
|
21
|
+
|
22
|
+
#External library
|
23
|
+
#require 'bio-vcf'
|
24
|
+
require 'highline/import'
|
25
|
+
require 'mongoid'
|
26
|
+
require 'bio'
|
27
|
+
require 'pry'
|
28
|
+
|
29
|
+
I18n.enforce_available_locales = false
|
30
|
+
|
31
|
+
#library
|
32
|
+
require 'exodb/dbconnection.rb'
|
33
|
+
require 'exodb/usermanage.rb'
|
34
|
+
require 'exodb/datamodel.rb'
|
35
|
+
require 'exodb/exception.rb'
|
36
|
+
require 'exodb/utils.rb'
|
37
|
+
require 'exodb/addon.rb'
|
38
|
+
|
39
|
+
|
40
|
+
# Top-level namespace of the exodb library.
#
# This file declares no methods of its own inside the module; the bare
# module_function call below has nothing following it here.
module Exodb
  module_function
end
|
data/session.yml
ADDED
metadata
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: exodb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Natapol Pornputtapong
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-10-31 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: mongoid
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '3.1'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '3.1'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bio
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.4'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.4'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: highline
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.6'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.6'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: pry
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0.10'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0.10'
|
69
|
+
description: A library for exome sequencing data management
|
70
|
+
email: natapol.por@gmail.com
|
71
|
+
executables: []
|
72
|
+
extensions: []
|
73
|
+
extra_rdoc_files: []
|
74
|
+
files:
|
75
|
+
- contributors.txt
|
76
|
+
- exodb.gemspec
|
77
|
+
- genome/process_genome_seq.rb
|
78
|
+
- lib/exodb.rb
|
79
|
+
- lib/exodb/addon.rb
|
80
|
+
- lib/exodb/addon/string.rb
|
81
|
+
- lib/exodb/datamodel.rb
|
82
|
+
- lib/exodb/datamodel/locationfield.rb
|
83
|
+
- lib/exodb/datamodel/reference.rb
|
84
|
+
- lib/exodb/datamodel/region.rb
|
85
|
+
- lib/exodb/datamodel/source.rb
|
86
|
+
- lib/exodb/datamodel/variant.rb
|
87
|
+
- lib/exodb/datamodel/xrefsfield.rb
|
88
|
+
- lib/exodb/dbconnection.rb
|
89
|
+
- lib/exodb/exception.rb
|
90
|
+
- lib/exodb/usermanage.rb
|
91
|
+
- lib/exodb/utils.rb
|
92
|
+
- lib/exodb/utils/upload_generef.rb
|
93
|
+
- lib/exodb/utils/upload_var.rb
|
94
|
+
- lib/exodb/vcf.rb
|
95
|
+
- lib/exodb/version.rb
|
96
|
+
- session.yml
|
97
|
+
homepage: http://rubygems.org/gems/exodb
|
98
|
+
licenses:
|
99
|
+
- GPL
|
100
|
+
metadata: {}
|
101
|
+
post_install_message:
|
102
|
+
rdoc_options: []
|
103
|
+
require_paths:
|
104
|
+
- lib
|
105
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
111
|
+
requirements:
|
112
|
+
- - '>='
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
version: '0'
|
115
|
+
requirements: []
|
116
|
+
rubyforge_project:
|
117
|
+
rubygems_version: 2.0.14
|
118
|
+
signing_key:
|
119
|
+
specification_version: 4
|
120
|
+
summary: A library for exome sequencing data management development
|
121
|
+
test_files: []
|
122
|
+
has_rdoc:
|