anncrsnp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +41 -0
- data/Rakefile +6 -0
- data/anncrsnp.gemspec +35 -0
- data/bin/console +14 -0
- data/bin/grdbfinder.rb +472 -0
- data/bin/grdbmanager.rb +226 -0
- data/bin/masterfeatures.rb +188 -0
- data/bin/setup +7 -0
- data/bin/statistics.rb +193 -0
- data/database/deleteme +0 -0
- data/lib/anncrsnp/dataset.rb +178 -0
- data/lib/anncrsnp/parsers/ucscparser.rb +35 -0
- data/lib/anncrsnp/version.rb +3 -0
- data/lib/anncrsnp.rb +5 -0
- metadata +144 -0
@@ -0,0 +1,178 @@
|
|
1
|
+
# Positions of the four fixed leading columns present in every record.
CHR = 0
START = 1
ENDING = 2
ID = 3

# In-memory table of records. Every record is an Array whose first four cells
# are chromosome, start, end and id (see CHR, START, ENDING, ID); any further
# cells are described by the :header metadata entry. Arbitrary metadata is
# kept in a Hash and accessed through get_metadata / add_metadata.
class Dataset
  # header: names of the extra columns that follow the four fixed ones.
  def initialize(header = [])
    @all_record = []
    @metadata = {}
    add_metadata(:header, [:chr, :start, :ending, :id].concat(header))
  end

  # Stores one record. Fixed col => 0 -> chr, 1 -> start, 2 -> end, 3 -> id.
  # The start and end cells are converted to integers in place, so the
  # caller's array is modified.
  def add_record(fields_array)
    fields_array[START] = fields_array[START].to_i
    fields_array[ENDING] = fields_array[ENDING].to_i
    @all_record << fields_array
  end

  # Stores every record of records_array through add_record.
  def add_record_batch(records_array)
    records_array.each { |record| add_record(record) }
  end

  # Returns the records on chromosome chr whose [start, end] interval shares
  # at least one position with the closed interval [start, ending]. Records
  # that completely contain the queried interval are included.
  def get_elements_on_position(chr, start, ending)
    @all_record.select do |record|
      record[CHR] == chr && record[START] <= ending && record[ENDING] >= start
    end
  end

  # Yields each stored record in insertion order.
  def each_record(&block)
    @all_record.each(&block)
  end

  # Number of stored records.
  def length
    @all_record.length
  end

  # First stored record, or nil when there are none.
  def first
    @all_record.first
  end

  # Last stored record, or nil when there are none.
  def last
    @all_record.last
  end

  # Keeps only the records whose col_name cell, converted with to_f, is
  # greater than or equal to thresold. The stored records are filtered in place.
  def numeric_filter(col_name, thresold)
    index = get_metadata(:header).index(col_name)
    @all_record.select! { |rec| rec[index].to_f >= thresold }
  end

  # Removes the named columns from every record and from the header.
  # Names that are not in the header are ignored.
  def drop_columns(col_names)
    header = get_metadata(:header)
    # Delete from the highest index down so that removing one cell never
    # shifts the position of a cell that still has to be removed.
    drop_index = col_names.map { |name| header.index(name) }.compact.uniq.sort.reverse
    each_record do |rec|
      drop_index.each { |ind| rec.delete_at(ind) }
    end
    add_metadata(:header, header - col_names)
  end

  # Appends to every record a new cell holding the Array of records from
  # dataset that overlap the record's own coordinates. The new column is
  # named results_column_name in the header, and the header of dataset is
  # remembered under the :added_fields metadata entry.
  def extract_records_by_coordinates(dataset, results_column_name)
    added_fields = get_metadata(:added_fields) || {}
    added_fields[results_column_name] = dataset.get_metadata(:header)
    add_metadata(:added_fields, added_fields)
    add_metadata(:header, get_metadata(:header).concat([results_column_name])) # Add new column to dataset
    each_record do |record|
      record << dataset.get_elements_on_position(record[CHR], record[START], record[ENDING])
    end
  end

  # True when no record is stored.
  def empty?
    @all_record.empty?
  end

  # Returns the metadata value stored under keyword (nil when absent).
  def get_metadata(keyword)
    @metadata[keyword]
  end

  # Stores value under keyword and returns value.
  def add_metadata(keyword, value)
    @metadata[keyword] = value
  end

  # Writes the dataset to "<output_path>.<output_format>". Only the 'html'
  # format is handled; any other format writes nothing.
  def write(output_path, output_format)
    path = "#{output_path}.#{output_format}"
    write_html(path) if output_format == 'html'
  end

  # Writes the dataset as an HTML table to path.
  def write_html(path)
    # Block form so the file is closed even if rendering a record raises.
    File.open(path, 'w') do |output_file|
      # Header
      output_file.puts "<html>",
                       "<header>",
                       "</header>",
                       "<body>"

      # Table
      output_file.puts '<table border=1>'
      output_file.puts write_html_table_header
      each_record do |record|
        output_file.puts create_record_template(record)
      end
      output_file.puts "</table>"

      # Footer
      output_file.puts "</body>",
                       "</html>"
    end
  end

  # Renders one record as one or more HTML table rows. Scalar cells go on the
  # first row; a cell that is an Array of records contributes one row per
  # contained record. Relies on @fields_number and @html_header, which are
  # set by write_html_table_header, so that method must be called first.
  def create_record_template(record)
    row_count = record.select { |field| field.class == Array }.map { |field| field.length }.max
    row_count = 1 if row_count.nil? || row_count == 0
    record_template = Array.new(row_count) { Array.new(@fields_number) { " " } }

    field_position = 0
    record.each_with_index do |field, column_number|
      if field.class != Array
        record_template[0][field_position] = field
        field_position += 1
      else
        field.each_with_index do |added_record, raw|
          added_record.each_with_index do |record_field, col|
            # NOTE(review): the "+ 1" shifts nested cells one column to the
            # right of field_position; this looks like it may misalign them
            # with the header row - confirm against real output before changing.
            record_template[raw][col + field_position + 1] = record_field
          end
        end
        # Advance by the width of the header group for this nested column:
        # group 0 is the base header, group k (k >= 1) the k-th added column.
        group = column_number - @html_header.first.last.length + 1
        field_position += @html_header[group].last.length
      end
    end

    record_template.map { |row| "<tr><td>#{row.join("</td><td>")}</td></tr>\n" }.join
  end

  # Builds the two HTML header rows: a row of group titles (each spanning its
  # columns) followed by a row of column names. Also records the group layout
  # in @html_header and the total column count in @fields_number for
  # create_record_template.
  def write_html_table_header
    # No :added_fields entry exists until extract_records_by_coordinates runs.
    added_fields = get_metadata(:added_fields) || {}
    @html_header = [[get_metadata(:classification), get_metadata(:header) - added_fields.keys]]
    added_fields.each do |classification, fields|
      @html_header << [classification, fields]
    end

    @fields_number = @html_header.inject(0) { |total, (_, fields)| total + fields.length }
    main_header = @html_header.map do |classification, fields|
      "<td colspan=\"#{fields.length}\">#{classification}</td>"
    end.join
    fields_header = @html_header.map do |_, fields|
      "<td>#{fields.map { |h| h.to_s }.join("</td><td>")}</td>"
    end.join

    "<tr>#{main_header}</tr>\n<tr>#{fields_header}</tr>"
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'dataset'
|
2
|
+
|
3
|
+
# Reads a tab-separated file and returns a Dataset with one record per line.
#
# file           - path of the file to read.
# header         - names of the extra columns, passed to Dataset.new.
# skip_first_col - when true (default), the first column of every line is
#                  discarded before the record is stored.
#
# The remaining columns must already be in Dataset order
# (chr, start, end, id, ...).
def parseUCSCformat(file, header, skip_first_col = true)
  dataset = Dataset.new(header)
  # File.foreach closes the file once every line has been read.
  File.foreach(file) do |line|
    fields = line.chomp.split("\t")
    fields.shift if skip_first_col
    dataset.add_record(fields)
  end
  dataset
end
|
13
|
+
|
14
|
+
# Reads a tab-separated file and returns a Dataset with one record per line,
# reordering the columns so the four cells Dataset expects come first.
#
# file           - path of the file to read.
# header         - names of the extra columns, passed to Dataset.new.
# skip_first_col - when true (default), the first column of every line is
#                  discarded before the columns are reordered.
#
# After the optional skip, input columns 1, 3, 4 and 11 become the record's
# chr, start, end and id cells; the other columns (0, 2, 5-10, 12-14) follow
# in that order. Columns missing from a line become nil.
def parseUCSCrefseqformat(file, header, skip_first_col = true)
  dataset = Dataset.new(header)
  # File.foreach closes the file once every line has been read.
  File.foreach(file) do |line|
    fields = line.chomp.split("\t")
    fields.shift if skip_first_col
    dataset.add_record(fields.values_at(1, 3, 4, 11, 0, 2, 5, 6, 7, 8, 9, 10, 12, 13, 14))
  end
  dataset
end
|
25
|
+
|
26
|
+
# Reads a comma-separated file and returns a Dataset with one record per line.
#
# file   - path of the file to read.
# header - names of the extra columns, passed to Dataset.new.
#
# Input columns 1, 2, 3 and 0 become the record's chr, start, end and id
# cells, followed by input columns 4 to 11 when present. Lines are split on
# every comma; quoted fields are not interpreted.
def parseDENdbCSVformat(file, header)
  dataset = Dataset.new(header)
  # File.foreach closes the file once every line has been read.
  File.foreach(file) do |line|
    fields = line.chomp.split(",")
    # fields[4..11] is nil for lines with fewer than four columns.
    extra = fields[4..11] || []
    dataset.add_record(fields.values_at(1, 2, 3, 0).concat(extra))
  end
  dataset
end
|
data/lib/anncrsnp.rb
ADDED
metadata
ADDED
@@ -0,0 +1,144 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: anncrsnp
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Elena Rojano
|
8
|
+
- Pedro Seoane
|
9
|
+
autorequire:
|
10
|
+
bindir: exe
|
11
|
+
cert_chain: []
|
12
|
+
date: 2016-01-24 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bundler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '1.10'
|
21
|
+
type: :development
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - "~>"
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '1.10'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: rake
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - "~>"
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '10.0'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - "~>"
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '10.0'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: rspec
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0'
|
49
|
+
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: sqlite3
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
type: :runtime
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: rubyzip
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
type: :runtime
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
description: 'AnNCR-SNP integrates data from various sources, allowing the user to
|
85
|
+
investigate the potential effects of variants in non-coding regions of the human
|
86
|
+
genome. AnNCR-SNP consists of a database containing data on all non-coding elements
|
87
|
+
and two main programs: manager and finder. The manager program is responsible for
|
88
|
+
creating the local database, and the finder program receives the user queries in
|
89
|
+
order to search in the local database and retrieve information. The user can find
|
90
|
+
information about various regulatory elements, such as TFBs, open chromatin, histone
|
91
|
+
modification and methylation sites, information about SNPs from dbSNP and gene
|
92
|
+
information from RefSeq.'
|
93
|
+
email:
|
94
|
+
- elenarojano@outlook.com
|
95
|
+
- seoanezonjic@hotmail.com
|
96
|
+
executables: []
|
97
|
+
extensions: []
|
98
|
+
extra_rdoc_files: []
|
99
|
+
files:
|
100
|
+
- ".gitignore"
|
101
|
+
- ".rspec"
|
102
|
+
- ".travis.yml"
|
103
|
+
- Gemfile
|
104
|
+
- LICENSE.txt
|
105
|
+
- README.md
|
106
|
+
- Rakefile
|
107
|
+
- anncrsnp.gemspec
|
108
|
+
- bin/console
|
109
|
+
- bin/grdbfinder.rb
|
110
|
+
- bin/grdbmanager.rb
|
111
|
+
- bin/masterfeatures.rb
|
112
|
+
- bin/setup
|
113
|
+
- bin/statistics.rb
|
114
|
+
- database/deleteme
|
115
|
+
- lib/anncrsnp.rb
|
116
|
+
- lib/anncrsnp/dataset.rb
|
117
|
+
- lib/anncrsnp/parsers/ucscparser.rb
|
118
|
+
- lib/anncrsnp/version.rb
|
119
|
+
homepage: ''
|
120
|
+
licenses:
|
121
|
+
- MIT
|
122
|
+
metadata: {}
|
123
|
+
post_install_message:
|
124
|
+
rdoc_options: []
|
125
|
+
require_paths:
|
126
|
+
- lib
|
127
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
133
|
+
requirements:
|
134
|
+
- - ">="
|
135
|
+
- !ruby/object:Gem::Version
|
136
|
+
version: '0'
|
137
|
+
requirements: []
|
138
|
+
rubyforge_project:
|
139
|
+
rubygems_version: 2.4.8
|
140
|
+
signing_key:
|
141
|
+
specification_version: 4
|
142
|
+
summary: Tool to characterize Single Nucleotide Polymorphisms (SNP) in genomic non-coding
|
143
|
+
regions.
|
144
|
+
test_files: []
|