anncrsnp 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,178 @@
1
# Positions of the four mandatory fields at the start of every record.
CHR = 0
START = 1
ENDING = 2
ID = 3

# In-memory table of genomic records.
#
# Every record is an Array whose first four fields are chromosome, start,
# end and identifier (see CHR, START, ENDING, ID); any further fields are
# described by the extra column names given to the constructor. Free-form
# metadata (the column header, a classification label, the columns added by
# coordinate look-ups) is kept in a Hash keyed by symbol.
class Dataset
  # @param header [Array<Symbol>] names of the columns that follow the four
  #   fixed ones; the caller's array is not modified.
  def initialize(header = [])
    @all_record = []
    @metadata = {}
    add_metadata(:header, [:chr, :start, :ending, :id].concat(header))
  end

  # Appends one record. The start and end fields are converted to Integer
  # in place, so the array passed in is modified.
  #
  # @param fields_array [Array] fixed columns: 0 chr, 1 start, 2 end, 3 id
  # @return [Array<Array>] the internal list of records
  def add_record(fields_array)
    fields_array[START] = fields_array[START].to_i
    fields_array[ENDING] = fields_array[ENDING].to_i
    @all_record << fields_array
  end

  # Appends every record of +records_array+ through #add_record.
  def add_record_batch(records_array)
    records_array.each { |record| add_record(record) }
  end

  # Returns the records located on +chr+ that overlap the closed interval
  # [start, ending].
  #
  # A record overlaps when its start or its end lies inside the interval, or
  # when the record spans the whole interval. The last case used to be
  # missed, so a short query placed inside a longer record found nothing.
  #
  # @return [Array<Array>] matching records, in insertion order
  def get_elements_on_position(chr, start, ending)
    @all_record.select do |record|
      next false unless record[CHR] == chr

      starts_inside = record[START] >= start && record[START] <= ending
      ends_inside = record[ENDING] >= start && record[ENDING] <= ending
      spans_query = record[START] < start && record[ENDING] > ending
      starts_inside || ends_inside || spans_query
    end
  end

  # Yields every record in insertion order. Without a block an Enumerator is
  # returned instead of raising LocalJumpError.
  def each_record
    return to_enum(:each_record) unless block_given?

    @all_record.each { |record| yield(record) }
  end

  # @return [Integer] number of stored records
  def length
    @all_record.length
  end

  # @return [Array, nil] first stored record
  def first
    @all_record.first
  end

  # @return [Array, nil] last stored record
  def last
    @all_record.last
  end

  # Keeps only the records whose +col_name+ value, converted with to_f, is
  # greater than or equal to +thresold+. The dataset is modified in place.
  #
  # @return [Array, nil] result of Array#select! (nil when nothing was removed)
  def numeric_filter(col_name, thresold)
    index = get_metadata(:header).index(col_name)
    @all_record.select! { |rec| rec[index].to_f >= thresold }
  end

  # Removes the named columns from every record and from the header.
  # Names that are not in the header are ignored.
  def drop_columns(col_names)
    header = get_metadata(:header)
    # Delete from the highest index downwards: removing a lower index first
    # would shift the columns still waiting to be removed.
    drop_index = col_names.map { |name| header.index(name) }.compact.uniq.sort.reverse
    each_record do |rec|
      drop_index.each { |ind| rec.delete_at(ind) }
    end
    add_metadata(:header, header - col_names)
  end

  # Adds a column named +results_column_name+ holding, for each record, the
  # records of +dataset+ found by #get_elements_on_position at that record's
  # chromosome, start and end. The header of +dataset+ is stored under the
  # :added_fields metadata so the nested fields can be labelled later.
  def extract_records_by_coordinates(dataset, results_column_name)
    added_fields = get_metadata(:added_fields) || {}
    added_fields[results_column_name] = dataset.get_metadata(:header)
    add_metadata(:added_fields, added_fields)
    add_metadata(:header, get_metadata(:header).concat([results_column_name])) # Add new column to dataset
    each_record do |record|
      record << dataset.get_elements_on_position(record[CHR], record[START], record[ENDING])
    end
  end

  # @return [Boolean] true when no record has been stored
  def empty?
    @all_record.empty?
  end

  # @return [Object, nil] metadata stored under +keyword+
  def get_metadata(keyword)
    @metadata[keyword]
  end

  # Stores +value+ under +keyword+ and returns it.
  def add_metadata(keyword, value)
    @metadata[keyword] = value
  end

  # Writes the dataset to "<output_path>.<output_format>". Only the 'html'
  # format is implemented; any other value writes nothing.
  def write(output_path, output_format)
    path = "#{output_path}.#{output_format}"
    write_html(path) if output_format == 'html'
  end

  # Writes the dataset as a single HTML table. The block form of File.open
  # closes the file even when rendering a record raises.
  def write_html(path)
    File.open(path, 'w') do |output_file|
      # Header
      output_file.puts "<html>",
                       "<header>",
                       "</header>",
                       "<body>"

      # Table
      output_file.puts '<table border=1>'
      output_file.puts write_html_table_header
      each_record { |record| output_file.puts create_record_template(record) }
      output_file.puts "</table>"

      # Footer
      output_file.puts "</body>",
                       "</html>"
    end
  end

  # Renders one record as one or more <tr> rows.
  #
  # Plain fields go on the first row. A field that is an Array holds nested
  # records: each nested record gets its own row, written into the columns
  # reserved for that group by #write_html_table_header.
  #
  # @return [String] HTML rows, each terminated by a newline
  def create_record_template(record)
    # The column layout is computed by the table header; build it on demand
    # when this method is called before the header was rendered.
    write_html_table_header if @html_header.nil?

    nested_sizes = record.select { |field| field.class == Array }.map { |field| field.length }
    row_count = nested_sizes.max
    row_count = 1 if row_count.nil? || row_count == 0
    record_template = Array.new(row_count) { Array.new(@fields_number) { " " } }

    field_position = 0
    base_columns = @html_header.first.last.length
    record.each_with_index do |field, column_number|
      if field.class != Array
        record_template[0][field_position] = field
        field_position += 1
      else
        field.each_with_index do |added_record, row|
          added_record.each_with_index do |record_field, col|
            # Nested fields start exactly at field_position; the previous
            # extra "+ 1" shifted them one column away from their headers.
            record_template[row][field_position + col] = record_field
          end
        end
        # Skip the full width of this group, even when it had no matches.
        field_position += @html_header[column_number - base_columns + 1].last.length
      end
    end

    record_template.map { |row| "<tr><td>#{row.join("</td><td>")}</td></tr>\n" }.join
  end

  # Builds the two header rows of the HTML table: a group row (the dataset
  # classification followed by one cell per added column) and a row with the
  # individual field names. Also records the column layout in @html_header
  # and the total column count in @fields_number for #create_record_template.
  #
  # @return [String] the two <tr> rows
  def write_html_table_header
    # A dataset without added columns has no :added_fields metadata.
    added_fields = get_metadata(:added_fields) || {}
    @html_header = [[get_metadata(:classification), get_metadata(:header) - added_fields.keys]]
    added_fields.each do |classification, fields|
      @html_header << [classification, fields]
    end

    @fields_number = @html_header.map { |_classification, fields| fields.length }.inject(0) { |sum, len| sum + len }
    main_header = @html_header.map do |classification, fields|
      "<td colspan=\"#{fields.length}\">#{classification}</td>"
    end.join
    fields_header = @html_header.map do |_classification, fields|
      "<td>#{fields.map { |h| h.to_s }.join("</td><td>")}</td>"
    end.join

    "<tr>#{main_header}</tr>\n<tr>#{fields_header}</tr>"
  end
end
@@ -0,0 +1,35 @@
1
+ require 'dataset'
2
+
3
# Loads a tab-separated UCSC table into a Dataset.
#
# Each line becomes one record; after the optional leading column is
# dropped, the fields are expected in the order chr, start, end, id,
# followed by the columns named in +header+.
#
# @param file [String] path of the file to read
# @param header [Array<Symbol>] names of the columns after the four fixed ones
# @param skip_first_col [Boolean] discard the first field of every line
# @return [Dataset]
def parseUCSCformat(file, header, skip_first_col = true)
  dataset = Dataset.new(header)
  # File.foreach closes the file when done, unlike File.open(file).each.
  File.foreach(file) do |line|
    fields = line.chomp.split("\t")
    fields.shift if skip_first_col
    dataset.add_record(fields)
  end
  dataset
end
13
+
14
# Loads a tab-separated UCSC RefSeq table into a Dataset.
#
# After the optional leading column is dropped, the fields of every line are
# reordered so that input columns 1, 3, 4 and 11 come first, filling the
# chr, start, end and id slots of the record, followed by the remaining
# columns 0, 2, 5-10 and 12-14. Columns missing from a short line become nil.
#
# @param file [String] path of the file to read
# @param header [Array<Symbol>] names of the columns after the four fixed ones
# @param skip_first_col [Boolean] discard the first field of every line
# @return [Dataset]
def parseUCSCrefseqformat(file, header, skip_first_col = true)
  dataset = Dataset.new(header)
  # File.foreach closes the file when done, unlike File.open(file).each.
  File.foreach(file) do |line|
    fields = line.chomp.split("\t")
    fields.shift if skip_first_col
    dataset.add_record(fields.values_at(1, 3, 4, 11, 0, 2, 5, 6, 7, 8, 9, 10, 12, 13, 14))
  end
  dataset
end
25
+
26
# Loads a comma-separated DENdb table into a Dataset.
#
# Input columns 1, 2 and 3 fill the chr, start and end slots, column 0
# becomes the id, and columns 4 to 11 are appended unchanged.
#
# @param file [String] path of the file to read
# @param header [Array<Symbol>] names of the columns after the four fixed ones
# @return [Dataset]
def parseDENdbCSVformat(file, header)
  dataset = Dataset.new(header)
  # File.foreach closes the file when done, unlike File.open(file).each.
  File.foreach(file) do |line|
    fields = line.chomp.split(",")
    # The slice is nil when a line has fewer than four fields; fall back to
    # an empty list so a short or blank line does not raise TypeError.
    extra = fields[4..11] || []
    dataset.add_record(fields.values_at(1, 2, 3, 0).concat(extra))
  end
  dataset
end
@@ -0,0 +1,3 @@
1
# Namespace of the anncrsnp gem.
module Anncrsnp
  # Version of the gem; frozen so the shared constant cannot be mutated.
  VERSION = "0.1.0".freeze
end
data/lib/anncrsnp.rb ADDED
@@ -0,0 +1,5 @@
1
+ require "anncrsnp/version"
2
+
3
+ module Anncrsnp
4
+ # Top-level namespace of the gem. This file defines nothing inside it yet.
5
+ end
metadata ADDED
@@ -0,0 +1,144 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: anncrsnp
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Elena Rojano
8
+ - Pedro Seoane
9
+ autorequire:
10
+ bindir: exe
11
+ cert_chain: []
12
+ date: 2016-01-24 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '1.10'
21
+ type: :development
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '1.10'
28
+ - !ruby/object:Gem::Dependency
29
+ name: rake
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: '10.0'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '10.0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: rspec
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ - !ruby/object:Gem::Dependency
57
+ name: sqlite3
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ - !ruby/object:Gem::Dependency
71
+ name: rubyzip
72
+ requirement: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ type: :runtime
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ description: 'AnNCR-SNP integrates data from various sources, allowing the user to
85
+ investigate the potential effects of variants in non-coding regions of the human
86
+ genome. AnNCR-SNP consists of a database containing data on all non-coding elements
87
+ and two main programs: manager and finder. The manager program is responsible for
88
+ creating the local database, and the finder program receives the user queries in
89
+ order to search in the local database and retrieve information. The user can find
90
+ information about various regulatory elements, such as TFBs, open chromatin, histone
91
+ modification and methylation sites, information about SNPs from dbSNP and gene
92
+ information from RefSeq.'
93
+ email:
94
+ - elenarojano@outlook.com
95
+ - seoanezonjic@hotmail.com
96
+ executables: []
97
+ extensions: []
98
+ extra_rdoc_files: []
99
+ files:
100
+ - ".gitignore"
101
+ - ".rspec"
102
+ - ".travis.yml"
103
+ - Gemfile
104
+ - LICENSE.txt
105
+ - README.md
106
+ - Rakefile
107
+ - anncrsnp.gemspec
108
+ - bin/console
109
+ - bin/grdbfinder.rb
110
+ - bin/grdbmanager.rb
111
+ - bin/masterfeatures.rb
112
+ - bin/setup
113
+ - bin/statistics.rb
114
+ - database/deleteme
115
+ - lib/anncrsnp.rb
116
+ - lib/anncrsnp/dataset.rb
117
+ - lib/anncrsnp/parsers/ucscparser.rb
118
+ - lib/anncrsnp/version.rb
119
+ homepage: ''
120
+ licenses:
121
+ - MIT
122
+ metadata: {}
123
+ post_install_message:
124
+ rdoc_options: []
125
+ require_paths:
126
+ - lib
127
+ required_ruby_version: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ required_rubygems_version: !ruby/object:Gem::Requirement
133
+ requirements:
134
+ - - ">="
135
+ - !ruby/object:Gem::Version
136
+ version: '0'
137
+ requirements: []
138
+ rubyforge_project:
139
+ rubygems_version: 2.4.8
140
+ signing_key:
141
+ specification_version: 4
142
+ summary: Tool to characterize Single Nucleotide Polymorphisms (SNP) in genomic non-coding
143
+ regions.
144
+ test_files: []