anncrsnp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,178 @@
1
+ CHR = 0 # record index of the chromosome field
2
+ START = 1 # record index of the start position (stored as Integer by Dataset#add_record)
3
+ ENDING = 2 # record index of the end position (stored as Integer by Dataset#add_record)
4
+ ID = 3 # record index of the element identifier
5
+
6
# In-memory table of positional records.
#
# Every record is a plain Array whose first four cells are fixed
# (CHR, START, ENDING, ID); any further cells are described by the
# :header metadata entry. Free-form metadata is kept in a Hash and is
# accessed through #get_metadata / #add_metadata.
class Dataset
  # @param header [Array] names of the columns that follow the four fixed ones
  def initialize(header = [])
    @all_record = []
    @metadata = {}
    add_metadata(:header, [:chr, :start, :ending, :id].concat(header))
  end

  # Appends one record. Fixed columns: 0 -> chr, 1 -> start, 2 -> end, 3 -> id.
  # The start and end cells are converted to Integer in place, so the
  # caller's array is modified.
  #
  # @param fields_array [Array] record cells
  # @return [Array] the internal record list
  def add_record(fields_array)
    fields_array[START] = fields_array[START].to_i
    fields_array[ENDING] = fields_array[ENDING].to_i
    @all_record << fields_array
  end

  # Appends every record of +records_array+ through #add_record.
  #
  # @param records_array [Array<Array>]
  # @return [Array<Array>] +records_array+
  def add_record_batch(records_array)
    records_array.each { |record| add_record(record) }
  end

  # Returns the records on +chr+ whose [start, end] interval overlaps the
  # closed interval [start, ending].
  #
  # A record that completely contains the query interval is also returned;
  # the previous check only looked at whether one of the record's two
  # endpoints fell inside the query, so such records were missed.
  # Assumes start <= ending for both the query and the stored records.
  #
  # @param chr [Object] chromosome value, compared with ==
  # @param start [Integer]
  # @param ending [Integer]
  # @return [Array<Array>] matching records (the stored arrays, not copies)
  def get_elements_on_position(chr, start, ending)
    @all_record.select do |record|
      record[CHR] == chr && record[START] <= ending && record[ENDING] >= start
    end
  end

  # Yields every record in insertion order. Without a block an Enumerator
  # is returned.
  def each_record(&block)
    @all_record.each(&block)
  end

  # @return [Integer] number of stored records
  def length
    @all_record.length
  end

  # @return [Array, nil] first stored record
  def first
    @all_record.first
  end

  # @return [Array, nil] last stored record
  def last
    @all_record.last
  end

  # Keeps only the records whose +col_name+ cell, converted with #to_f,
  # is greater than or equal to +thresold+.
  #
  # @param col_name [Object] a name present in the :header metadata
  # @param thresold [Numeric] minimum value to keep
  # @raise [ArgumentError] when +col_name+ is not in the header
  # @return [Array, nil] result of Array#select! (nil when nothing was removed)
  def numeric_filter(col_name, thresold)
    index = get_metadata(:header).index(col_name)
    raise ArgumentError, "unknown column: #{col_name.inspect}" if index.nil?

    @all_record.select! { |rec| rec[index].to_f >= thresold }
  end

  # Removes the named columns from every record and from the header.
  #
  # Cells are deleted from the highest index down so that earlier deletions
  # cannot shift the position of the remaining targets, regardless of the
  # order in which the names were given. Names missing from the header are
  # ignored.
  #
  # @param col_names [Array] column names to remove
  # @return [Array] the new header
  def drop_columns(col_names)
    header = get_metadata(:header)
    drop_index = col_names.map { |name| header.index(name) }.compact.uniq.sort
    each_record do |rec|
      drop_index.reverse_each { |ind| rec.delete_at(ind) }
    end
    add_metadata(:header, header - col_names)
  end

  # Appends to every record of this dataset the list of +dataset+ records
  # found at the record's own chr/start/end, under a new column called
  # +results_column_name+. The header of +dataset+ is remembered in the
  # :added_fields metadata, keyed by the new column name.
  #
  # @param dataset [Dataset] dataset to search
  # @param results_column_name [Object] name of the new column
  def extract_records_by_coordinates(dataset, results_column_name)
    added_fields = get_metadata(:added_fields) || {}
    added_fields[results_column_name] = dataset.get_metadata(:header)
    add_metadata(:added_fields, added_fields)
    add_metadata(:header, get_metadata(:header).concat([results_column_name])) # Add new column to dataset
    each_record do |record|
      record << dataset.get_elements_on_position(record[CHR], record[START], record[ENDING])
    end
  end

  # @return [Boolean] true when no record is stored
  def empty?
    @all_record.empty?
  end

  # @return [Object, nil] metadata stored under +keyword+
  def get_metadata(keyword)
    @metadata[keyword]
  end

  # Stores +value+ under +keyword+, replacing any previous value.
  # @return [Object] +value+
  def add_metadata(keyword, value)
    @metadata[keyword] = value
  end

  # Writes the dataset to "<output_path>.<output_format>". Only the 'html'
  # format is handled; any other format writes nothing.
  def write(output_path, output_format)
    path = "#{output_path}.#{output_format}"
    write_html(path) if output_format == 'html'
  end

  # Writes the dataset as a single HTML table to +path+.
  # The block form of File.open closes the file even if rendering raises.
  def write_html(path)
    File.open(path, 'w') do |output_file|
      # Header
      output_file.puts "<html>",
                       "<header>",
                       "</header>",
                       "<body>"

      # Table
      output_file.puts '<table border=1>'
      output_file.puts write_html_table_header
      each_record { |record| output_file.puts create_record_template(record) }
      output_file.puts "</table>"

      # Footer
      output_file.puts "</body>",
                       "</html>"
    end
    nil
  end

  # Renders one record as one or more <tr> rows.
  #
  # Scalar cells go on the first row. A cell that is an Array (a column added
  # by #extract_records_by_coordinates) is expanded vertically: each nested
  # record gets its own row. Relies on @fields_number and @html_header, which
  # are set by #write_html_table_header, so that method must be called first.
  #
  # NOTE(review): cell values are interpolated without HTML escaping.
  #
  # @param record [Array]
  # @return [String] HTML rows, each terminated by a newline
  def create_record_template(record)
    row_count = record.select { |field| field.is_a?(Array) }.map(&:length).max
    row_count = 1 if row_count.nil? || row_count == 0
    record_template = Array.new(row_count) { Array.new(@fields_number) { " " } }

    field_position = 0
    record.each_with_index do |field, column_number|
      if field.is_a?(Array)
        field.each_with_index do |added_record, raw|
          added_record.each_with_index do |record_field, col|
            # NOTE(review): the extra +1 shifts nested cells one column to the
            # right of field_position; kept as in the original - confirm intent.
            record_template[raw][col + field_position + 1] = record_field
          end
        end
        # Skip as many columns as the matching header group declares.
        field_position += @html_header[column_number - @html_header.first.last.length + 1].last.length
      else
        record_template[0][field_position] = field
        field_position += 1
      end
    end

    record_html = ""
    record_template.each do |row|
      record_html << "<tr><td>#{row.join("</td><td>")}</td></tr>\n"
    end
    record_html
  end

  # Builds the two header rows of the HTML table: a row of column groups
  # (the dataset's own columns under the :classification metadata value, then
  # one group per added column) and a row with the individual field names.
  # Also sets @html_header and @fields_number for #create_record_template.
  #
  # Works when no column has been added (no :added_fields metadata), and
  # closes every group cell with </td>.
  #
  # @return [String] the two <tr> rows
  def write_html_table_header
    added_fields = get_metadata(:added_fields) || {}
    @html_header = [[get_metadata(:classification), get_metadata(:header) - added_fields.keys]]
    added_fields.each do |classification, fields|
      @html_header << [classification, fields]
    end

    main_header = ""
    fields_header = ""
    @fields_number = 0
    @html_header.each do |classification, fields|
      main_header << "<td colspan=\"#{fields.length}\">#{classification}</td>"
      fields_header << "<td>#{fields.map(&:to_s).join("</td><td>")}</td>"
      @fields_number += fields.length
    end

    "<tr>#{main_header}</tr>\n<tr>#{fields_header}</tr>"
  end
end
@@ -0,0 +1,35 @@
1
+ require 'dataset'
2
+
3
# Loads a tab-separated file into a new Dataset, one record per line.
#
# @param file [String] path of the file to read
# @param header [Array] extra column names passed to Dataset.new
# @param skip_first_col [Boolean] when true, the first field of every line is
#   discarded before the record is added
# @return [Dataset]
#
# The default is the literal +true+: the TRUE constant was removed in
# Ruby 3.2. File.foreach closes the file when iteration ends.
def parseUCSCformat(file, header, skip_first_col = true)
  dataset = Dataset.new(header)
  File.foreach(file) do |line|
    fields = line.chomp.split("\t")
    fields.shift if skip_first_col
    dataset.add_record(fields)
  end
  dataset
end
13
+
14
# Loads a tab-separated file into a new Dataset, reordering the fields of each
# line so that input columns 1, 3, 4 and 11 (after the optional first-column
# skip) become the record's first four cells, followed by the remaining
# columns 0, 2, 5-10 and 12-14.
#
# @param file [String] path of the file to read
# @param header [Array] extra column names passed to Dataset.new
# @param skip_first_col [Boolean] when true, the first field of every line is
#   discarded before reordering
# @return [Dataset]
#
# The default is the literal +true+: the TRUE constant was removed in
# Ruby 3.2. File.foreach closes the file when iteration ends.
def parseUCSCrefseqformat(file, header, skip_first_col = true)
  dataset = Dataset.new(header)
  File.foreach(file) do |line|
    fields = line.chomp.split("\t")
    fields.shift if skip_first_col
    # values_at yields nil for missing columns, exactly like fields[i].
    dataset.add_record(fields.values_at(1, 3, 4, 11, 0, 2, 5, 6, 7, 8, 9, 10, 12, 13, 14))
  end
  dataset
end
25
+
26
# Loads a comma-separated file into a new Dataset. For each line, input
# columns 1, 2, 3 and 0 become the record's first four cells, followed by
# columns 4 to 11 when present.
#
# @param file [String] path of the file to read
# @param header [Array] extra column names passed to Dataset.new
# @return [Dataset]
#
# File.foreach closes the file when iteration ends. Lines with fewer than
# four fields no longer raise: the missing cells are nil and no extra
# columns are appended.
def parseDENdbCSVformat(file, header)
  dataset = Dataset.new(header)
  File.foreach(file) do |line|
    fields = line.chomp.split(",")
    # fields[4..11] is nil (not []) when the line has fewer than four fields.
    extra = fields[4..11] || []
    dataset.add_record(fields.values_at(1, 2, 3, 0).concat(extra))
  end
  dataset
end
@@ -0,0 +1,3 @@
1
module Anncrsnp
  # Version string of the gem. Frozen so the shared constant cannot be
  # modified in place.
  VERSION = "0.1.0".freeze
end
data/lib/anncrsnp.rb ADDED
@@ -0,0 +1,5 @@
1
+ require "anncrsnp/version"
2
+
3
+ module Anncrsnp
4
+ # Namespace module for the gem; no methods or constants are defined in this block.
5
+ end
metadata ADDED
@@ -0,0 +1,144 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: anncrsnp
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Elena Rojano
8
+ - Pedro Seoane
9
+ autorequire:
10
+ bindir: exe
11
+ cert_chain: []
12
+ date: 2016-01-24 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '1.10'
21
+ type: :development
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '1.10'
28
+ - !ruby/object:Gem::Dependency
29
+ name: rake
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: '10.0'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '10.0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: rspec
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ - !ruby/object:Gem::Dependency
57
+ name: sqlite3
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ - !ruby/object:Gem::Dependency
71
+ name: rubyzip
72
+ requirement: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ type: :runtime
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ description: 'AnNCR-SNP integrates data from various sources, allowing the user to
85
+ investigate the potential effects of variants in non-coding regions of the human
86
+ genome. AnNCR-SNP consists of a database containing data on all non-coding elements
87
+ and two main programs: manager and finder. The manager program is responsible for
88
+ creating the local database, and the finder program receives the user queries in
89
+ order to search in the local database and retrieve information. The user can find
90
+ information about various regulatory elements, such as TFBs, open chromatin, histone
91
+ modification and methylation sites, information about SNPs from dbSNP and gene
92
+ information from RefSeq.'
93
+ email:
94
+ - elenarojano@outlook.com
95
+ - seoanezonjic@hotmail.com
96
+ executables: []
97
+ extensions: []
98
+ extra_rdoc_files: []
99
+ files:
100
+ - ".gitignore"
101
+ - ".rspec"
102
+ - ".travis.yml"
103
+ - Gemfile
104
+ - LICENSE.txt
105
+ - README.md
106
+ - Rakefile
107
+ - anncrsnp.gemspec
108
+ - bin/console
109
+ - bin/grdbfinder.rb
110
+ - bin/grdbmanager.rb
111
+ - bin/masterfeatures.rb
112
+ - bin/setup
113
+ - bin/statistics.rb
114
+ - database/deleteme
115
+ - lib/anncrsnp.rb
116
+ - lib/anncrsnp/dataset.rb
117
+ - lib/anncrsnp/parsers/ucscparser.rb
118
+ - lib/anncrsnp/version.rb
119
+ homepage: ''
120
+ licenses:
121
+ - MIT
122
+ metadata: {}
123
+ post_install_message:
124
+ rdoc_options: []
125
+ require_paths:
126
+ - lib
127
+ required_ruby_version: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ required_rubygems_version: !ruby/object:Gem::Requirement
133
+ requirements:
134
+ - - ">="
135
+ - !ruby/object:Gem::Version
136
+ version: '0'
137
+ requirements: []
138
+ rubyforge_project:
139
+ rubygems_version: 2.4.8
140
+ signing_key:
141
+ specification_version: 4
142
+ summary: Tool to characterize Single Nucleotide Polymorphisms (SNP) in genomic non-coding
143
+ regions.
144
+ test_files: []