cytogenetics 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,157 @@
1
+ require 'yaml'
2
+
3
+ module Cytogenetics
4
# Base class for a single chromosome aberration parsed from a karyotype token
# such as "del(7)(q22)". Subclasses (declared in ChromosomeAberrations) set the
# class-level instance variables @kt (type name) and @rx (recognising regex).
#
# On construction the aberration string is parsed into Breakpoint objects,
# available through #breakpoints.
class Aberration
  attr_accessor :breakpoints
  attr_reader :abr, :ab_objs, :fragments

  class << self
    # Returns a Hash mapping each aberration type (as a Symbol) to the class
    # declared for it in ChromosomeAberrations.
    def instantiate_aberrations
      ChromosomeAberrations.constants.each_with_object({}) do |ca, objs|
        klass = ChromosomeAberrations.const_get(ca)
        objs[klass.type.to_sym] = klass
      end
    end
  end

  # Type name declared by the subclass (value of @kt), nil on the base class.
  def self.type
    @kt
  end

  # Recognising regex declared by the subclass (value of @rx).
  def self.regex
    @rx
  end

  # Returns a Hash mapping each aberration type Symbol to its regex.
  def self.all_regex
    ChromosomeAberrations.constants.each_with_object({}) do |ca, rx|
      klass = ChromosomeAberrations.const_get(ca)
      rx[klass.type.to_sym] = klass.regex
    end
  end

  # Memoised result of instantiate_aberrations.
  def self.aberration_objs
    @ab_objs ||= instantiate_aberrations
  end

  # All aberration type Symbols except those whose name contains "gain" or "loss".
  def self.aberration_type
    Aberration.all_regex.keys.reject { |c| c.to_s.match(/gain|loss/) }
  end

  # Returns the type Symbol of the first regex matching +abr+, or :unk.
  def self.classify_aberration(abr)
    Aberration.all_regex.each_pair do |k, regex|
      return k if abr.match(regex)
    end
    :unk
  end

  # str - the aberration string, e.g. "inv(9)(p11q13)".
  def initialize(str)
    config_logging

    @abr = str
    @breakpoints = []
    @fragments = []

    # TODO: verify that str matches self.class.regex before parsing
    get_breakpoints
    @breakpoints.flatten! # subclasses may append arrays of breakpoints
  end

  # Removes the first breakpoint equal to +bp+.
  # Returns the index it was removed from, or nil when it was not present.
  def remove_breakpoint(bp)
    removed = @breakpoints.index(bp)
    @breakpoints.delete_at(removed) if removed
    removed
  end

  def to_s
    "#{@abr}: #{@breakpoints.join(',')}"
  end

  # NOTE: the original source placed a bare `:private` symbol here, which has no
  # effect, so the helpers below have always been public. They are kept public
  # so existing callers and subclasses are unaffected.

  # Parses @abr and appends one Breakpoint per chromosome/band fragment.
  # Nothing is added when the chromosome or band information is missing.
  def get_breakpoints
    chr_i = find_chr(@abr)
    return if chr_i.nil?

    band_i = find_bands(@abr, chr_i[:end_index])
    ## No band --> TODO add this as information somewhere but not as a breakpoint
    return if band_i.nil?

    chr_i[:chr].each_with_index do |c, i|
      band = band_i[:bands][i]
      next if band.nil? # fewer band definitions than chromosomes

      find_fragments(band).each { |f| @breakpoints << Breakpoint.new(c, f, self.class.type) }
    end
  end

  # Parsing aberration strings to pull out the chromosome and band definitions
  # These will result in breakpoint information
  #
  # Returns {:start_index, :end_index, :chr} describing the first parenthesised
  # group of +str+, or nil (after logging a warning) when the group is missing
  # or holds anything other than chromosome names (digits, X or Y).
  def find_chr(str)
    chr_s = str.index('(')
    chr_e = chr_s && str.index(')', chr_s)
    chrs = chr_e ? str[(chr_s + 1)...chr_e].split(/;|:/) : []

    # The whole name must be a number, X or Y; the previous /^\d+|X|Y$/ only
    # anchored the outer alternatives and accepted e.g. "7Z".
    if chrs.empty? || !chrs.all? { |chr| chr.match(/\A(\d+|X|Y)\z/) }
      @log.warn("No chromosome defined from #{str}, skipped.")
      return
    end

    { :start_index => chr_s, :end_index => chr_e, :chr => chrs }
  end

  # Locates the band group that directly follows the chromosome group.
  #
  # str   - the aberration string
  # index - index of the ")" that closes the chromosome group
  #
  # Returns {:start_index, :end_index, :bands} or nil when no usable band
  # definition is present.
  def find_bands(str, index)
    if str.length.eql?(index + 1)
      @log.warn("No bands defined in #{str}, skipped.")
      return
    end

    band_s = str.index('(', index)
    # bands must be present and the group must immediately follow: ")("
    return unless band_s && str[band_s - 1, 2].eql?(')(') && str.match(/(q|p)(\d+|\?)/)

    # an unterminated group runs to the end of the string
    band_e = str.index(')', band_s) || str.length
    band_str = str[(band_s + 1)...band_e]

    if band_str.match(/::/)
      @log.warn("Aberration defined using different language, not currently parsed skipping: #{@abr}")
      return
    end

    # sometimes bands are given a range, for our purposes we'll take the first one (CyDas appears to do this as well)
    bands = band_str.split(/;|:/).map { |b| b.sub(/-[qp]\d+(\.\d+)?$/, "") }

    # each entry is one or more arm+band definitions: "q22", "q11.23", "p13q22"
    unless bands.all? { |b| b.match(/\A(?:[pq]\d+(?:\.\d+)?)+\z/) }
      @log.warn("Bands incorrectly defined in #{str}")
      return
    end

    { :start_index => band_s, :end_index => band_e, :bands => bands }
  end

  # sometimes bands are defined for a single chr as p13q22
  # Returns each arm+band piece (sub-band digits are dropped): ["p13", "q22"].
  def find_fragments(str)
    str.scan(/[pq]\d+/)
  end

  # Binds the shared gem logger to @log.
  def config_logging
    @log = Cytogenetics.logger
    #@log.progname = self.class.name
  end
end
156
+
157
+ end
@@ -0,0 +1,37 @@
1
+ module Cytogenetics
2
+
3
# A chromosome name plus a band on it, optionally tagged with the type of
# aberration that produced it.
class Breakpoint
  attr_accessor :chr, :band, :type

  # args[0] - chromosome name: a String of digits, "X" or "Y"
  # args[1] - band: a non-empty String (e.g. "q22", "pter")
  # args[2] - optional aberration type
  #
  # Raises ArgumentError when the chromosome or band is not valid.
  def initialize(*args)
    config_logging
    c, b = args
    @type = args[2] if args.length > 2

    # The chromosome must be a number, X or Y in its entirety (the previous
    # unanchored pattern accepted any string merely containing a digit).
    valid = c.is_a?(String) && c.match(/\A(\d+|X|Y)\z/) && b.is_a?(String) && !b.empty?
    unless valid
      @log.error("#{c}#{b} is not a valid breakpoint")
      raise ArgumentError, "#{c}#{b} is not a valid breakpoint"
    end

    @chr = c
    @band = b
  end

  # Returns "p" or "q" for the arm named in the band, or nil when the band
  # carries no arm. Terminal bands ("pter"/"qter") are recognised as well.
  def arm
    found = @band.match(/(q|p)(?:\d+|ter)/)
    found && found[1]
  end

  def to_s
    "#{@chr}#{@band}"
  end

  # NOTE: the original `:private` marker was a no-op symbol; config_logging has
  # always been public and is left so.

  # Binds the shared gem logger to @log.
  def config_logging
    @log = Cytogenetics.logger
    #@log.progname = self.class.name
  end
end
36
+
37
+ end
@@ -0,0 +1,68 @@
1
+ require 'cytogenetics/utils/band_reader'
2
+
3
+ module Cytogenetics
4
+
5
# A single chromosome together with the Aberration objects recorded on it.
class Chromosome
  include BandReader

  # Location of the band definition file read when normal bands are requested.
  ## TODO quit hardcoding
  DEFAULT_BANDS_FILE = "resources/HsBands.txt"

  class << self
    attr_accessor :normal_bands
  end

  attr_reader :name, :aberrations

  # args[0] - chromosome name: an Integer, or a String of digits, "X" or "Y"
  # args[1] - pass true to load the normal band list for this chromosome
  #
  # Raises ArgumentError when the name is not a valid chromosome identifier.
  def initialize(*args)
    config_logging
    chr = args[0]
    # Integer covers what Fixnum used to; Fixnum no longer exists in current Ruby
    chr = chr.to_s if chr.is_a?(Integer)

    # the whole name must match (previously only the outer alternatives were anchored)
    unless chr.is_a?(String) && chr.match(/\A(\d+|X|Y)\z/)
      raise ArgumentError, "#{chr} is not a valid chromosome identifier."
    end

    @name = chr
    @aberrations = []

    return unless args.length > 1 && args[1].eql?(true)

    if File.exist?(DEFAULT_BANDS_FILE)
      @normal_bands = bands(@name, DEFAULT_BANDS_FILE)
    else
      # The band list is not consulted anywhere in this class, so a missing
      # file is reported rather than aborting chromosome construction.
      @log.warn("Band definitions not found at #{DEFAULT_BANDS_FILE}; normal bands not loaded for chromosome #{@name}.")
    end
  end

  def to_s
    "#{@name}"
  end

  # Records an aberration on this chromosome.
  # Raises ArgumentError unless +obj+ is an Aberration.
  def aberration(obj)
    raise ArgumentError, "Not an Aberration object" unless obj.is_a? Aberration

    ## TODO Deal with bands, HOWEVER because the chromosome has aberration objects breakpoints can include
    ## bands for which no chromosome object is created
    @aberrations << obj
  end

  # Returns one breakpoint list per recorded aberration (an Array of Arrays).
  def breakpoints
    @aberrations.map { |a| a.breakpoints }
  end

  # Returns one fragment list per recorded aberration (an Array of Arrays).
  def fragments
    @aberrations.map { |a| a.fragments }
  end

  # NOTE: the original `:private` marker was a no-op symbol; config_logging has
  # always been public and is left so.

  # Binds the shared gem logger to @log.
  def config_logging
    @log = Cytogenetics.logger
    #@log.progname = self.class.name
  end
end
67
+
68
+ end
@@ -0,0 +1,188 @@
1
+ require 'cytogenetics/aberration'
2
+
3
+ module Cytogenetics
4
+
5
# Concrete aberration types. Each class declares @kt (the type name returned by
# Aberration.type) and @rx (the regex that recognises its karyotype notation).
# Aberration discovers these classes through ChromosomeAberrations.constants.
module ChromosomeAberrations

  ## INVERSION
  class Inversion < Aberration
    @kt = 'inv'
    @rx = /^inv\((\d+|X|Y)\)/
  end

  ## DUPLICATION
  class Duplication < Aberration
    @kt = 'dup'
    @rx = /^dup\((\d+|X|Y)\)/
  end

  ## INSERTION
  class Insertion < Aberration
    @kt = 'ins'
    @rx = /^ins\((\d+|X|Y)\)/
  end

  ## DELETION
  class Deletion < Aberration
    @kt = 'del'
    @rx = /^del\((\d+|X|Y)\)/
  end

  ## ADD (addition of unknown material)
  class Addition < Aberration
    @kt = 'add'
    @rx = /^add\((\d+|X|Y)\)/
  end

  ## ISOCHROMOSOME
  class Isochromosome < Aberration
    @kt = 'iso'
    @rx = /^i\((\d+|X|Y)\)/
  end

  ## DICENTRIC
  class DicentricChromosome < Aberration
    @kt = 'dic'
    @rx = /^dic\((\d+|X|Y)[;|:](\d+|X|Y)\)/

    # TODO am not sure how the dic rearrangment works, see this in CyDas dic(13;13)(q14;q32)
    # Until that is settled the generic Aberration#get_breakpoints is used.
  end

  ## RING ## TODO figure out the right regex for this
  #class RingChromosome < Aberration
  #  @kt = 'ring'
  #  @rx = /^r\(/
  #end

  ## ROBERTSONIAN
  #class Robertsonian < Aberration
  #  @kt = 'rob'
  #  @rx = /^rob\(/
  #end

  ## DERIVATIVE
  class Derivative < Aberration
    @kt = 'der'
    @rx = /^der\((\d+|X|Y)\)/

    # Splits the text after "der(N)" into its component abnormalities, parses
    # each with the matching aberration class and collects their breakpoints.
    # Translocation breakpoints are additionally turned into fragments.
    def get_breakpoints
      @aberrations = []

      ab_objs = Aberration.aberration_objs

      chr_i = find_chr(@abr)
      return if chr_i.nil? # find_chr has already logged why

      derivative_abr = @abr[(chr_i[:end_index] + 1)..-1]

      # separate different abnormalities within the derivative chromosome and clean it up to make it parseable
      abnormalities = derivative_abr.scan(/([^\(\)]+\(([^\(\)]|\)\()*\))/).collect { |a| a[0] }

      trans_bps = []
      abnormalities.each do |abn|
        abrclass = Aberration.classify_aberration(abn)

        if abrclass.to_s.eql? 'unk' # not dealing with unknowns
          @log.warn("Cannot handle #{abn}, incorrect format.")
          next
        end

        # special handling because translocations are written as a sliding window
        # translocations should also only ever have 2 breakpoints...
        if abrclass.to_s.eql? ChromosomeAberrations::Translocation.type
          trans = ChromosomeAberrations::Translocation.new(abn)
          trans_bps << trans.breakpoints
          @breakpoints << trans.breakpoints
        else
          ab_obj = ab_objs[abrclass].new(abn)
          if ab_obj.breakpoints.length > 0
            @aberrations << ab_obj
            @breakpoints << ab_obj.breakpoints
          end
        end
      end

      trans_bps.delete_if { |c| c.empty? }
      # flatten (not flatten!) so the argument can never be nil
      add_fragments(trans_bps.flatten) if trans_bps.length > 0
    end

    # NOTE: the original `:private` marker here was a no-op symbol, so
    # add_fragments has always been public and is left so.

    # have to reorder the array and then turn Breakpoints into fragments
    #
    # tbp_list - flat list of translocation Breakpoints in the order parsed.
    # The first breakpoint is paired with the terminal end of its arm, later
    # pairs are swapped, and the last entry is paired with its own terminal end.
    def add_fragments(tbp_list)
      sorted = []
      tbp_list.each_with_index do |e, i|
        if i <= 1
          sorted << Breakpoint.new(e.chr, "#{e.arm}ter") if i.eql? 0
          sorted << e
        elsif i % 2 == 0
          sorted << tbp_list[i + 1]
          sorted << tbp_list[i]
        end
      end
      sorted << Breakpoint.new(sorted[-1].chr, "#{sorted[-1].arm}ter")
      sorted.each_slice(2) do |pair|
        @fragments << Fragment.new(pair[0], pair[1])
      end
    end
  end

  ## TRANSLOCATION ... this is typically a subset of Derivative chromosomes, but have seen it on its own
  class Translocation < Aberration
    @kt = 'trans'
    @rx = /^t\((\d+|X|Y)[;|:](\d+|X|Y)\)/

    ## Two ways of defining translocations:
    ## 1) t(1;3)(p31;p13)
    #
    # Adds one Breakpoint per chromosome, pairing the i-th chromosome with the
    # i-th band. Nothing is added when the chromosomes or bands are missing.
    def get_breakpoints
      chr_i = find_chr(@abr)
      return if chr_i.nil? # find_chr has already logged why

      band_i = find_bands(@abr, chr_i[:end_index])
      if band_i.nil?
        @log.warn("No bands defined in #{@abr}")
        return
      end

      chr_i[:chr].each_with_index do |c, i|
        @breakpoints << Breakpoint.new(c, band_i[:bands][i], 'trans')
      end
    end
  end

  ## FRAGMENT
  class ChromosomeFragment < Aberration
    @kt = 'frag'
    @rx = /^frag\((\d+|X|Y)\)/
  end

  ## CHROMOSOME GAIN
  class ChromosomeGain < Aberration
    @kt = 'gain'
    @rx = /^\+(\d+|X|Y)$/

    # str - the gain notation, e.g. "+13"; stored without its "+".
    # Whole-chromosome gains have no breakpoints, so nothing is parsed.
    def initialize(str)
      config_logging
      @abr = str.sub("+", "")
      @breakpoints = []
      @fragments = []
    end
  end

  ## CHROMOSOME LOSS
  class ChromosomeLoss < Aberration
    @kt = 'loss'
    @rx = /^-(\d+|X|Y)$/

    # str - the loss notation, e.g. "-13"; stored without its "-".
    # Whole-chromosome losses have no breakpoints, so nothing is parsed.
    def initialize(str)
      config_logging
      @abr = str.sub("-", "")
      @breakpoints = []
      @fragments = []
    end
  end
end
188
+ end
@@ -0,0 +1,45 @@
1
+
2
+ module Cytogenetics
3
+
4
# A stretch of one chromosome delimited by two Breakpoints.
class Fragment
  attr_reader :chr, :start, :end, :genes

  # args - exactly two Breakpoint objects: the start and the end of the fragment.
  #
  # Raises ArgumentError when the arguments are missing, are not Breakpoints,
  # or lie on different chromosomes. (The source previously named an undefined
  # GenomeStructureError for the last case, which surfaced as a NameError.)
  def initialize(*args)
    config_logging
    unless args.length.eql?(2) && args.all? { |a| a.is_a?(Breakpoint) }
      raise ArgumentError, "Expected arguments are missing or are not Breakpoints: #{args}"
    end

    @genes = [] # must exist before add_gene is called
    @start, @end = args
    @chr = @start.chr

    unless @start.chr.eql?(@end.chr)
      raise ArgumentError, "Fragments must be within the same chromosome: #{args}"
    end
  end

  # Appends +gene+ to the list returned by #genes.
  def add_gene(gene)
    @genes << gene
  end

  def to_s
    "#{@start.to_s} --> #{@end.to_s}"
  end

  ## TODO this will require length in basepairs of each band
  #def length
  #
  #end

  # NOTE: the original `:private` marker was a no-op symbol; config_logging has
  # always been public and is left so.

  # Binds the shared gem logger to @log.
  def config_logging
    @log = Cytogenetics.logger
    #@log.progname = self.class.name
  end
end
44
+ end
45
+
@@ -0,0 +1,172 @@
1
+ require 'yaml'
2
+
3
+ require 'cytogenetics/utils/karyotype_reader'
4
+
5
+ module Cytogenetics
6
+
7
# Parses a karyotype string such as "47,XY,+13,del(7)(q22)" into ploidy, sex,
# per-chromosome counts and the aberrations found on abnormal chromosomes.
class Karyotype

  @@haploid = 23

  attr_reader :aberrations, :karyotype, :ploidy, :sex, :abnormal_chr, :normal_chr

  class << self
    attr_accessor :aberration_objs, :unclear_aberrations, :log
  end

  # karyotype_str - the karyotype as a String of at least two characters.
  #
  # Raises ArgumentError when the argument is not such a String, and
  # KaryotypeError when ploidy and sex definitions cannot be found in it.
  def initialize(karyotype_str)
    config_logging
    unless karyotype_str.is_a?(String) && karyotype_str.length > 1
      raise ArgumentError, "#{karyotype_str} is not a karyotype."
    end
    @log.info("Reading karyotype #{karyotype_str}")

    @karyotype = karyotype_str.gsub(/\s/, "")
    @normal_chr = {}
    @abnormal_chr = {}
    @aberrations = {}
    @unclear_aberrations = []
    setup_abberation_objs
    prep_karyotype
    handle_ploidy_diff
    analyze
  end

  # Builds a Chromosome (with its Aberration attached) for every classified
  # aberration other than whole-chromosome gains/losses, grouped by chromosome
  # name in @abnormal_chr.
  def analyze
    Aberration.aberration_type.each do |abr_type|
      next unless @aberrations.has_key? abr_type
      abr_class = @aberration_obj[abr_type]

      @aberrations[abr_type].each do |abr|
        found = abr.match(abr_class.regex)
        next if found.nil?

        unless found[2].nil? || found[1].eql?(found[2])
          @log.warn("Aberration has two chromosomes #{abr} but only the first one is handled.")
        end

        ## TODO deal with the case of 2 chromosomes defined in the aberration
        chr = Chromosome.new(found[1], true)
        chr.aberration(abr_class.new(abr))

        (@abnormal_chr[chr.name] ||= []) << chr
      end
    end
  end

  # get breakpoints for the karyotype
  # Returns a flat Array of every breakpoint on every abnormal chromosome.
  def report_breakpoints
    @abnormal_chr.values.flatten.map { |chr| chr.breakpoints }.flatten
  end

  # Returns a flat Array of every fragment on every abnormal chromosome.
  def report_fragments
    @abnormal_chr.values.flatten.map { |chr| chr.fragments }.flatten
  end

  # Returns whole-chromosome changes as strings: losses ("-7") then gains ("+13").
  def report_ploidy_change
    losses = (@aberrations[:loss] || []).map { |e| "-#{e}" }
    gains = (@aberrations[:gain] || []).map { |e| "+#{e}" }
    losses + gains
  end

  # Returns a multi-line String listing normal chromosome counts followed by
  # the aberrations and breakpoints of each abnormal chromosome.
  def summarize
    summary = "NORMAL CHROMOSOMES\n"
    @normal_chr.each_pair do |chr, count|
      summary = "#{summary} #{chr}: #{count}\n"
    end

    summary = "#{summary}\nABNORMAL:"
    @abnormal_chr.each_pair do |chr, list|
      summary = "#{summary}\n#{chr}"
      list.each do |c|
        summary = "#{summary}\n#{c.aberrations}\n"
        summary = "#{summary}\n#{c.breakpoints}\n"
      end
    end
    summary # previously the Hash returned by each_pair leaked out instead
  end

  # -------------------- # PRIVATE # -------------------- #
  # NOTE: the original `:private` marker was a no-op symbol, so the helpers
  # below have always been public and are left so.

  # Binds the shared gem logger to @log.
  def config_logging
    @log = Cytogenetics.logger
    #@log.progname = self.class.name
  end

  def setup_abberation_objs
    @aberration_obj = Aberration.aberration_objs
  end

  # Applies whole-chromosome losses and gains to the normal chromosome counts.
  def handle_ploidy_diff
    { :loss => -1, :gain => 1 }.each_pair do |kind, delta|
      (@aberrations[kind] || []).each do |c|
        unless @normal_chr.has_key?(c)
          @log.warn("Chromosome #{c} is not a known chromosome, #{kind} ignored.")
          next
        end
        @normal_chr[c] += delta
      end
    end
  end

  # determine ploidy & gender, clean up each aberration and drop any "unknown"
  def prep_karyotype
    @karyotype.gsub!(/\s/, "")
    clones = @karyotype.scan(/\[\d+\]/)
    @log.warn("Karyotype is a collection of clones, analysis may be inaccurate.") if clones.length > 3

    @karyotype.gsub!(/\[\d+\]/, "") # don't care about numbers of cells: [5]

    tokens = @karyotype.split(",")
    pl, sc = tokens[0..1]
    unless pl && sc
      raise KaryotypeError, "'#{@karyotype}' is not a valid karyotype. Ploidy and sex defnitions are absent"
    end
    @ploidy = KaryotypeReader.calculate_ploidy(pl, @@haploid)
    sex_chr = KaryotypeReader.determine_sex(sc)

    @sex = nil
    karyotype_index = 1 # sometimes the sex is not indicated and there's no case information to figure it out
    if sex_chr.values.inject { |sum, v| sum + v } > 0
      # spell out the chromosomes actually counted ("XX", "XY", "X", ...)
      @sex = sex_chr.map { |c, count| c * count }.join
      karyotype_index = 2
    end

    # autosomes only: the haploid set counts one sex chromosome, added below
    (1...@@haploid).each { |c| @normal_chr[c.to_s] = @ploidy.to_i }

    sex_chr.each_pair { |c, p| @normal_chr[c] = p.to_i }

    # deal with the most common karyotype string inconsistencies
    # (concat keeps repeats such as "+13,+13"; a set union would drop them)
    cleaned_karyotype = []
    tokens[karyotype_index..-1].each do |abr|
      cleaned_karyotype.concat(KaryotypeReader.cleanup(abr))
    end
    @karyotype = cleaned_karyotype

    # classify each type of aberration in the karyotype
    @karyotype.each do |k|
      abrclass = Aberration.classify_aberration(k)
      (@aberrations[abrclass] ||= []) << k.sub(/^(\+|-)?/, "")
    end

    ploidy_types = [ChromosomeAberrations::ChromosomeGain.type, ChromosomeAberrations::ChromosomeLoss.type]
    @aberrations.each_pair do |abrclass, abrlist|
      # keys are Symbols while .type is a String, so compare as strings
      next if ploidy_types.include?(abrclass.to_s)
      # aberrations other than chromosome gains/losses should be uniquely represented

      counts = abrlist.inject(Hash.new(0)) { |h, i| h[i] += 1; h }
      counts.each_pair { |k, v| @log.warn("#{k} was seen multiple times. Analyzed only once.") if v > 1 }

      @aberrations[abrclass] = abrlist.uniq
    end
  end
end
172
+ end
@@ -0,0 +1,4 @@
1
+ module Cytogenetics
2
# Error type used by the gem's karyotype parsing code.
class KaryotypeError < StandardError; end
4
+ end
@@ -0,0 +1,30 @@
1
+
2
+ module Cytogenetics
3
+
4
# Mixin that reads chromosome band names from a text file containing one band
# per line, each line starting with the chromosome name followed by the arm
# (p or q), e.g. "1p36.33".
module BandReader

  @bands_by_chr = {}

  # Returns the unique band names listed for +chr+, or an empty Array when the
  # file lists none for it.
  #
  # chr  - chromosome name as it appears in the file ("1", "X", ...)
  # file - a path, or an already opened File. A File passed in is left open
  #        for the caller; a path is opened and closed here.
  def bands(chr, file)
    by_chr =
      if file.is_a?(File)
        read_file(file)
      else
        File.open(file, 'r') { |handle| read_file(handle) }
      end
    (by_chr[chr] || []).uniq
  end

  # Reads every line of +file+ and returns a Hash of chromosome name to the
  # band names found for it. A band with a sub-band ("1p36.33") is followed by
  # its parent band ("1p36"). Lines that do not start with a chromosome name
  # and arm are skipped.
  def read_file(file)
    band_by_chr = {}
    file.each_line do |line|
      found = line.chomp.match(/^(\d+|X|Y)([pq].*)/)
      next if found.nil?

      c = found[1]
      b = found[2]
      list = (band_by_chr[c] ||= [])
      list << "#{c}#{b}"

      parent = b[/([pq]\d+)\.\d+/, 1]
      list << "#{c}#{parent}" if parent
    end
    band_by_chr
  end
end
29
+
30
+ end
@@ -0,0 +1,121 @@
1
+ require 'cytogenetics/karyotype_error'
2
+
3
+ module Cytogenetics
4
# Stateless helpers that normalise the individual comma-separated pieces of a
# karyotype string. None of the methods modify the strings passed to them.
class KaryotypeReader

  # Binds the shared gem logger to the class-level @log.
  def self.config_logging
    @log = Cytogenetics.logger
    #@log.progname = self.name
  end

  # Normalises one aberration token.
  #
  # abr - a single karyotype token, e.g. "+13", "13x3", "add(7)x2"
  #
  # Returns an Array of zero or more cleaned tokens:
  # * tokens containing "?" or "**" are dropped
  # * "<chr>xN" is rewritten relative to a normal count of two: x0 gives two
  #   losses, x1 one loss, x2 nothing, x3 and above one gain per extra copy
  # * "<aberration>xN" yields the aberration N times
  # * anything else is returned as the only element
  def self.cleanup(abr)
    config_logging

    # +t(13;X)(q13;p12) doesn't need a +
    abr = abr.sub(/^[+-]/, "") unless abr.match(/^[+-][\dXY]+$/)

    # not going to bother with aberrations that are unclear/unknown '?' or with '**'
    if abr.match(/\?|\*\*/)
      @log.warn("Removing aberration with unknown/unclear information: #{abr}")
      return []
    end

    # 13x2 is normal, 13x3 is a duplicate and should read +13
    if (found = abr.match(/^([\dXY]+)x(\d+)/))
      chr = found[1]
      copies = found[2].to_i
      if copies.zero? # managed to lose both chromosomes in a diploidy karyotype
        Array.new(2) { "-#{chr}" }
      elsif copies > 2 # sometimes you have 13x3, really just means 1 additional chr 13 since normal ploidy is 2
        Array.new(copies - 2) { "+#{chr}" }
      elsif copies.eql?(1)
        ["-#{chr}"]
      else
        []
      end
    # add(9)(p21)x2 or add(7)x2 should indicate that this "additional material of unk origin" happened twice
    elsif (found = abr.match(/(.*)x(\d+)$/))
      Array.new(found[2].to_i) { found[1] }
    # del(7) should be -7 but not del(7)(q12)
    else # everything else
      [abr]
    end
  end

  # Counts the sex chromosomes named in +str+.
  #
  # Returns {"X" => n, "Y" => m}. Both counts are zero (and a warning is
  # logged) when +str+ is not made up solely of X and Y.
  def self.determine_sex(str)
    config_logging

    sex_chr = { 'X' => 0, 'Y' => 0 }

    unless str.match(/^(X|Y)+$/)
      @log.warn("Definition of gender incorrect (#{str})")
      return sex_chr
    end

    # ploidy number makes no difference since this string will tell us how many or at least what the gender should be
    str[/[XY]+/].each_char { |c| sex_chr[c] += 1 }

    # A lone X is NOT assumed to be a male that lost Y: some karyotypes are
    # just not defined correctly, often XX -X is listed as X,...
    sex_chr
  end

  # Estimates the ploidy level from the chromosome-count part of a karyotype.
  #
  # str     - chromosome count: a number ("72"), a range ("46-53", "46~53"),
  #           optionally with a "<...>" annotation which is ignored
  # haploid - number of chromosomes in one haploid set
  #
  # Returns 2, 3 or 4.
  # Raises KaryotypeError if no level can be determined.
  def self.calculate_ploidy(str, haploid)
    config_logging

    str = str.sub(/<.{2,}>/, "")
    ranged = str.match(/\d+\((\d+-\d+)\)/)
    str = ranged[1] if ranged

    diploid = haploid * 2
    triploid = haploid * 3
    quadraploid = haploid * 4

    # typically see di- tri- quad- if more than that it should be noted
    min = diploid
    max = diploid
    if (found = str.match(/(\d+)[-~](\d+)/)) # num and range or just range: 46-53
      min = found[1].to_i
      max = found[2].to_i
    elsif (found = str.match(/^(\d+)/)) # single num: 72
      min = max = found[1].to_i
    end

    if min < haploid
      @log.warn("Ploidy determination may be bad as the min was less than haploid (#{str}). Setting to haploid.")
      min = haploid
    end

    if min.eql?(diploid) && max.eql?(diploid)
      @log.debug("Normal ploidy: #{str}")
      2
    elsif (min >= haploid && max <= diploid) || (min <= diploid && max < triploid)
      @log.debug("Relatively normal ploidy #{str}")
      2
    elsif min >= haploid && max < quadraploid
      @log.debug("Triploid #{str}")
      3
    elsif max >= quadraploid
      @log.debug("Quadraploid #{str}")
      4
    else
      raise KaryotypeError, "Failed to determine ploidy for #{str}"
    end
  end
end
121
+ end
@@ -0,0 +1,3 @@
1
module Cytogenetics
  # Gem version string; frozen so the shared constant cannot be modified in place.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,36 @@
1
+ require 'cytogenetics/aberration'
2
+ require 'cytogenetics/breakpoint'
3
+ require 'cytogenetics/chromosome'
4
+ require 'cytogenetics/chromosome_aberrations'
5
+ require 'cytogenetics/fragment'
6
+ require 'cytogenetics/karyotype'
7
+ require 'cytogenetics/karyotype_error'
8
+
9
+
10
+ require 'cytogenetics/utils/karyotype_reader'
11
+ require 'cytogenetics/utils/band_reader'
12
+
13
+ require 'yaml'
14
+ require 'logger'
15
+
16
# Entry point of the gem: holds the logger shared by all classes and a
# convenience constructor for Karyotype.
module Cytogenetics

  # Replaces the shared logger.
  def self.logger=(log)
    @clog = log
  end

  # Returns the shared logger. When none has been assigned, a Logger writing
  # to STDOUT at FATAL level is created and kept.
  def self.logger
    @clog ||= begin
      fallback = Logger.new(STDOUT)
      fallback.level = Logger::FATAL
      fallback
    end
  end

  # Builds a Karyotype from the given karyotype string.
  def self.karyotype(kary_str)
    Karyotype.new(kary_str)
  end

end
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cytogenetics
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Sarah Killcoyne
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-11-26 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Karyotype parser based on ISCN specification. Note that there are still
15
+ many bugs. The ISCN language is poorly followed by most users so the parser is still
16
+ being developed.
17
+ email: sarah.killcoyne@uni.lu
18
+ executables: []
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - lib/cytogenetics/aberration.rb
23
+ - lib/cytogenetics/breakpoint.rb
24
+ - lib/cytogenetics/chromosome.rb
25
+ - lib/cytogenetics/chromosome_aberrations.rb
26
+ - lib/cytogenetics/fragment.rb
27
+ - lib/cytogenetics/karyotype.rb
28
+ - lib/cytogenetics/karyotype_error.rb
29
+ - lib/cytogenetics/utils/band_reader.rb
30
+ - lib/cytogenetics/utils/karyotype_reader.rb
31
+ - lib/cytogenetics/version.rb
32
+ - lib/cytogenetics.rb
33
+ homepage:
34
+ licenses:
35
+ - http://www.apache.org/licenses/LICENSE-2.0.html
36
+ post_install_message:
37
+ rdoc_options: []
38
+ require_paths:
39
+ - lib
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ! '>='
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ requirements: []
53
+ rubyforge_project:
54
+ rubygems_version: 1.8.24
55
+ signing_key:
56
+ specification_version: 3
57
+ summary: Karyotype parser based on ISCN specification.
58
+ test_files: []