cytogenetics 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,157 @@
1
+ require 'yaml'
2
+
3
module Cytogenetics
  # Base class for a single aberration taken from a karyotype string,
  # e.g. "del(7)(q22)". Concrete aberration types live in
  # ChromosomeAberrations; each sets @kt (type name) and @rx (matching regex).
  #
  # On construction the aberration string is parsed into Breakpoint objects.
  class Aberration
    attr_accessor :breakpoints
    attr_reader :abr, :ab_objs, :fragments

    class << self
      # Returns a Hash of every class declared under ChromosomeAberrations,
      # keyed by the class's type as a Symbol.
      def instantiate_aberrations
        ChromosomeAberrations.constants.each_with_object({}) do |const_name, by_type|
          klass = ChromosomeAberrations.const_get(const_name)
          by_type[klass.type.to_sym] = klass
        end
      end
    end

    # Type name declared by the subclass (@kt); nil on Aberration itself.
    def self.type
      @kt
    end

    # Regex declared by the subclass (@rx); nil on Aberration itself.
    def self.regex
      @rx
    end

    # Returns a Hash of type Symbol => regex for every class under
    # ChromosomeAberrations.
    def self.all_regex
      ChromosomeAberrations.constants.each_with_object({}) do |const_name, by_type|
        klass = ChromosomeAberrations.const_get(const_name)
        by_type[klass.type.to_sym] = klass.regex
      end
    end

    # Memoised result of instantiate_aberrations.
    def self.aberration_objs
      @ab_objs ||= instantiate_aberrations
    end

    # All known type Symbols except those whose name contains "gain" or "loss".
    def self.aberration_type
      Aberration.all_regex.keys.reject { |type| type.to_s.match(/gain|loss/) }
    end

    # Returns the type Symbol of the first regex matching +abr+, or :unk.
    def self.classify_aberration(abr)
      Aberration.all_regex.each_pair do |type, regex|
        return type if abr.match(regex)
      end
      :unk
    end

    # str - the aberration as written in the karyotype, e.g. "inv(9)(p11q13)".
    def initialize(str)
      config_logging

      @abr = str
      @breakpoints = []
      @fragments = []

      get_breakpoints
      # subclasses may push whole arrays of breakpoints
      @breakpoints.flatten!
    end

    # Removes the first breakpoint equal to +bp+.
    # Returns the index it was removed from, or nil when it was not present.
    def remove_breakpoint(bp)
      removed = @breakpoints.index(bp)
      @breakpoints.delete_at(removed) if removed
      removed
    end

    def to_s
      "#{@abr}: #{@breakpoints.join(',')}"
    end

    # The original file wrote `:private` (a bare Symbol literal, which does
    # nothing); the keyword is what was intended.
    private

    # Fills @breakpoints from @abr. Nothing is added when the chromosome or
    # band section cannot be parsed.
    def get_breakpoints
      chr_i = find_chr(@abr)
      return if chr_i.nil?

      band_i = find_bands(@abr, chr_i[:end_index])
      # No band --> TODO add this as information somewhere but not as a breakpoint
      return if band_i.nil?

      chr_i[:chr].each_with_index do |c, i|
        band = band_i[:bands][i]
        next if band.nil? # fewer band definitions than chromosomes

        find_fragments(band).each do |f|
          @breakpoints << Breakpoint.new(c, f, self.class.type)
        end
      end
    end

    # Parses the first parenthesised group of +str+ as a chromosome list.
    #
    # Returns {:start_index, :end_index, :chr} where the indexes are the
    # positions of "(" and ")" and :chr is the Array of chromosome names,
    # or nil (after logging a warning) when the group is missing or holds
    # anything other than a number, X or Y.
    def find_chr(str)
      chr_s = str.index('(')
      chr_e = chr_s.nil? ? nil : str.index(')', chr_s)
      if chr_s.nil? || chr_e.nil?
        @log.warn("No chromosome defined from #{str}, skipped.")
        return
      end

      chrs = str[chr_s + 1..chr_e - 1].split(/;|:/)
      # The alternation is grouped so the anchors apply to every branch.
      unless chrs.all? { |chr| chr.match(/\A(\d+|X|Y)\z/) }
        @log.warn("No chromosome defined from #{str}, skipped.")
        return
      end
      { :start_index => chr_s, :end_index => chr_e, :chr => chrs }
    end

    # Parses the band group that directly follows the chromosome group.
    #
    # str   - the aberration string
    # index - position of the ")" closing the chromosome group
    #
    # Returns {:start_index, :end_index, :bands} or nil when no usable band
    # definition is present. When the closing ")" is missing, :end_index is
    # str.length so that the final character is still part of the band text.
    def find_bands(str, index)
      if str.length.eql?(index + 1)
        @log.warn("No bands defined in #{str}, skipped.")
        return
      end

      band_s = str.index('(', index)
      return if band_s.nil?
      # needs a band-looking token, and the band group must start right
      # after the chromosome group: ")("
      return unless str.match(/(q|p)(\d+|\?)/) && str[band_s - 1..band_s].eql?(')(')

      band_e = str.index(')', band_s) || str.length
      band_def = str[band_s + 1..band_e - 1]

      if band_def.match(/::/)
        @log.warn("Aberration defined using different language, not currently parsed skipping: #{@abr}")
        return
      end

      # sometimes bands are given as a range; only the first one is kept
      bands = band_def.split(/;|:/).map { |b| b.sub(/-[qp]\d+(\.\d+)?$/, '') }

      # one or more arm+band groups, so "p13q22" (see find_fragments) passes
      unless bands.all? { |b| b.match(/\A(?:[pq]\d+(?:\.\d+)?)+\z/) }
        @log.warn("Bands incorrectly defined in #{str}")
        return
      end

      { :start_index => band_s, :end_index => band_e, :bands => bands }
    end

    # sometimes bands are defined for a single chr as p13q22;
    # returns each arm+band token ("p13", "q22"), sub-bands are dropped.
    def find_fragments(str)
      str.scan(/[pq]\d+/)
    end

    def config_logging
      @log = Cytogenetics.logger
    end
  end
end
@@ -0,0 +1,37 @@
1
module Cytogenetics
  # A chromosome/band position, e.g. chromosome "5" band "q13".
  class Breakpoint
    attr_accessor :chr, :band, :type

    # args[0] - chromosome name: a String holding a number, "X" or "Y"
    # args[1] - band: a non-empty String
    # args[2] - optional aberration type, stored as-is
    #
    # Raises ArgumentError (after logging an error) for any other input.
    def initialize(*args)
      config_logging
      c = args[0]
      b = args[1]
      @type = args[2] if args.length > 2

      unless valid_chromosome?(c) && b.is_a?(String) && !b.empty?
        @log.error("#{c}#{b} is not a valid breakpoint")
        raise ArgumentError, "#{c}#{b} is not a valid breakpoint"
      end

      @chr = c
      @band = b
    end

    # Returns "p" or "q" when the band contains an arm letter followed by
    # digits, otherwise nil (so "qter" yields nil).
    def arm
      @band[/(q|p)\d+/, 1]
    end

    def to_s
      "#{@chr}#{@band}"
    end

    # The original file wrote `:private`, a Symbol literal with no effect.
    private

    # The whole string must be a number, X or Y. The previous unanchored
    # pattern accepted any string merely containing one of those.
    def valid_chromosome?(c)
      c.is_a?(String) && !c.match(/\A(\d+|X|Y)\z/).nil?
    end

    def config_logging
      @log = Cytogenetics.logger
    end
  end
end
@@ -0,0 +1,68 @@
1
+ require 'cytogenetics/utils/band_reader'
2
+
3
module Cytogenetics
  # A named chromosome together with the aberrations attached to it.
  class Chromosome
    include BandReader

    class << self
      attr_accessor :normal_bands
    end

    attr_reader :name, :aberrations

    # args[0] - chromosome identifier: Integer or String (number, "X", "Y")
    # args[1] - when exactly `true`, the band list for this chromosome is
    #           read via BandReader#bands from "resources/HsBands.txt"
    #
    # Raises ArgumentError for any other identifier.
    def initialize(*args)
      config_logging
      chr = args[0]
      # Integer covers what used to be Fixnum/Bignum; Fixnum no longer
      # exists on current Ruby versions.
      chr = chr.to_s if chr.is_a?(Integer)

      # Alternation grouped so both anchors apply to every branch.
      unless chr.is_a?(String) && chr.match(/\A(\d+|X|Y)\z/)
        raise ArgumentError, "#{chr} is not a valid chromosome identifier."
      end

      @name = chr
      @aberrations = []
      ## TODO quit hardcoding (path is resolved against the working directory)
      @normal_bands = bands(@name, "resources/HsBands.txt") if args.length > 1 && args[1].eql?(true)
    end

    def to_s
      "#{@name}"
    end

    # Attaches an Aberration to this chromosome.
    # Raises ArgumentError when +obj+ is not an Aberration.
    def aberration(obj)
      raise ArgumentError, "Not an Aberration object" unless obj.is_a?(Aberration)

      ## TODO Deal with bands, HOWEVER because the chromosome has aberration
      ## objects breakpoints can include bands for which no chromosome object
      ## is created (breakpoints are not yet checked against @normal_bands).
      @aberrations << obj
    end

    # One entry per aberration, each being that aberration's breakpoints
    # (i.e. the result is nested, not flattened).
    def breakpoints
      @aberrations.map { |a| a.breakpoints }
    end

    # One entry per aberration, each being that aberration's fragments.
    def fragments
      @aberrations.map { |a| a.fragments }
    end

    # The original file wrote `:private`, a Symbol literal with no effect.
    private

    def config_logging
      @log = Cytogenetics.logger
    end
  end
end
@@ -0,0 +1,188 @@
1
+ require 'cytogenetics/aberration'
2
+
3
module Cytogenetics
  # Concrete aberration types. Every class sets @kt (type name returned by
  # Aberration.type) and @rx (regex returned by Aberration.regex).
  module ChromosomeAberrations
    ## INVERSION
    class Inversion < Aberration
      @kt = 'inv'
      @rx = /^inv\((\d+|X|Y)\)/
    end

    ## DUPLICATION
    class Duplication < Aberration
      @kt = 'dup'
      @rx = /^dup\((\d+|X|Y)\)/
    end

    ## INSERTION
    class Insertion < Aberration
      @kt = 'ins'
      @rx = /^ins\((\d+|X|Y)\)/
    end

    ## DELETION
    class Deletion < Aberration
      @kt = 'del'
      @rx = /^del\((\d+|X|Y)\)/
    end

    ## ADD (addition of unknown material)
    class Addition < Aberration
      @kt = 'add'
      @rx = /^add\((\d+|X|Y)\)/
    end

    ## ISOCHROMOSOME
    class Isochromosome < Aberration
      @kt = 'iso'
      @rx = /^i\((\d+|X|Y)\)/
    end

    ## DICENTRIC
    class DicentricChromosome < Aberration
      @kt = 'dic'
      @rx = /^dic\((\d+|X|Y)[;|:](\d+|X|Y)\)/

      # TODO am not sure how the dic rearrangment works, see this in CyDas
      # dic(13;13)(q14;q32); no fragments are produced for it yet.
    end

    ## RING ## TODO figure out the right regex for this
    #class RingChromosome < Aberration
    #  @kt = 'ring'
    #  @rx = /^r\(/
    #end

    ## ROBERTSONIAN
    #class Robertsonian < Aberration
    #  @kt = 'rob'
    #  @rx = /^rob\(/
    #end

    ## DERIVATIVE
    class Derivative < Aberration
      @kt = 'der'
      @rx = /^der\((\d+|X|Y)\)/

      # Splits the text after "der(N)" into its individual abnormalities,
      # parses each with its own class and collects all their breakpoints.
      # Translocation breakpoints are additionally turned into fragments.
      def get_breakpoints
        @aberrations = []

        chr_i = find_chr(@abr)
        return if chr_i.nil? # find_chr has already logged the reason

        ab_objs = Aberration.aberration_objs
        derivative_abr = @abr[chr_i[:end_index] + 1..@abr.length]

        # separate different abnormalities within the derivative chromosome and clean it up to make it parseable
        abnormalities = derivative_abr.scan(/([^\(\)]+\(([^\(\)]|\)\()*\))/).collect { |a| a[0] }

        trans_bps = []
        abnormalities.each do |abn|
          abrclass = Aberration.classify_aberration(abn)

          if abrclass.to_s.eql?('unk') # not dealing with unknowns
            @log.warn("Cannot handle #{abn}, incorrect format.")
            next
          end

          # special handling because translocations are written as a sliding window
          # translocations should also only every have 2 breakpoints...
          if abrclass.to_s.eql?(ChromosomeAberrations::Translocation.type)
            trans = ChromosomeAberrations::Translocation.new(abn)
            trans_bps << trans.breakpoints
            @breakpoints << trans.breakpoints
          else
            ab_obj = ab_objs[abrclass].new(abn)
            unless ab_obj.breakpoints.empty?
              @aberrations << ab_obj
              @breakpoints << ab_obj.breakpoints
            end
          end
        end

        trans_bps = trans_bps.reject { |bps| bps.empty? }.flatten
        add_fragments(trans_bps) unless trans_bps.empty?
      end

      # The original file wrote `:private`, a Symbol literal with no effect.
      private

      # have to reorder the array and then turn Breakpoints into fragments.
      #
      # tbp_list - flat list of translocation breakpoints, two per
      #            translocation. The first pair is kept in order and
      #            preceded by a "<arm>ter" breakpoint; every later pair is
      #            swapped; a closing "<arm>ter" breakpoint is appended and
      #            the result is paired up into Fragment objects.
      # NOTE(review): Fragment requires both ends on one chromosome, so this
      # ordering presumably relies on how consecutive translocations are
      # written - confirm with multi-translocation derivatives.
      def add_fragments(tbp_list)
        sorted = []
        tbp_list.each_with_index do |e, i|
          if i <= 1
            sorted << Breakpoint.new(e.chr, "#{e.arm}ter") if i.eql?(0)
            sorted << e
          elsif i.even?
            sorted << tbp_list[i + 1]
            sorted << tbp_list[i]
          end
        end
        sorted << Breakpoint.new(sorted[-1].chr, "#{sorted[-1].arm}ter")
        sorted.each_slice(2) do |pair|
          @fragments << Fragment.new(pair[0], pair[1])
        end
      end
    end

    ## TRANSLOCATION ... this is typically a subset of Derivative chromosomes, but have seen it on it's own
    class Translocation < Aberration
      @kt = 'trans'
      @rx = /^t\((\d+|X|Y)[;|:](\d+|X|Y)\)/

      ## TWo ways of defining translocations:
      ## 1) t(1;3)(p31;p13)
      #
      # Adds one breakpoint per chromosome, pairing the i-th chromosome with
      # the i-th band definition. Nothing is added when the chromosome list
      # or the band list cannot be parsed.
      def get_breakpoints
        chr_i = find_chr(@abr)
        return if chr_i.nil? # find_chr has already logged the reason

        band_i = find_bands(@abr, chr_i[:end_index])
        if band_i
          chr_i[:chr].each_with_index do |c, i|
            @breakpoints << Breakpoint.new(c, band_i[:bands][i], 'trans')
          end
        else
          @log.warn("No bands defined in #{@abr}")
        end
      end
    end

    ## FRAGMENT
    class ChromosomeFragment < Aberration
      @kt = 'frag'
      @rx = /^frag\((\d+|X|Y)\)/
    end

    ## CHROMOSOME GAIN
    class ChromosomeGain < Aberration
      @kt = 'gain'
      @rx = /^\+(\d+|X|Y)$/

      # str - the gain as written, e.g. "+7"; stored without the "+".
      # A whole-chromosome gain has no breakpoints or fragments.
      def initialize(str)
        config_logging
        @abr = str.sub("+", "")
        @breakpoints = []
        @fragments = []
      end
    end

    ## CHROMOSOME LOSS
    class ChromosomeLoss < Aberration
      @kt = 'loss'
      @rx = /^-(\d+|X|Y)$/

      # str - the loss as written, e.g. "-7"; stored without the "-".
      # A whole-chromosome loss has no breakpoints or fragments.
      def initialize(str)
        config_logging
        @abr = str.sub("-", "")
        @breakpoints = []
        @fragments = []
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+
2
module Cytogenetics
  # Fragment raises this error, but no file listed in the gem defines it;
  # declare it here unless something else already has.
  unless const_defined?(:GenomeStructureError)
    class GenomeStructureError < StandardError; end
  end

  # A stretch of one chromosome delimited by two Breakpoints.
  class Fragment
    attr_reader :chr, :start, :end, :genes

    # args - exactly two Breakpoint objects: start and end.
    #
    # Raises ArgumentError when the arguments are missing or not Breakpoints.
    # Raises GenomeStructureError when they lie on different chromosomes.
    def initialize(*args)
      config_logging
      unless args.length.eql?(2) && args[0].is_a?(Breakpoint) && args[1].is_a?(Breakpoint)
        raise ArgumentError, "Expected arguments are missing or are not Breakpoints: #{args}"
      end

      @genes = [] # must exist before add_gene appends to it
      @start = args[0]
      @end = args[1]
      @chr = @start.chr

      unless @start.chr.eql?(@end.chr)
        raise GenomeStructureError, "Fragments must be within the same chromosome: #{args}"
      end
    end

    # Appends +gene+ to the list returned by #genes.
    def add_gene(gene)
      @genes << gene
    end

    def to_s
      "#{@start.to_s} --> #{@end.to_s}"
    end

    ## TODO a length method will require length in basepairs of each band

    # The original file wrote `:private`, a Symbol literal with no effect.
    private

    def config_logging
      @log = Cytogenetics.logger
    end
  end
end
45
+
@@ -0,0 +1,172 @@
1
+ require 'yaml'
2
+
3
+ require 'cytogenetics/utils/karyotype_reader'
4
+
5
module Cytogenetics
  # Parses a karyotype string such as "47,XY,+7,del(5)(q13q33)" into ploidy,
  # sex, per-chromosome counts and Chromosome objects carrying aberrations.
  class Karyotype
    @@haploid = 23

    attr_reader :aberrations, :karyotype, :ploidy, :sex, :abnormal_chr, :normal_chr

    class << self
      attr_accessor :aberration_objs, :unclear_aberrations, :log
    end

    # karyotype_str - String of at least two characters; whitespace is ignored.
    #
    # Raises ArgumentError for any other argument and KaryotypeError when the
    # ploidy or sex field is absent.
    def initialize(karyotype_str)
      config_logging
      unless karyotype_str.is_a?(String) && karyotype_str.length > 1
        raise ArgumentError, "#{karyotype_str} is not a karyotype."
      end
      @log.info("Reading karyotype #{karyotype_str}")

      @karyotype = karyotype_str.gsub(/\s/, "")
      @normal_chr = {}
      @abnormal_chr = {}
      @aberrations = {}
      @unclear_aberrations = []
      setup_abberation_objs
      prep_karyotype
      handle_ploidy_diff
      analyze
    end

    # Builds a Chromosome (with an attached aberration object) for every
    # classified aberration except gains/losses, stored in @abnormal_chr
    # under the chromosome name.
    # NOTE(review): Chromosome.new(name, true) reads "resources/HsBands.txt"
    # relative to the working directory; that file is not among the gem's
    # listed files - confirm where it is expected to come from.
    def analyze
      Aberration.aberration_type.each do |abr_type|
        next unless @aberrations.has_key?(abr_type)
        abr_class = @aberration_obj[abr_type]

        @aberrations[abr_type].each do |abr|
          found = abr.match(abr_class.regex)
          next if found.nil?

          first_chr = found[1]
          second_chr = found[2] # nil when the regex has a single group
          unless second_chr.nil? || first_chr.eql?(second_chr)
            @log.warn("Aberration has two chromosomes #{abr} but only the first one is handled.")
          end

          ## TODO deal with the case of 2 chromosomes defined in the aberration
          chr = Chromosome.new(first_chr, true)
          chr.aberration(abr_class.new(abr))

          (@abnormal_chr[chr.name] ||= []) << chr
        end
      end
    end

    # get breakpoints for the karyotype (flat list)
    def report_breakpoints
      collect_from_abnormal { |chr| chr.breakpoints }
    end

    # Flat list of fragments across all abnormal chromosomes.
    def report_fragments
      collect_from_abnormal { |chr| chr.fragments }
    end

    # Whole-chromosome changes as strings: losses ("-N") first, then gains ("+N").
    def report_ploidy_change
      pd = []
      pd.concat(@aberrations[:loss].map { |e| "-#{e}" }) if @aberrations[:loss]
      pd.concat(@aberrations[:gain].map { |e| "+#{e}" }) if @aberrations[:gain]
      pd
    end

    # Returns a multi-line String listing normal chromosome counts followed
    # by the aberrations and breakpoints of each abnormal chromosome.
    # (Previously the built string was discarded and the Hash returned by
    # each_pair came back instead.)
    def summarize
      summary = "NORMAL CHROMOSOMES\n".dup
      @normal_chr.each_pair do |chr, count|
        summary << " #{chr}: #{count}\n"
      end

      summary << "\nABNORMAL:"
      @abnormal_chr.each_pair do |chr, list|
        summary << "\n#{chr}"
        list.each do |c|
          summary << "\n#{c.aberrations}\n"
          summary << "\n#{c.breakpoints}\n"
        end
      end
      summary
    end

    # -------------------- # PRIVATE # -------------------- #
    # The original file wrote `:private`, a Symbol literal with no effect.
    private

    def config_logging
      @log = Cytogenetics.logger
    end

    def setup_abberation_objs
      @aberration_obj = Aberration.aberration_objs
    end

    # Yields every Chromosome in @abnormal_chr, drops empty results and
    # returns everything else flattened into one list.
    def collect_from_abnormal
      collected = []
      @abnormal_chr.each_pair do |_name, chr_list|
        chr_list.each { |chr| collected << yield(chr) }
      end
      collected.reject { |entry| entry.empty? }.flatten
    end

    # Applies whole-chromosome losses and gains to the normal counts.
    def handle_ploidy_diff
      adjust_normal_count(@aberrations[:loss], -1)
      adjust_normal_count(@aberrations[:gain], 1)
    end

    # chromosomes - Array of chromosome names, or nil
    # delta       - amount added to each named chromosome's count
    # Names with no entry in @normal_chr are skipped with a warning rather
    # than failing on nil arithmetic.
    def adjust_normal_count(chromosomes, delta)
      return if chromosomes.nil?

      chromosomes.each do |c|
        if @normal_chr.has_key?(c)
          @normal_chr[c] += delta
        else
          @log.warn("Chromosome #{c} is not a known chromosome, ploidy change ignored.")
        end
      end
    end

    # determine ploidy & gender, clean up each aberration and drop any "unknown"
    def prep_karyotype
      @karyotype.gsub!(/\s/, "")
      clones = @karyotype.scan(/\[\d+\]/)
      @log.warn("Karyotype is a collection of clones, analysis may be inaccurate.") if clones.length > 3

      @karyotype.gsub!(/\[\d+\]/, "") # don't care about numbers of cells: [5]

      pl, sc = @karyotype.split(",")[0..1]
      unless pl && sc
        raise KaryotypeError, "'#{@karyotype}' is not a valid karyotype. Ploidy and sex defnitions are absent"
      end
      @ploidy = KaryotypeReader.calculate_ploidy(pl, @@haploid)
      sex_chr = KaryotypeReader.determine_sex(sc)

      @sex = nil
      karyotype_index = 1 # sometimes the sex is not indicated and there's no case information to figure it out
      if sex_chr.values.inject { |sum, v| sum + v } > 0
        # repeat each sex chromosome by its count: {"X"=>2,"Y"=>0} -> "XX"
        # (joining the keys alone always produced "XY")
        @sex = sex_chr.map { |chr, count| chr * count }.join
        karyotype_index = 2
      end

      # NOTE(review): this creates an entry "23" alongside X and Y; 1..22
      # looks more likely to be intended - confirm before changing.
      (1..23).each { |c| @normal_chr[c.to_s] = @ploidy.to_i }
      sex_chr.each_pair { |c, p| @normal_chr[c] = p.to_i }

      # deal with the most common karyotype string inconsistencies
      # concat (not a set union) so that repeated entries such as "+7,+7"
      # survive to be counted below
      cleaned_karyotype = []
      @karyotype.split(",")[karyotype_index..-1].each do |abr|
        cleaned_karyotype.concat(KaryotypeReader.cleanup(abr))
      end
      @karyotype = cleaned_karyotype

      # classify each type of aberration in the karyotype
      @karyotype.each do |k|
        abrclass = Aberration.classify_aberration(k)
        (@aberrations[abrclass] ||= []) << k.sub(/^(\+|-)?/, "")
      end

      # classification keys are Symbols while .type is a String, so compare as Strings
      ploidy_types = [ChromosomeAberrations::ChromosomeGain.type,
                      ChromosomeAberrations::ChromosomeLoss.type]
      @aberrations.each_pair do |abrclass, abrlist|
        next if ploidy_types.include?(abrclass.to_s)
        # aberrations other than chromosome gains/losses should be uniquely represented

        counts = abrlist.inject(Hash.new(0)) { |h, i| h[i] += 1; h }
        counts.each_pair { |k, v| @log.warn("#{k} was seen multiple times. Analyzed only once.") if v > 1 }

        @aberrations[abrclass] = abrlist.uniq
      end
    end
  end
end
@@ -0,0 +1,4 @@
1
module Cytogenetics
  # Error type used by the karyotype parsing code; a plain StandardError
  # subclass with no additional behaviour.
  KaryotypeError = Class.new(StandardError)
end
@@ -0,0 +1,30 @@
1
+
2
module Cytogenetics
  # Mixin that reads a band listing: one "<chr><arm><band>" entry per line,
  # e.g. "1p36.33".
  module BandReader
    @bands_by_chr = {}

    # chr  - chromosome name as it appears at the start of each line
    # file - an open File, or a path that File.open accepts
    #
    # Returns the unique band names for +chr+, or an empty Array when the
    # listing has no entry for it. A file opened here is closed again; a
    # File passed in by the caller is left open.
    def bands(chr, file)
      by_chr =
        if file.is_a?(File)
          read_file(file)
        else
          File.open(file, 'r') { |handle| read_file(handle) }
        end
      (by_chr[chr] || []).uniq
    end

    # file - any object with #each_line
    #
    # Returns a Hash of chromosome => Array of band names. A sub-band such
    # as "1p36.33" contributes both itself and its parent band "1p36".
    # Lines that do not start with a chromosome followed by p/q are skipped.
    def read_file(file)
      band_by_chr = {}
      file.each_line do |line|
        entry = line.chomp.match(/^(\d+|X|Y)([pq].*)/)
        next if entry.nil? # blank or header line

        c = entry[1]
        b = entry[2]
        (band_by_chr[c] ||= []) << "#{c}#{b}"

        parent = b[/([pq]\d+)\.\d+/, 1]
        band_by_chr[c] << "#{c}#{parent}" if parent
      end
      band_by_chr
    end
  end
end
+ end
@@ -0,0 +1,121 @@
1
+ require 'cytogenetics/karyotype_error'
2
+
3
module Cytogenetics
  # Stateless helpers for normalising the fields of a karyotype string.
  # None of them modify the strings they are given.
  class KaryotypeReader
    def self.config_logging
      @log = Cytogenetics.logger
    end

    # Normalises one comma-separated karyotype entry.
    #
    # abr - a single entry, e.g. "+t(13;X)(q13;p12)", "13x3", "add(7)x2"
    #
    # Returns an Array of zero or more cleaned entries:
    #   * a leading +/- is dropped unless the entry is a plain gain/loss
    #   * entries containing "?" or "**" are discarded
    #   * "<chr>xN": N=0 -> two losses, N=1 -> one loss, N=2 -> nothing,
    #     N>2 -> N-2 gains
    #   * "<anything>xN" -> that entry repeated N times
    def self.cleanup(abr)
      config_logging

      new_abr = []

      # +t(13;X)(q13;p12) doesn't need a +
      abr = abr.sub(/^[\+|-]/, "") unless abr.match(/^[\+|-][\d|X|Y]+$/)

      # not going to bother with aberrations that are unclear/unknown '?' or with '**'
      if abr.match(/\?|\*\*/)
        @log.warn("Removing aberration with unknown/unclear information: #{abr}")
        return new_abr
      end

      # 13x2 is normal, 13x3 is a duplicate and should read +13
      if (counted = abr.match(/^([\d+|X|Y]+)x(\d+)/))
        chr = counted[1]
        copies = counted[2].to_i
        if copies.eql?(0)
          # managed to lose both chromosomes in a diploidy karyotype
          # (the former loop over 1..0 never ran, so nothing was reported)
          2.times { new_abr.push("-#{chr}") }
        elsif copies > 2
          # 13x3 really just means 1 additional chr 13 since normal ploidy is 2
          (copies - 2).times { new_abr.push("+#{chr}") }
        elsif copies.eql?(1)
          new_abr.push("-#{chr}")
        end
      elsif (repeated = abr.match(/(.*)x(\d+)$/))
        # add(9)(p21)x2 or add(7)x2 should indicate that this "additional
        # material of unk origin" happened twice
        repeated[2].to_i.times { new_abr.push(repeated[1]) }
      else # everything else
        new_abr.push(abr)
      end

      new_abr
    end

    # Counts the sex chromosomes in +str+.
    #
    # Returns {"X" => n, "Y" => m}. When +str+ holds anything other than
    # X and Y characters a warning is logged and both counts are 0.
    def self.determine_sex(str)
      config_logging

      sex_chr = { 'X' => 0, 'Y' => 0 }

      if str.match(/^(X|Y)+$/)
        # ploidy number makes no difference since this string will tell us
        # how many or at least what the gender should be.
        # A lone X is NOT assumed to be a male that lost Y: often XX -X is
        # listed as X.
        str.match(/([X|Y]+)/).to_s.split(//).each { |c| sex_chr[c] += 1 }
      else
        @log.warn("Definition of gender incorrect (#{str})")
      end

      sex_chr
    end

    # Estimates the ploidy level from the chromosome-count field.
    #
    # str     - count field, e.g. "46", "46-53", "46~53", "47(45-49)", "46<2n>"
    # haploid - size of one chromosome set
    #
    # Returns 2, 3 or 4. A count with no leading number is treated as diploid.
    def self.calculate_ploidy(str, haploid)
      config_logging

      str = str.sub(/<.{2,}>/, "")
      bracketed = str.match(/\d+\((\d+-\d+)\)/)
      str = bracketed[1] if bracketed

      diploid = haploid * 2
      triploid = haploid * 3
      quadraploid = haploid * 4

      # typically see di- tri- quad- if more than that it should be noted
      min = diploid
      max = diploid
      if (range = str.match(/(\d+)[-|~](\d+)/)) # num and range or just range: 46-53
        min = range[1].to_i
        max = range[2].to_i
      elsif (single = str.match(/^(\d+)/)) # single num: 72
        min = single[1].to_i
        max = min
      end

      if min < haploid
        @log.warn("Ploidy determination may be bad as the min was less than haploid (#{str}). Setting to haploid.")
        min = haploid
      end

      if min.eql?(diploid) && max.eql?(diploid)
        @log.debug("Normal ploidy: #{str}")
        2
      elsif (min >= haploid && max <= diploid) || (min <= diploid && max < triploid)
        @log.debug("Relatively normal ploidy #{str}")
        2
      elsif min >= haploid && max < quadraploid
        @log.debug("Triploid #{str}")
        3
      elsif max >= quadraploid
        @log.debug("Quadraploid #{str}")
        4
      else
        raise KaryotypeError, "Failed to determine ploidy for #{str}"
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Cytogenetics
  # Gem version string; frozen so the constant cannot be mutated in place.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,36 @@
1
+ require 'cytogenetics/aberration'
2
+ require 'cytogenetics/breakpoint'
3
+ require 'cytogenetics/chromosome'
4
+ require 'cytogenetics/chromosome_aberrations'
5
+ require 'cytogenetics/fragment'
6
+ require 'cytogenetics/karyotype'
7
+ require 'cytogenetics/karyotype_error'
8
+
9
+
10
+ require 'cytogenetics/utils/karyotype_reader'
11
+ require 'cytogenetics/utils/band_reader'
12
+
13
+ require 'yaml'
14
+ require 'logger'
15
+
16
module Cytogenetics
  # Replaces the logger handed out by Cytogenetics.logger.
  def self.logger=(log)
    @clog = log
  end

  # Returns the current logger. When none has been set, a Logger writing to
  # STDOUT at FATAL level is created on first use and kept.
  def self.logger
    @clog ||= Logger.new(STDOUT).tap { |fallback| fallback.level = Logger::FATAL }
  end

  # Convenience constructor: builds a Karyotype from +kary_str+.
  def self.karyotype(kary_str)
    Karyotype.new(kary_str)
  end
end
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cytogenetics
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Sarah Killcoyne
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-11-26 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Karyotype parser based on ISCN specification. Note that there are still
15
+ many bugs. The ISCN language is poorly followed by most users so the parser is still
16
+ being developed.
17
+ email: sarah.killcoyne@uni.lu
18
+ executables: []
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - lib/cytogenetics/aberration.rb
23
+ - lib/cytogenetics/breakpoint.rb
24
+ - lib/cytogenetics/chromosome.rb
25
+ - lib/cytogenetics/chromosome_aberrations.rb
26
+ - lib/cytogenetics/fragment.rb
27
+ - lib/cytogenetics/karyotype.rb
28
+ - lib/cytogenetics/karyotype_error.rb
29
+ - lib/cytogenetics/utils/band_reader.rb
30
+ - lib/cytogenetics/utils/karyotype_reader.rb
31
+ - lib/cytogenetics/version.rb
32
+ - lib/cytogenetics.rb
33
+ homepage:
34
+ licenses:
35
+ - http://www.apache.org/licenses/LICENSE-2.0.html
36
+ post_install_message:
37
+ rdoc_options: []
38
+ require_paths:
39
+ - lib
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ! '>='
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ requirements: []
53
+ rubyforge_project:
54
+ rubygems_version: 1.8.24
55
+ signing_key:
56
+ specification_version: 3
57
+ summary: Karyotype parser based on ISCN specification.
58
+ test_files: []