cytogenetics 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/cytogenetics/aberration.rb +157 -0
- data/lib/cytogenetics/breakpoint.rb +37 -0
- data/lib/cytogenetics/chromosome.rb +68 -0
- data/lib/cytogenetics/chromosome_aberrations.rb +188 -0
- data/lib/cytogenetics/fragment.rb +45 -0
- data/lib/cytogenetics/karyotype.rb +172 -0
- data/lib/cytogenetics/karyotype_error.rb +4 -0
- data/lib/cytogenetics/utils/band_reader.rb +30 -0
- data/lib/cytogenetics/utils/karyotype_reader.rb +121 -0
- data/lib/cytogenetics/version.rb +3 -0
- data/lib/cytogenetics.rb +36 -0
- metadata +58 -0
@@ -0,0 +1,157 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module Cytogenetics
|
4
|
+
# Base class for one aberration string of a karyotype (e.g. "del(7)(q22)").
#
# Subclasses declared in ChromosomeAberrations set two class-level instance
# variables: @kt (the type key, exposed through Aberration.type) and @rx (the
# regular expression that recognises the aberration, exposed through
# Aberration.regex). Constructing an instance parses the string into
# Breakpoint objects.
#
# NOTE: the original source used the symbol literal `:private` as a section
# marker, which has no effect; the helper methods below are therefore public
# and are kept public so that existing callers keep working.
class Aberration
  # Breakpoint objects parsed from the aberration string.
  attr_accessor :breakpoints
  # abr       - the aberration string handed to the constructor
  # ab_objs   - not assigned on instances anywhere in this class
  # fragments - Array that subclasses may fill with Fragment objects
  attr_reader :abr, :ab_objs, :fragments

  class << self
    # Builds a Hash that maps every aberration type (as a Symbol) to the
    # class declared for it in ChromosomeAberrations.
    def instantiate_aberrations
      aberration_obj = {}
      ChromosomeAberrations.constants.each do |ca|
        klass = ChromosomeAberrations.const_get(ca)
        aberration_obj[klass.type.to_sym] = klass
      end
      aberration_obj
    end
  end

  # Type key of this class (the class-level @kt); nil when none was set.
  def self.type
    @kt
  end

  # Recognition regex of this class (the class-level @rx); nil when none was set.
  def self.regex
    @rx
  end

  # Returns a Hash of type Symbol => regex for every ChromosomeAberrations class.
  def self.all_regex
    rx = {}
    ChromosomeAberrations.constants.each do |ca|
      klass = ChromosomeAberrations.const_get(ca)
      rx[klass.type.to_sym] = klass.regex
    end
    rx
  end

  # Memoised result of instantiate_aberrations (stored on the receiving class).
  def self.aberration_objs
    @ab_objs ||= instantiate_aberrations
  end

  # Type Symbols of all aberrations except those whose name contains
  # "gain" or "loss".
  def self.aberration_type
    Aberration.all_regex.keys.reject { |c| c.to_s.match(/gain|loss/) }
  end

  # Returns the type Symbol of the first regex that matches +abr+, or :unk.
  def self.classify_aberration(abr)
    Aberration.all_regex.each_pair do |k, regex|
      return k if abr.match(regex)
    end
    :unk
  end

  # str - the aberration string to parse.
  def initialize(str)
    config_logging

    @abr = str
    @breakpoints = []
    @fragments = []

    #regex = Aberration.regex[@type.to_sym]
    # make sure it really is an inversion first
    #raise KaryotypeError, "#{str} does not appear to be a #{self.class}" unless str.match(self.regex)
    get_breakpoints
    # subclasses may append whole arrays of breakpoints, so flatten once here
    @breakpoints.flatten!
  end

  # Removes the first breakpoint equal to +bp+.
  # Returns the index it was removed from, or nil when it was not present.
  def remove_breakpoint(bp)
    removed = @breakpoints.index(bp)
    @breakpoints.delete_at(removed) if removed
    removed
  end

  def to_s
    "#{@abr}: #{@breakpoints.join(',')}"
  end

  # Parses @abr and appends one Breakpoint per chromosome/band fragment.
  # Nothing is added when the chromosome or band part cannot be parsed.
  def get_breakpoints
    chr_i = find_chr(@abr)
    return if chr_i.nil?

    band_i = find_bands(@abr, chr_i[:end_index])
    ## No band --> TODO add this as information somewhere but not as a breakpoint
    return if band_i.nil?

    chr_i[:chr].each_with_index do |c, i|
      band = band_i[:bands][i]
      next if band.nil? # fewer band definitions than chromosomes

      find_fragments(band).each { |f| @breakpoints << Breakpoint.new(c, f, self.class.type) }
    end
  end

  # Parsing aberration strings to pull out the chromosome and band definitions
  # These will result in breakpoint information
  #
  # Returns {:start_index, :end_index, :chr} for the first parenthesised
  # group of +str+, or nil (after logging a warning) when the group is
  # missing or contains something that is not a chromosome identifier.
  def find_chr(str)
    chr_s = str.index('(')
    chr_e = chr_s.nil? ? nil : str.index(')', chr_s)
    if chr_s.nil? || chr_e.nil?
      @log.warn("No chromosome defined from #{str}, skipped.")
      return nil
    end

    chrs = str[(chr_s + 1)..(chr_e - 1)].split(/;|:/)
    chrs.each do |chr|
      # the alternation is grouped so that the anchors apply to every branch
      unless chr.match(/\A(?:\d+|X|Y)\z/)
        @log.warn("No chromosome defined from #{str}, skipped.")
        return nil
      end
    end
    {:start_index => chr_s, :end_index => chr_e, :chr => chrs}
  end

  # Extracts the band group that directly follows the chromosome group.
  #
  # str   - the aberration string
  # index - index of the ")" that closes the chromosome group
  #
  # Returns {:start_index, :end_index, :bands}, or nil when no usable band
  # definition is present.
  def find_bands(str, index)
    #raise KaryotypeError, "No bands defined in #{str}" if str.length.eql?(index+1)
    if str.length.eql?(index + 1)
      @log.warn("No bands defined in #{str}, skipped.")
      return nil
    end

    band_s = str.index('(', index)
    if band_s.nil?
      @log.warn("No bands defined in #{str}, skipped.")
      return nil
    end

    # has bands and is not a translocation
    return nil unless str.match(/(q|p)(\d+|\?)/) && str[(band_s - 1)..band_s].eql?(")(")

    band_e = str.index(')', band_s)
    if band_e.nil?
      # unterminated group: take everything up to the end of the string
      band_e = str.length - 1
      content = str[(band_s + 1)..-1]
    else
      content = str[(band_s + 1)..(band_e - 1)]
    end

    if content.match(/::/)
      @log.warn("Aberration defined using different language, not currently parsed skipping: #{@abr}")
      return nil
    end

    # sometimes bands are given a range, for our purposes we'll take the first one (CyDas appears to do this as well)
    bands = content.split(/;|:/).map { |b| b.sub(/-[qp]\d+$/, "") }
    bands.each do |b|
      unless b.match(/^[pq]\d+(\.\d)?$/)
        @log.warn("Bands incorrectly defined in #{str}")
        return nil
      end
    end
    {:start_index => band_s, :end_index => band_e, :bands => bands}
  end

  # sometimes bands are defined for a single chr as p13q22
  # Returns every "p<digits>" / "q<digits>" token found in +str+.
  def find_fragments(str)
    str.scan(/[pq]\d+/)
  end

  # Stores the gem-wide logger in @log.
  def config_logging
    @log = Cytogenetics.logger
    #@log.progname = self.class.name
  end
end
|
156
|
+
|
157
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Cytogenetics
|
2
|
+
|
3
|
+
# A single chromosome/band position, e.g. chromosome "7" band "q22".
#
# NOTE: the original source used the symbol literal `:private` before
# config_logging, which has no effect; the method stays public.
class Breakpoint
  attr_accessor :chr, :band, :type

  # args[0] - chromosome identifier: a String of digits, "X" or "Y"
  # args[1] - band, a non-empty String
  # args[2] - optional aberration type, stored in #type
  #
  # Raises ArgumentError when the chromosome or the band is invalid.
  def initialize(*args)
    config_logging
    c = args[0]
    b = args[1]
    @type = args[2] if args.length > 2

    # anchored so that the whole string must be a chromosome identifier
    valid_chr = c.is_a?(String) && c.match(/\A(?:\d+|X|Y)\z/)
    valid_band = b.is_a?(String) && !b.empty?
    unless valid_chr && valid_band
      @log.error("#{c}#{b} is not a valid breakpoint")
      raise ArgumentError, "#{c}#{b} is not a valid breakpoint"
    end

    @chr = c
    @band = b
  end

  # Returns "p" or "q" for numbered bands ("q22") and for terminal bands
  # ("pter"/"qter"); nil when the band names no arm.
  def arm
    m = @band.match(/([qp])(?:\d+|ter)/)
    m && m[1]
  end

  def to_s
    "#{@chr}#{@band}"
  end

  # Stores the gem-wide logger in @log.
  def config_logging
    @log = Cytogenetics.logger
    #@log.progname = self.class.name
  end
end
|
36
|
+
|
37
|
+
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'cytogenetics/utils/band_reader'
|
2
|
+
|
3
|
+
module Cytogenetics
|
4
|
+
|
5
|
+
# One chromosome of a karyotype together with the aberrations attached to it.
#
# NOTE: the original source used the symbol literal `:private` before
# config_logging, which has no effect; the method stays public.
class Chromosome
  include BandReader

  class << self
    # Class-level accessor; nothing in this class assigns it (the
    # constructor writes an instance-level @normal_bands instead).
    attr_accessor :normal_bands
  end

  attr_reader :name, :aberrations

  # args[0] - chromosome identifier: an Integer, or a String of digits, "X" or "Y"
  # args[1] - when exactly true, the band list for this chromosome is read
  #           through BandReader#bands
  #
  # Raises ArgumentError for anything that is not a chromosome identifier.
  def initialize(*args)
    config_logging
    chr = args[0]
    # Integer instead of Fixnum: Fixnum no longer exists on current Rubies
    chr = chr.to_s if chr.is_a?(Integer)

    # the alternation is grouped so that the anchors apply to every branch
    unless chr.is_a?(String) && chr.match(/\A(?:\d+|X|Y)\z/)
      raise ArgumentError, "#{chr} is not a valid chromosome identifier."
    end

    @name = chr
    @aberrations = []
    ## TODO quit hardcoding
    @normal_bands = bands(@name, "resources/HsBands.txt") if args.length > 1 && args[1].eql?(true)
  end

  def to_s
    "#{@name}"
  end

  # Attaches an Aberration to this chromosome.
  # Raises ArgumentError when +obj+ is not an Aberration.
  def aberration(obj)
    raise ArgumentError, "Not an Aberration object" unless obj.is_a? Aberration

    #obj.breakpoints.each do |bp|
    #  log.warn("Band #{bp.to_s} doesn't exist. Removing.") if @normal_bands.index(bp.to_s).nil?
    #end

    ## TODO Deal with bands, HOWEVER because the chromosome has aberration objects breakpoints can include
    ## bands for which no chromosome object is created

    #obj.breakpoints.reject {|bp|
    #  @normal_bands.index(bp.to_s).nil?
    #}

    @aberrations << obj
  end

  # Returns one entry per aberration, each being that aberration's
  # breakpoints value (the result is not flattened).
  def breakpoints
    @aberrations.map { |a| a.breakpoints }
  end

  # Returns one entry per aberration, each being that aberration's
  # fragments value (the result is not flattened).
  def fragments
    @aberrations.map { |a| a.fragments }
  end

  # Stores the gem-wide logger in @log.
  def config_logging
    @log = Cytogenetics.logger
    #@log.progname = self.class.name
  end
end
|
67
|
+
|
68
|
+
end
|
@@ -0,0 +1,188 @@
|
|
1
|
+
require 'cytogenetics/aberration'
|
2
|
+
|
3
|
+
module Cytogenetics
|
4
|
+
|
5
|
+
module ChromosomeAberrations
|
6
|
+
|
7
|
+
## INVERSION
|
8
|
+
# Inversion aberration: strings that start with "inv(<chromosome>)".
class Inversion < Aberration
  # recognition pattern, read through Aberration.regex
  @rx = /^inv\((\d+|X|Y)\)/
  # type key, read through Aberration.type
  @kt = 'inv'
end
|
12
|
+
|
13
|
+
## DUPLICATION
|
14
|
+
# Duplication aberration: strings that start with "dup(<chromosome>)".
class Duplication < Aberration
  # recognition pattern, read through Aberration.regex
  @rx = /^dup\((\d+|X|Y)\)/
  # type key, read through Aberration.type
  @kt = 'dup'
end
|
18
|
+
|
19
|
+
## INSERTION
|
20
|
+
# Insertion aberration: strings that start with "ins(<chromosome>)".
class Insertion < Aberration
  # recognition pattern, read through Aberration.regex
  @rx = /^ins\((\d+|X|Y)\)/
  # type key, read through Aberration.type
  @kt = 'ins'
end
|
24
|
+
|
25
|
+
## DELETION
|
26
|
+
# Deletion aberration: strings that start with "del(<chromosome>)".
class Deletion < Aberration
  # recognition pattern, read through Aberration.regex
  @rx = /^del\((\d+|X|Y)\)/
  # type key, read through Aberration.type
  @kt = 'del'
end
|
30
|
+
|
31
|
+
## ADD (addition of unknown material)
|
32
|
+
# Addition of unknown material: strings that start with "add(<chromosome>)".
class Addition < Aberration
  # recognition pattern, read through Aberration.regex
  @rx = /^add\((\d+|X|Y)\)/
  # type key, read through Aberration.type
  @kt = 'add'
end
|
36
|
+
|
37
|
+
## ISOCHROMOSOME
|
38
|
+
# Isochromosome aberration: strings that start with "i(<chromosome>)".
# The type key is 'iso' although the karyotype prefix is just "i".
class Isochromosome < Aberration
  # recognition pattern, read through Aberration.regex
  @rx = /^i\((\d+|X|Y)\)/
  # type key, read through Aberration.type
  @kt = 'iso'
end
|
42
|
+
|
43
|
+
## DICENTRIC
|
44
|
+
# Dicentric chromosome: strings that start with "dic(<chr>;<chr>)"
# (the pattern also accepts ":" or "|" between the two chromosomes).
# Breakpoint parsing is inherited from Aberration; the specialised version
# below was left commented out by the author.
class DicentricChromosome < Aberration
  # recognition pattern, read through Aberration.regex
  @rx = /^dic\((\d+|X|Y)[;|:](\d+|X|Y)\)/
  # type key, read through Aberration.type
  @kt = 'dic'

  #def get_breakpoints
  #  chr_i = find_chr(@abr)
  #  band_i = find_bands(@abr, chr_i[:end_index])
  #  chr_i[:chr].each_with_index do |c, i|
  #    @breakpoints << Breakpoint.new(c, band_i[:bands][i], 'dic')
  #  end
  #  # TODO am not sure how the dic rearrangement works, see this in CyDas dic(13;13)(q14;q32)
  #  #@fragments << Fragment.new( Breakpoint.new(@breakpoints[0].chr, "pter"), @breakpoints[0])
  #  #@fragments << Fragment.new( @breakpoints[1], Breakpoint.new(@breakpoints[1].chr, "#{@breakpoints[1].arm}ter"))
  #end
end
|
60
|
+
|
61
|
+
## RING ## TODO figure out the right regex for this
|
62
|
+
#class RingChromosome < Aberration
|
63
|
+
# @kt = 'ring'
|
64
|
+
# @rx = /^r\(/
|
65
|
+
#end
|
66
|
+
|
67
|
+
## ROBERTSONIAN
|
68
|
+
#class Robertsonian < Aberration
|
69
|
+
# @kt = 'rob'
|
70
|
+
# @rx = /^rob\(/
|
71
|
+
#end
|
72
|
+
|
73
|
+
## DERIVATIVE
|
74
|
+
# Derivative chromosome: "der(<chromosome>)" followed by one or more nested
# aberrations, e.g. "der(1)t(1;3)(p31;p13)del(1)(q21)".
#
# NOTE: the original source used the symbol literal `:private` before
# add_fragments, which has no effect; the method stays public.
class Derivative < Aberration
  @kt = 'der'
  @rx = /^der\((\d+|X|Y)\)/

  # Splits the text after "der(<chr>)" into its nested aberrations, parses
  # each one with the class registered for its type and collects their
  # breakpoints. Translocation breakpoints are additionally turned into
  # Fragment objects through add_fragments.
  def get_breakpoints
    @aberrations = []

    ab_objs = Aberration.aberration_objs

    chr_i = find_chr(@abr)
    return if chr_i.nil? # nothing parseable; find_chr is responsible for the warning

    derivative_abr = @abr[(chr_i[:end_index] + 1)..@abr.length]

    # separate different abnormalities within the derivative chromosome and clean it up to make it parseable
    abnormalities = derivative_abr.scan(/([^\(\)]+\(([^\(\)]|\)\()*\))/).collect { |a| a[0] }

    trans_bps = []
    abnormalities.each do |abn|
      abrclass = Aberration.classify_aberration(abn)

      if abrclass.to_s.eql? 'unk' # not dealing with unknowns
        @log.warn("Cannot handle #{abn}, incorrect format.")
        next
      end

      # special handling because translocations are written as a sliding window
      # translocations should also only ever have 2 breakpoints...
      if abrclass.to_s.eql? ChromosomeAberrations::Translocation.type
        trans = ChromosomeAberrations::Translocation.new(abn)
        trans_bps << trans.breakpoints
        @breakpoints << trans.breakpoints
      else
        ab_obj = ab_objs[abrclass].new(abn)
        if ab_obj.breakpoints.length > 0
          @aberrations << ab_obj
          @breakpoints << ab_obj.breakpoints
        end
      end
    end

    trans_bps.delete_if { |c| c.empty? }
    # non-destructive flatten: flatten! returns nil when nothing was changed
    add_fragments(trans_bps.flatten) if trans_bps.length > 0
  end

  # have to reorder the array and then turn Breakpoints into fragments
  #
  # tbp_list - flat Array of translocation Breakpoints.
  #
  # A terminal ("<arm>ter") breakpoint is put in front of the first entry and
  # behind the last one; from index 2 onwards each pair is swapped. The
  # resulting list is consumed two at a time, each pair becoming a Fragment.
  def add_fragments(tbp_list)
    sorted = []
    tbp_list.each_with_index do |e, i|
      if i <= 1
        sorted << Breakpoint.new(e.chr, "#{e.arm}ter") if i.eql? 0
        sorted << e
      elsif i % 2 == 0
        sorted << tbp_list[i + 1]
        sorted << tbp_list[i]
      end
    end
    sorted << Breakpoint.new(sorted[-1].chr, "#{sorted[-1].arm}ter")
    sorted.each_slice(2) do |pair|
      @fragments << Fragment.new(pair[0], pair[1])
    end
  end
end
|
136
|
+
|
137
|
+
## TRANSLOCATION ... this is typically a subset of Derivative chromosomes, but have seen it on it's own
|
138
|
+
# Translocation between two chromosomes, e.g. "t(1;3)(p31;p13)".
class Translocation < Aberration
  @kt = 'trans'
  @rx = /^t\((\d+|X|Y)[;|:](\d+|X|Y)\)/

  ## Handled form of a translocation definition:
  ## 1) t(1;3)(p31;p13)
  #
  # Pairs the i-th chromosome with the i-th band and adds one Breakpoint of
  # type 'trans' per pair. Logs a warning and adds nothing when no band
  # group can be parsed.
  def get_breakpoints
    chr_i = find_chr(@abr)
    return if chr_i.nil? # nothing parseable; find_chr is responsible for the warning

    band_i = find_bands(@abr, chr_i[:end_index])
    if band_i
      chr_i[:chr].each_with_index do |c, i|
        @breakpoints << Breakpoint.new(c, band_i[:bands][i], 'trans')
      end
    else
      @log.warn("No bands defined in #{@abr}")
    end
  end
end
|
157
|
+
|
158
|
+
## FRAGMENT
|
159
|
+
# Chromosome fragment: strings that start with "frag(<chromosome>)".
class ChromosomeFragment < Aberration
  # recognition pattern, read through Aberration.regex
  @rx = /^frag\((\d+|X|Y)\)/
  # type key, read through Aberration.type
  @kt = 'frag'
end
|
163
|
+
|
164
|
+
## CHROMOSOME GAIN
|
165
|
+
# Gain of a whole chromosome, written "+<chromosome>" (e.g. "+7").
class ChromosomeGain < Aberration
  @kt = 'gain'
  @rx = /^\+(\d+|X|Y)$/

  # str - the aberration string; its first "+" is removed before it is
  #       stored in @abr.
  #
  # A whole-chromosome gain has no breakpoints, so the band parsing done by
  # Aberration#initialize is skipped. (The original definition took no
  # parameter although the body reads +str+, so it could never succeed.)
  def initialize(str)
    config_logging
    @abr = str.sub("+", "")
    @breakpoints = []
    @fragments = []
  end
end
|
175
|
+
|
176
|
+
## CHROMOSOME LOSS
|
177
|
+
# Loss of a whole chromosome, written "-<chromosome>" (e.g. "-9").
class ChromosomeLoss < Aberration
  @kt = 'loss'
  @rx = /^-(\d+|X|Y)$/

  # str - the aberration string; its first "-" is removed before it is
  #       stored in @abr.
  #
  # A whole-chromosome loss has no breakpoints, so the band parsing done by
  # Aberration#initialize is skipped. (The original definition took no
  # parameter although the body reads +str+, so it could never succeed.)
  def initialize(str)
    config_logging
    @abr = str.sub("-", "")
    @breakpoints = []
    @fragments = []
  end
end
|
187
|
+
end
|
188
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
|
2
|
+
module Cytogenetics
|
3
|
+
|
4
|
+
# A stretch of one chromosome delimited by two Breakpoints.
#
# NOTE: the original source used the symbol literal `:private` before
# config_logging, which has no effect; the method stays public.
class Fragment
  # Raised when the two breakpoints lie on different chromosomes. The
  # constructor referenced this name without any definition being present,
  # so it is declared here.
  class GenomeStructureError < StandardError; end

  attr_reader :chr, :start, :end, :genes

  # args - exactly two Breakpoint objects: start and end of the fragment.
  #
  # Raises ArgumentError when the arguments are missing or are not Breakpoints.
  # Raises GenomeStructureError when the breakpoints are on different chromosomes.
  def initialize(*args)
    config_logging
    unless args.length.eql?(2) && args[0].is_a?(Breakpoint) && args[1].is_a?(Breakpoint)
      raise ArgumentError, "Expected arguments are missing or are not Breakpoints: #{args}"
    end

    # initialised so that add_gene has an Array to append to
    @genes = []
    @start = args[0]
    @end = args[1]
    @chr = @start.chr

    unless @start.chr.eql? @end.chr
      raise GenomeStructureError, "Fragments must be within the same chromosome: #{args}"
    end
  end

  # Appends +gene+ to the list returned by #genes.
  def add_gene(gene)
    @genes << gene
  end

  def to_s
    "#{@start.to_s} --> #{@end.to_s}"
  end

  ## TODO this will require length in basepairs of each band
  #def length
  #
  #end

  # Stores the gem-wide logger in @log.
  def config_logging
    @log = Cytogenetics.logger
    #@log.progname = self.class.name
  end
end
|
44
|
+
end
|
45
|
+
|
@@ -0,0 +1,172 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
require 'cytogenetics/utils/karyotype_reader'
|
4
|
+
|
5
|
+
module Cytogenetics
|
6
|
+
|
7
|
+
# Parses a karyotype string such as "47,XX,+7,del(5)(q13)" into ploidy, sex
# chromosomes, normal chromosome counts and per-chromosome aberrations.
#
# NOTE: the original source used the symbol literal `:private` as a section
# marker, which has no effect; the helper methods stay public.
class Karyotype
  @@haploid = 23

  # aberrations  - Hash of type Symbol => Array of aberration strings
  # karyotype    - Array of cleaned aberration strings (set by prep_karyotype)
  # ploidy       - value returned by KaryotypeReader.calculate_ploidy
  # sex          - String of sex chromosomes ("XX", "XY", ...) or nil
  # abnormal_chr - Hash of chromosome name => Array of Chromosome objects
  # normal_chr   - Hash of chromosome name => copy count
  attr_reader :aberrations, :karyotype, :ploidy, :sex, :abnormal_chr, :normal_chr

  class << self
    attr_accessor :aberration_objs, :unclear_aberrations, :log
  end

  # karyotype_str - the karyotype as a String longer than one character.
  #
  # Raises ArgumentError for anything else; prep_karyotype raises
  # KaryotypeError when the ploidy or sex part is missing.
  def initialize(karyotype_str)
    config_logging
    unless karyotype_str.is_a?(String) && karyotype_str.length > 1
      raise ArgumentError, "#{karyotype_str} is not a karyotype."
    end
    @log.info("Reading karyotype #{karyotype_str}")

    @karyotype = karyotype_str.gsub(/\s/, "")
    @normal_chr = {}
    @abnormal_chr = {}
    @aberrations = {}
    @unclear_aberrations = []
    setup_abberation_objs
    prep_karyotype
    handle_ploidy_diff
    analyze
  end

  # Creates a Chromosome (with its Aberration attached) for every classified
  # aberration except chromosome gains/losses, grouped by chromosome name in
  # @abnormal_chr.
  def analyze
    Aberration.aberration_type.each do |abr_type|
      next unless @aberrations.has_key? abr_type

      regex = @aberration_obj[abr_type].regex

      @aberrations[abr_type].each do |abr|
        found = abr.match(regex)
        if found.nil?
          @log.warn("#{abr} does not match the pattern for #{abr_type}, skipped.")
          next
        end

        # found[2] is nil for patterns that capture a single chromosome
        first_chr = found[1]
        second_chr = found[2]
        unless second_chr.nil? || first_chr.eql?(second_chr)
          @log.warn("Aberration has two chromosomes #{abr} but only the first one is handled.")
        end

        ## TODO deal with the case of 2 chromosomes defined in the aberration
        chr = Chromosome.new(first_chr, true)
        chr.aberration(@aberration_obj[abr_type].new(abr))

        @abnormal_chr[chr.name] = [] unless @abnormal_chr.has_key? chr.name
        @abnormal_chr[chr.name] << chr
      end
    end
  end

  # get breakpoints for the karyotype
  # Returns a flat Array of the breakpoints of every abnormal chromosome.
  def report_breakpoints
    bps = []
    @abnormal_chr.each_value do |chr_list|
      chr_list.each { |chr| bps << chr.breakpoints }
    end
    bps.delete_if { |c| c.empty? }
    bps.flatten!
    bps
  end

  # Returns a flat Array of the fragments of every abnormal chromosome.
  def report_fragments
    frags = []
    @abnormal_chr.each_value do |chr_list|
      chr_list.each { |chr| frags << chr.fragments }
    end
    frags.delete_if { |c| c.empty? }
    frags.flatten!
    frags
  end

  # Returns the whole-chromosome changes as signed Strings, losses first
  # (e.g. ["-9", "+7"]).
  def report_ploidy_change
    pd = []
    pd << @aberrations[:loss].map { |e| "-#{e}" } if @aberrations[:loss]
    pd << @aberrations[:gain].map { |e| "+#{e}" } if @aberrations[:gain]
    pd.flatten!
    pd
  end

  # Returns a multi-line String listing the normal chromosome counts and, per
  # abnormal chromosome, its aberrations and breakpoints.
  def summarize
    summary = "NORMAL CHROMOSOMES\n"
    @normal_chr.each_pair do |chr, count|
      summary = "#{summary} #{chr}: #{count}\n"
    end

    summary = "#{summary}\nABNORMAL:"
    @abnormal_chr.each_pair do |chr, list|
      summary = "#{summary}\n#{chr}"
      list.each do |c|
        summary = "#{summary}\n#{c.aberrations}\n"
        summary = "#{summary}\n#{c.breakpoints}\n"
      end
    end
    # the text is the result; without this line the method returned @abnormal_chr
    summary
  end

  # -------------------- # HELPERS # -------------------- #

  # Stores the gem-wide logger in @log.
  def config_logging
    @log = Cytogenetics.logger
    #@log.progname = self.class.name
  end

  # Caches the type => class table from Aberration.aberration_objs.
  def setup_abberation_objs
    @aberration_obj = Aberration.aberration_objs
  end

  # Applies whole-chromosome losses and gains to the normal chromosome counts.
  def handle_ploidy_diff
    adjust_normal_count(@aberrations[:loss], -1)
    adjust_normal_count(@aberrations[:gain], 1)
  end

  # Adds +delta+ to the count of every chromosome named in +chromosomes+;
  # names without an entry in @normal_chr are logged and skipped.
  def adjust_normal_count(chromosomes, delta)
    return if chromosomes.nil?

    chromosomes.each do |c|
      if @normal_chr.has_key?(c)
        @normal_chr[c] += delta
      else
        @log.warn("Chromosome #{c} is not part of the normal complement, gain/loss ignored.")
      end
    end
  end

  # determine ploidy & gender, clean up each aberration and drop any "unknown"
  def prep_karyotype
    @karyotype.gsub!(/\s/, "")
    clones = @karyotype.scan(/(\[\d+\])/).collect { |a| a[0] }
    @log.warn("Karyotype is a collection of clones, analysis may be inaccurate.") if clones.length > 3

    @karyotype.gsub!(/\[\d+\]/, "") # don't care about numbers of cells: [5]

    (pl, sc) = @karyotype.split(",")[0..1]
    unless pl && sc
      raise KaryotypeError, "'#{@karyotype}' is not a valid karyotype. Ploidy and sex defnitions are absent"
    end
    @ploidy = KaryotypeReader.calculate_ploidy(pl, @@haploid)
    sex_chr = KaryotypeReader.determine_sex(sc)

    st = sex_chr.values.inject { |sum, v| sum + v }
    @sex = nil
    karyotype_index = 1 # sometimes the sex is not indicated and there's no case information to figure it out
    if st > 0
      # repeat each sex chromosome by its count; joining the hash keys would
      # always give "XY" because both keys are present even with a count of 0
      @sex = ['X', 'Y'].map { |c| c * sex_chr[c].to_i }.join("")
      karyotype_index = 2
    end

    # NOTE(review): this registers a chromosome named "23" next to X and Y;
    # confirm whether 1..22 was intended.
    (1..23).each { |c| @normal_chr[c.to_s] = @ploidy.to_i }

    sex_chr.each_pair { |c, p| @normal_chr[c] = p.to_i }

    # deal with the most common karyotype string inconsistencies
    cleaned_karyotype = []
    @karyotype.split(",")[karyotype_index..-1].each do |abr|
      # concat, not a set union: repeated gains/losses must be kept
      cleaned_karyotype.concat(KaryotypeReader.cleanup(abr))
    end
    @karyotype = cleaned_karyotype

    # classify each type of aberration in the karyotype
    @karyotype.each do |k|
      abrclass = Aberration.classify_aberration(k)
      @aberrations[abrclass] = [] unless @aberrations.has_key? abrclass
      @aberrations[abrclass] << k.sub(/^(\+|-)?/, "")
    end

    gain_type = ChromosomeAberrations::ChromosomeGain.type
    loss_type = ChromosomeAberrations::ChromosomeLoss.type
    @aberrations.each_pair do |abrclass, abrlist|
      # keys are Symbols while .type is a String, so compare as Strings
      next if abrclass.to_s.eql?(gain_type) || abrclass.to_s.eql?(loss_type)

      # aberrations other than chromosome gains/losses should be uniquely represented
      counts = abrlist.inject(Hash.new(0)) { |h, i| h[i] += 1; h }
      counts.each_pair { |k, v| @log.warn("#{k} was seen multiple times. Analyzed only once.") if v > 1 }

      @aberrations[abrclass] = abrlist.uniq
    end
  end
end
|
172
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
|
2
|
+
module Cytogenetics
|
3
|
+
|
4
|
+
# Mixin that reads chromosome band names from a band file in which each
# usable line starts with "<chromosome><p|q>...", e.g. "1p36.33".
module BandReader

  @bands_by_chr = {}

  # Returns the unique band names listed for +chr+, or an empty Array when
  # the file has none for that chromosome.
  #
  # chr  - chromosome name as a String ("1".."n", "X", "Y")
  # file - an open File, or a path that is opened (and closed again) here
  def bands(chr, file)
    parsed =
      if file.is_a?(File)
        read_file(file) # the caller owns this handle and closes it
      else
        File.open(file, 'r') { |handle| read_file(handle) }
      end
    (parsed[chr] || []).uniq
  end

  # Reads every line of +file+ (anything that responds to each_line) and
  # returns a Hash of chromosome => Array of band names. A band that carries
  # a sub-band ("1p36.33") is followed by its parent band ("1p36"). Lines
  # that do not start with a chromosome and an arm are skipped.
  def read_file(file)
    band_by_chr = {}
    file.each_line do |line|
      parts = line.chomp.match(/^(\d+|X|Y)([pq].*)/)
      next if parts.nil?

      c = parts[1]
      b = parts[2]
      band_by_chr[c] = [] unless band_by_chr.has_key? c
      band_by_chr[c] << "#{c}#{b}"

      parent = b.match(/([pq]\d+)\.\d+/)
      band_by_chr[c] << "#{c}#{parent[1]}" if parent
    end
    band_by_chr
  end
end
|
29
|
+
|
30
|
+
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
require 'cytogenetics/karyotype_error'
|
2
|
+
|
3
|
+
module Cytogenetics
|
4
|
+
# Stateless helpers that normalise the individual parts of a karyotype string.
class KaryotypeReader

  # Stores the gem-wide logger in the class-level @log.
  def self.config_logging
    @log = Cytogenetics.logger
    #@log.progname = self.name
  end

  # Normalises one comma-separated karyotype element.
  #
  # abr - the element, e.g. "+t(13;X)(q13;p12)", "13x3", "add(9)(p21)x2".
  #       The argument itself is left unmodified.
  #
  # Returns an Array with zero or more normalised aberration strings.
  def self.cleanup(abr)
    config_logging

    new_abr = []

    # +t(13;X)(q13;p12) doesn't need a +
    abr = abr.sub(/^[+-]/, "") unless abr.match(/^[+-](\d+|X|Y)$/)

    # not going to bother with aberrations that are unclear/unknown '?' or with '**'
    if abr.match(/\?|\*\*/)
      @log.warn("Removing aberration with unknown/unclear information: #{abr}")
      return new_abr
    end

    # 13x2 is normal, 13x3 is a duplicate and should read +13
    if (counted = abr.match(/^(\d+|X|Y)x(\d+)/))
      chr = counted[1]
      copies = counted[2].to_i
      if copies.eql? 0 # managed to lose both chromosomes in a diploid karyotype
        2.times { new_abr.push("-#{chr}") }
      elsif copies > 2 # sometimes you have 13x3, really just means 1 additional chr 13 since normal ploidy is 2
        (copies - 2).times { new_abr.push("+#{chr}") }
      elsif copies.eql?(1)
        new_abr.push("-#{chr}")
      end
    # add(9)(p21)x2 or add(7)x2 should indicate that this "additional material of unk origin" happened twice
    elsif (repeated = abr.match(/(.*)x(\d+)$/))
      repeated[2].to_i.times { new_abr.push(repeated[1]) }
    # del(7) should be -7 but not del(7)(q12)
    else # everything else
      new_abr.push(abr)
    end

    new_abr
  end

  # Counts the sex chromosomes named in +str+ (e.g. "XXY").
  #
  # Returns a Hash with the keys 'X' and 'Y'; both counts stay 0 (and a
  # warning is logged) when +str+ contains anything but X and Y.
  def self.determine_sex(str)
    config_logging

    sex_chr = {'X' => 0, 'Y' => 0}

    if str.match(/^(X|Y)+$/)
      #raise KaryotypeError, "Definition of gender incorrect (#{str})" unless str.match(/^(X|Y)+$/)
      # ploidy number makes no difference since this string will tell us how many or at least what the gender should be
      str.split(//).each { |c| sex_chr[c] += 1 }

      # assume this was an XY karyotype that may have lost the Y, have only seen this in
      # severely affected karyotypes NOT TRUE, some karyotypes are just not defined correctly
      # often XX -X is listed as X,... Cannot assume it's a male missing Y
      #sex_chr['Y'] += 1 if (chrs.length.eql?(1) and chrs[0].eql?('X'))
    else
      @log.warn("Definition of gender incorrect (#{str})")
    end

    sex_chr
  end

  # Derives the ploidy level (2, 3 or 4) from the chromosome-count part of a
  # karyotype.
  #
  # str     - the count, a single number ("72") or a range ("46-53", "45~47"),
  #           optionally followed by a "<...>" annotation. The argument
  #           itself is left unmodified.
  # haploid - number of chromosomes in one haploid set
  def self.calculate_ploidy(str, haploid)
    config_logging

    str = str.sub(/<.{2,}>/, "")
    range_in_parens = str.match(/\d+\((\d+-\d+)\)/)
    str = range_in_parens[1] if range_in_parens

    diploid = haploid * 2
    triploid = haploid * 3
    quadraploid = haploid * 4

    # typically see di- tri- quad- if more than that it should be noted
    min = diploid
    max = diploid
    #if str.match(/<\+(\d)n>/) # sometimes see this odd configuration: 46<+3n>
    #  ploidy = $1
    if (range = str.match(/(\d+)[-~](\d+)/)) # num and range or just range: 46-53
      min = range[1].to_i
      max = range[2].to_i
    elsif (single = str.match(/^(\d+)/)) # single num: 72
      min = single[1].to_i
      max = min
    end

    if min < haploid
      @log.warn("Ploidy determination may be bad as the min was less than haploid (#{str}). Setting to haploid.")
      min = haploid
    end

    if min.eql?(diploid) && max.eql?(diploid)
      @log.debug("Normal ploidy: #{str}")
      2
    elsif (min >= haploid && max <= diploid) || (min <= diploid && max < triploid)
      @log.debug("Relatively normal ploidy #{str}")
      2
    elsif min >= haploid && max < quadraploid
      @log.debug("Triploid #{str}")
      3
    elsif max >= quadraploid
      @log.debug("Quadraploid #{str}")
      4
    else
      raise KaryotypeError, "Failed to determine ploidy for #{str}"
    end
  end
end
|
121
|
+
end
|
data/lib/cytogenetics.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'cytogenetics/aberration'
|
2
|
+
require 'cytogenetics/breakpoint'
|
3
|
+
require 'cytogenetics/chromosome'
|
4
|
+
require 'cytogenetics/chromosome_aberrations'
|
5
|
+
require 'cytogenetics/fragment'
|
6
|
+
require 'cytogenetics/karyotype'
|
7
|
+
require 'cytogenetics/karyotype_error'
|
8
|
+
|
9
|
+
|
10
|
+
require 'cytogenetics/utils/karyotype_reader'
|
11
|
+
require 'cytogenetics/utils/band_reader'
|
12
|
+
|
13
|
+
require 'yaml'
|
14
|
+
require 'logger'
|
15
|
+
|
16
|
+
# Entry point of the gem: holds the logger shared by all classes and offers a
# shortcut for building a Karyotype.
module Cytogenetics

  # Replaces the shared logger.
  def self.logger=(log)
    @clog = log
  end

  # Returns the shared logger. When none has been set, a Logger writing to
  # STDOUT at level FATAL is created and remembered.
  def self.logger
    return @clog if @clog

    default_logger = Logger.new(STDOUT)
    default_logger.level = Logger::FATAL
    @clog = default_logger
  end

  # Builds a Karyotype from the karyotype string +kary_str+.
  def self.karyotype(kary_str)
    Karyotype.new(kary_str)
  end

end
|
metadata
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cytogenetics
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Sarah Killcoyne
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-11-26 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: Karyotype parser based on ISCN specification. Note that there are still
|
15
|
+
many bugs. The ISCN language is poorly followed by most users so the parser is still
|
16
|
+
being developed.
|
17
|
+
email: sarah.killcoyne@uni.lu
|
18
|
+
executables: []
|
19
|
+
extensions: []
|
20
|
+
extra_rdoc_files: []
|
21
|
+
files:
|
22
|
+
- lib/cytogenetics/aberration.rb
|
23
|
+
- lib/cytogenetics/breakpoint.rb
|
24
|
+
- lib/cytogenetics/chromosome.rb
|
25
|
+
- lib/cytogenetics/chromosome_aberrations.rb
|
26
|
+
- lib/cytogenetics/fragment.rb
|
27
|
+
- lib/cytogenetics/karyotype.rb
|
28
|
+
- lib/cytogenetics/karyotype_error.rb
|
29
|
+
- lib/cytogenetics/utils/band_reader.rb
|
30
|
+
- lib/cytogenetics/utils/karyotype_reader.rb
|
31
|
+
- lib/cytogenetics/version.rb
|
32
|
+
- lib/cytogenetics.rb
|
33
|
+
homepage:
|
34
|
+
licenses:
|
35
|
+
- http://www.apache.org/licenses/LICENSE-2.0.html
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options: []
|
38
|
+
require_paths:
|
39
|
+
- lib
|
40
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
|
+
none: false
|
48
|
+
requirements:
|
49
|
+
- - ! '>='
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
52
|
+
requirements: []
|
53
|
+
rubyforge_project:
|
54
|
+
rubygems_version: 1.8.24
|
55
|
+
signing_key:
|
56
|
+
specification_version: 3
|
57
|
+
summary: Karyotype parser based on ISCN specification.
|
58
|
+
test_files: []
|