cytogenetics 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/cytogenetics/aberration.rb +157 -0
- data/lib/cytogenetics/breakpoint.rb +37 -0
- data/lib/cytogenetics/chromosome.rb +68 -0
- data/lib/cytogenetics/chromosome_aberrations.rb +188 -0
- data/lib/cytogenetics/fragment.rb +45 -0
- data/lib/cytogenetics/karyotype.rb +172 -0
- data/lib/cytogenetics/karyotype_error.rb +4 -0
- data/lib/cytogenetics/utils/band_reader.rb +30 -0
- data/lib/cytogenetics/utils/karyotype_reader.rb +121 -0
- data/lib/cytogenetics/version.rb +3 -0
- data/lib/cytogenetics.rb +36 -0
- metadata +58 -0
@@ -0,0 +1,157 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module Cytogenetics
|
4
|
+
# Base class for a single chromosomal aberration parsed from a karyotype
# string such as "del(7)(q22)".
#
# Concrete aberration types subclass this class and configure themselves with
# two class-level instance variables: @kt (the type name returned by .type)
# and @rx (the pattern returned by .regex). Creating an instance parses the
# string and fills #breakpoints.
class Aberration
  attr_accessor :breakpoints
  attr_reader :abr, :ab_objs, :fragments

  class << self
    # Builds a Hash mapping each aberration type (as a Symbol) to the class
    # declared under ChromosomeAberrations with that type.
    def instantiate_aberrations
      ChromosomeAberrations.constants.each_with_object({}) do |const_name, by_type|
        klass = ChromosomeAberrations.const_get(const_name)
        by_type[klass.type.to_sym] = klass
      end
    end

    # Type name configured on the receiving class via @kt (nil on the base class).
    def type
      @kt
    end

    # Pattern configured on the receiving class via @rx (nil on the base class).
    def regex
      @rx
    end

    # Hash of type Symbol => pattern for every class under ChromosomeAberrations.
    def all_regex
      ChromosomeAberrations.constants.each_with_object({}) do |const_name, patterns|
        klass = ChromosomeAberrations.const_get(const_name)
        patterns[klass.type.to_sym] = klass.regex
      end
    end

    # Memoised result of instantiate_aberrations (cached on the receiving class).
    def aberration_objs
      @ab_objs ||= instantiate_aberrations
    end

    # All known type Symbols except those whose name contains "gain" or "loss".
    def aberration_type
      Aberration.all_regex.keys.reject { |type| type.to_s.match(/gain|loss/) }
    end

    # Returns the type Symbol of the first pattern matching +abr+, or :unk
    # when none matches.
    def classify_aberration(abr)
      found = Aberration.all_regex.find { |_type, regex| abr.match(regex) }
      found ? found.first : :unk
    end
  end

  # str - the aberration string to parse.
  def initialize(str)
    config_logging

    @abr = str
    @breakpoints = []
    @fragments = []

    get_breakpoints
    # subclasses may push whole arrays of breakpoints; keep the list flat
    @breakpoints.flatten!
  end

  # Removes the first breakpoint equal to +bp+.
  # Returns the index it was stored at, or nil when it was not present.
  def remove_breakpoint(bp)
    removed = @breakpoints.index(bp)
    @breakpoints.delete_at(removed) if removed
    removed
  end

  def to_s
    "#{@abr}: #{@breakpoints.join(',')}"
  end

  # NOTE(review): the helpers below are internal by intent but have always
  # been public (they were never actually made private); visibility is left
  # unchanged so existing callers keep working.

  # Parses @abr and appends one Breakpoint per chromosome/band found.
  # Nothing is added when the chromosome or the band group cannot be parsed.
  def get_breakpoints
    chr_i = find_chr(@abr)
    return if chr_i.nil?

    band_i = find_bands(@abr, chr_i[:end_index])
    # breakpoints aren't added if there is no band information
    # TODO: record the band-less aberration somewhere, but not as a breakpoint
    return if band_i.nil?

    chr_i[:chr].each_with_index do |chr, i|
      band = band_i[:bands][i]
      next if band.nil? # fewer band definitions than chromosomes

      find_fragments(band).each do |frag|
        @breakpoints << Breakpoint.new(chr, frag, self.class.type)
      end
    end
  end

  # Extracts the chromosome list from the first parenthesised group of +str+.
  #
  # Returns {:start_index, :end_index, :chr} where the indexes are the
  # positions of "(" and ")" and :chr is the list of chromosome names, or nil
  # (after logging a warning) when there is no such group or a name is not a
  # number, X or Y.
  def find_chr(str)
    chr_s = str.index(/\(/, 0)
    chr_e = chr_s.nil? ? nil : str.index(/\)/, chr_s)
    if chr_s.nil? || chr_e.nil?
      @log.warn("No chromosome defined from #{str}, skipped.")
      return
    end

    chrs = str[(chr_s + 1)..(chr_e - 1)].split(/;|:/)
    # the whole name must be a number, X or Y
    unless chrs.all? { |chr| chr.match(/\A(\d+|X|Y)\z/) }
      @log.warn("No chromosome defined from #{str}, skipped.")
      return
    end

    {:start_index => chr_s, :end_index => chr_e, :chr => chrs}
  end

  # Extracts the band list from the parenthesised group that directly follows
  # the chromosome group.
  #
  # str   - the aberration string.
  # index - position of the ")" closing the chromosome group.
  #
  # Returns {:start_index, :end_index, :bands}, or nil when there is no
  # usable band group.
  def find_bands(str, index)
    if str.length.eql?(index + 1)
      @log.warn("No bands defined in #{str}, skipped.")
      return
    end

    band_s = str.index(/\(/, index)
    return nil if band_s.nil?
    # the string must mention a band and the group must start right after ")"
    return nil unless str.match(/(q|p)(\d+|\?)/) && str[(band_s - 1)..band_s].eql?(")(")

    band_e = str.index(/\)/, band_s) || (str.length - 1)
    inner = str[(band_s + 1)..(band_e - 1)]

    if inner.match(/::/)
      @log.warn("Aberration defined using different language, not currently parsed skipping: #{@abr}")
      return nil
    end

    # sometimes bands are given as a range; only the first one is kept
    # (CyDas appears to do this as well)
    bands = inner.split(/;|:/).map { |b| b.sub(/-[q|p]\d+$/, "") }
    unless bands.all? { |b| b.match(/^[p|q]\d+(\.\d)?$/) }
      @log.warn("Bands incorrectly defined in #{str}")
      return nil
    end

    {:start_index => band_s, :end_index => band_e, :bands => bands}
  end

  # sometimes bands are defined for a single chr as p13q22; returns every
  # arm+number token found in +str+.
  def find_fragments(str)
    str.scan(/([p|q]\d+)/).map { |captures| captures[0] }
  end

  # Stores the shared library logger in @log.
  def config_logging
    @log = Cytogenetics.logger
    #@log.progname = self.class.name
  end
end
|
156
|
+
|
157
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Cytogenetics
|
2
|
+
|
3
|
+
# A single chromosome breakpoint: a chromosome name plus a band label
# (e.g. chromosome "7", band "q22"), optionally tagged with the type of the
# aberration that produced it.
class Breakpoint
  attr_accessor :chr, :band, :type

  # args[0] - chromosome name; must be a String holding a number, "X" or "Y".
  # args[1] - band label; must be a non-empty String.
  # args[2] - optional aberration type.
  #
  # Raises ArgumentError when the chromosome or band is invalid.
  def initialize(*args)
    config_logging
    c = args[0]
    b = args[1]
    @type = args[2] if args.length > 2

    # the whole chromosome name has to match, not just some part of it
    chr_ok = c.is_a?(String) && !c.match(/\A(\d+|X|Y)\z/).nil?
    band_ok = b.is_a?(String) && b.length > 0
    unless chr_ok && band_ok
      @log.error("#{c}#{b} is not a valid breakpoint")
      raise ArgumentError, "#{c}#{b} is not a valid breakpoint"
    end

    @chr = c
    @band = b
  end

  # Returns "p" or "q" when the band contains an arm letter followed by
  # digits, otherwise nil (e.g. for "pter").
  def arm
    @band[/(q|p)\d+/, 1]
  end

  def to_s
    "#{@chr}#{@band}"
  end

  # NOTE(review): config_logging is internal by intent but has always been
  # public; visibility is left unchanged.

  # Stores the shared library logger in @log.
  def config_logging
    @log = Cytogenetics.logger
    #@log.progname = self.class.name
  end
end
|
36
|
+
|
37
|
+
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'cytogenetics/utils/band_reader'
|
2
|
+
|
3
|
+
module Cytogenetics
|
4
|
+
|
5
|
+
# One chromosome of a karyotype together with the aberrations attached to it.
class Chromosome
  include BandReader

  class << self
    attr_accessor :normal_bands
  end

  attr_reader :name, :aberrations

  # args[0] - chromosome identifier: an Integer or a String holding a number,
  #           "X" or "Y".
  # args[1] - when exactly +true+, the band list for this chromosome is
  #           loaded through BandReader#bands.
  #
  # Raises ArgumentError when the identifier is not valid.
  def initialize(*args)
    config_logging
    chr = args[0]
    # Integer rather than Fixnum: the Fixnum constant no longer exists in
    # current Ruby versions
    chr = chr.to_s if chr.is_a?(Integer)

    # the whole identifier has to be a number, X or Y
    unless chr.is_a?(String) && chr.match(/\A(\d+|X|Y)\z/)
      raise ArgumentError, "#{chr} is not a valid chromosome identifier."
    end

    @name = chr
    @aberrations = []
    ## TODO quit hardcoding (the path is resolved against the working directory)
    @normal_bands = bands(@name, "resources/HsBands.txt") if args.length > 1 && args[1].eql?(true)
  end

  def to_s
    "#{@name}"
  end

  # Attaches an aberration to this chromosome and returns the updated list.
  #
  # Raises ArgumentError unless +obj+ is an Aberration.
  def aberration(obj)
    raise ArgumentError, "Not an Aberration object" unless obj.is_a? Aberration

    ## TODO Deal with bands, HOWEVER because the chromosome has aberration objects breakpoints can include
    ## bands for which no chromosome object is created
    #obj.breakpoints.each do |bp|
    #  log.warn("Band #{bp.to_s} doesn't exist. Removing.") if @normal_bands.index(bp.to_s).nil?
    #end
    #obj.breakpoints.reject {|bp|
    #  @normal_bands.index(bp.to_s).nil?
    #}

    @aberrations << obj
  end

  # One entry per attached aberration, each entry being that aberration's
  # own breakpoint list (the result is nested, not flattened).
  def breakpoints
    @aberrations.map { |a| a.breakpoints }
  end

  # One entry per attached aberration, each entry being that aberration's
  # own fragment list (the result is nested, not flattened).
  def fragments
    @aberrations.map { |a| a.fragments }
  end

  # NOTE(review): config_logging is internal by intent but has always been
  # public; visibility is left unchanged.

  # Stores the shared library logger in @log.
  def config_logging
    @log = Cytogenetics.logger
    #@log.progname = self.class.name
  end
end
|
67
|
+
|
68
|
+
end
|
@@ -0,0 +1,188 @@
|
|
1
|
+
require 'cytogenetics/aberration'
|
2
|
+
|
3
|
+
module Cytogenetics
|
4
|
+
|
5
|
+
# Concrete aberration types. Every class sets @kt (its type name) and @rx
# (the pattern recognising its karyotype notation); Aberration looks these
# classes up through ChromosomeAberrations.constants.
module ChromosomeAberrations

  ## INVERSION
  class Inversion < Aberration
    @kt = 'inv'
    @rx = /^inv\((\d+|X|Y)\)/
  end

  ## DUPLICATION
  class Duplication < Aberration
    @kt = 'dup'
    @rx = /^dup\((\d+|X|Y)\)/
  end

  ## INSERTION
  class Insertion < Aberration
    @kt = 'ins'
    @rx = /^ins\((\d+|X|Y)\)/
  end

  ## DELETION
  class Deletion < Aberration
    @kt = 'del'
    @rx = /^del\((\d+|X|Y)\)/
  end

  ## ADD (addition of unknown material)
  class Addition < Aberration
    @kt = 'add'
    @rx = /^add\((\d+|X|Y)\)/
  end

  ## ISOCHROMOSOME
  class Isochromosome < Aberration
    @kt = 'iso'
    @rx = /^i\((\d+|X|Y)\)/
  end

  ## DICENTRIC
  class DicentricChromosome < Aberration
    @kt = 'dic'
    @rx = /^dic\((\d+|X|Y)[;|:](\d+|X|Y)\)/

    #def get_breakpoints
    #  chr_i = find_chr(@abr)
    #  band_i = find_bands(@abr, chr_i[:end_index])
    #  chr_i[:chr].each_with_index do |c, i|
    #    @breakpoints << Breakpoint.new(c, band_i[:bands][i], 'dic')
    #  end
    #  # TODO am not sure how the dic rearrangment works, see this in CyDas dic(13;13)(q14;q32)
    #  #@fragments << Fragment.new( Breakpoint.new(@breakpoints[0].chr, "pter"), @breakpoints[0])
    #  #@fragments << Fragment.new( @breakpoints[1], Breakpoint.new(@breakpoints[1].chr, "#{@breakpoints[1].arm}ter"))
    #end
  end

  ## RING ## TODO figure out the right regex for this
  #class RingChromosome < Aberration
  #  @kt = 'ring'
  #  @rx = /^r\(/
  #end

  ## ROBERTSONIAN
  #class Robertsonian < Aberration
  #  @kt = 'rob'
  #  @rx = /^rob\(/
  #end

  ## DERIVATIVE
  class Derivative < Aberration
    @kt = 'der'
    @rx = /^der\((\d+|X|Y)\)/

    # Splits the text after the leading chromosome group into its individual
    # abnormalities, parses each one with the matching aberration class and
    # collects their breakpoints. Translocation breakpoints are additionally
    # turned into fragments.
    def get_breakpoints
      @aberrations = []

      ab_objs = Aberration.aberration_objs

      chr_i = find_chr(@abr)
      return if chr_i.nil? # nothing parseable; find_chr already logged it

      derivative_abr = @abr[chr_i[:end_index] + 1..@abr.length]

      # separate different abnormalities within the derivative chromosome and clean it up to make it parseable
      abnormalities = derivative_abr.scan(/([^\(\)]+\(([^\(\)]|\)\()*\))/).map { |a| a[0] }

      trans_bps = []
      abnormalities.each do |abn|
        abrclass = Aberration.classify_aberration(abn)

        if abrclass.to_s.eql? 'unk' # not dealing with unknowns
          @log.warn("Cannot handle #{abn}, incorrect format.")
          next
        end

        # special handling because translocations are written as a sliding window
        # translocations should also only ever have 2 breakpoints...
        if abrclass.to_s.eql? ChromosomeAberrations::Translocation.type
          trans = ChromosomeAberrations::Translocation.new(abn)
          trans_bps << trans.breakpoints
          @breakpoints << trans.breakpoints
        else
          ab_obj = ab_objs[abrclass].new(abn)
          if ab_obj.breakpoints.length > 0
            @aberrations << ab_obj
            @breakpoints << ab_obj.breakpoints
          end
        end
      end

      trans_bps.delete_if { |c| c.empty? }
      add_fragments(trans_bps.flatten) if trans_bps.length > 0
    end

    # NOTE(review): add_fragments is internal by intent but has always been
    # public; visibility is left unchanged.

    # have to reorder the array and then turn Breakpoints into fragments
    #
    # tbp_list - flat list of translocation Breakpoints. The first pair is
    #            kept in order and prefixed with a "<arm>ter" breakpoint on
    #            the first chromosome; every later pair is swapped; a final
    #            "<arm>ter" breakpoint closes the list. Consecutive pairs of
    #            the result become Fragments.
    def add_fragments(tbp_list)
      sorted = []
      tbp_list.each_with_index do |e, i|
        if i <= 1
          sorted << Breakpoint.new(e.chr, "#{e.arm}ter") if i.eql? 0
          sorted << e
        elsif i % 2 == 0
          sorted << tbp_list[i + 1]
          sorted << tbp_list[i]
        end
      end
      sorted << Breakpoint.new(sorted[-1].chr, "#{sorted[-1].arm}ter")
      sorted.each_slice(2) do |pair|
        @fragments << Fragment.new(pair[0], pair[1])
      end
    end
  end

  ## TRANSLOCATION ... this is typically a subset of Derivative chromosomes, but have seen it on its own
  class Translocation < Aberration
    @kt = 'trans'
    @rx = /^t\((\d+|X|Y)[;|:](\d+|X|Y)\)/

    ## Two ways of defining translocations:
    ## 1) t(1;3)(p31;p13)
    #
    # Adds one 'trans' Breakpoint per chromosome, pairing the i-th chromosome
    # with the i-th band. Nothing is added when chromosomes or bands cannot
    # be parsed.
    def get_breakpoints
      chr_i = find_chr(@abr)
      return if chr_i.nil? # find_chr already logged the problem

      band_i = find_bands(@abr, chr_i[:end_index])
      if band_i
        chr_i[:chr].each_with_index do |c, i|
          @breakpoints << Breakpoint.new(c, band_i[:bands][i], 'trans')
        end
      else
        @log.warn("No bands defined in #{@abr}")
      end
    end
  end

  ## FRAGMENT
  class ChromosomeFragment < Aberration
    @kt = 'frag'
    @rx = /^frag\((\d+|X|Y)\)/
  end

  ## CHROMOSOME GAIN
  class ChromosomeGain < Aberration
    @kt = 'gain'
    @rx = /^\+(\d+|X|Y)$/

    # str - the gain notation, e.g. "+13"; the "+" is dropped from #abr.
    # A whole-chromosome gain has no breakpoints or fragments.
    def initialize(str)
      config_logging
      @abr = str.sub("+", "")
      @breakpoints = []
      @fragments = []
    end
  end

  ## CHROMOSOME LOSS
  class ChromosomeLoss < Aberration
    @kt = 'loss'
    @rx = /^-(\d+|X|Y)$/

    # str - the loss notation, e.g. "-7"; the "-" is dropped from #abr.
    # A whole-chromosome loss has no breakpoints or fragments.
    def initialize(str)
      config_logging
      @abr = str.sub("-", "")
      @breakpoints = []
      @fragments = []
    end
  end
end
|
188
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
|
2
|
+
module Cytogenetics
|
3
|
+
|
4
|
+
# A stretch of one chromosome delimited by two Breakpoints.
class Fragment
  attr_reader :chr, :start, :end, :genes

  # args - exactly two Breakpoint objects: the start and the end of the
  #        fragment, both on the same chromosome.
  #
  # Raises ArgumentError when the arguments are missing or not Breakpoints.
  # Raises GenomeStructureError when the breakpoints are on different
  # chromosomes.
  # NOTE(review): GenomeStructureError is not defined in any file visible
  # here — confirm it exists, otherwise this path surfaces as a NameError.
  def initialize(*args)
    config_logging
    unless args.length.eql?(2) && args[0].is_a?(Breakpoint) && args[1].is_a?(Breakpoint)
      raise ArgumentError, "Expected arguments are missing or are not Breakpoints: #{args}"
    end

    # must exist before #add_gene / #genes are used
    @genes = []
    @start = args[0]
    @end = args[1]
    @chr = @start.chr

    unless @start.chr.eql? @end.chr
      raise GenomeStructureError, "Fragments must be within the same chromosome: #{args}"
    end
  end

  # Appends +gene+ to the fragment's gene list and returns the list.
  def add_gene(gene)
    @genes << gene
  end

  def to_s
    "#{@start.to_s} --> #{@end.to_s}"
  end

  ## TODO this will require length in basepairs of each band
  #def length
  #
  #end

  # NOTE(review): config_logging is internal by intent but has always been
  # public; visibility is left unchanged.

  # Stores the shared library logger in @log.
  def config_logging
    @log = Cytogenetics.logger
    #@log.progname = self.class.name
  end
end
|
44
|
+
end
|
45
|
+
|
@@ -0,0 +1,172 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
require 'cytogenetics/utils/karyotype_reader'
|
4
|
+
|
5
|
+
module Cytogenetics
|
6
|
+
|
7
|
+
# Parses a karyotype string into ploidy, sex chromosomes, normal chromosome
# counts and per-chromosome aberrations.
class Karyotype

  @@haploid = 23

  attr_reader :aberrations, :karyotype, :ploidy, :sex, :abnormal_chr, :normal_chr

  class << self
    attr_accessor :aberration_objs, :unclear_aberrations, :log
  end

  # karyotype_str - the karyotype text; whitespace is ignored.
  #
  # Raises ArgumentError unless it is a String longer than one character.
  # Raises KaryotypeError (from prep_karyotype) when ploidy/sex are absent.
  def initialize(karyotype_str)
    config_logging
    unless karyotype_str.is_a?(String) && karyotype_str.length > 1
      raise ArgumentError, "#{karyotype_str} is not a karyotype."
    end
    @log.info("Reading karyotype #{karyotype_str}")

    @karyotype = karyotype_str.gsub(/\s/, "")
    @normal_chr = {}
    @abnormal_chr = {}
    @aberrations = {}
    @unclear_aberrations = []
    setup_abberation_objs
    prep_karyotype
    handle_ploidy_diff
    analyze
  end

  # Builds a Chromosome (with its Aberration attached) for every classified
  # aberration except gains/losses and stores it in @abnormal_chr, keyed by
  # chromosome name.
  def analyze
    Aberration.aberration_type.each do |abr_type|
      next unless @aberrations.has_key? abr_type
      abr_class = @aberration_obj[abr_type]
      regex = abr_class.regex

      @aberrations[abr_type].each do |abr|
        md = abr.match(regex)
        first_chr = md && md[1]
        second_chr = md && md[2]
        unless second_chr.nil? || first_chr.eql?(second_chr)
          @log.warn("Aberration has two chromosomes #{abr} but only the first one is handled.")
        end

        ## TODO deal with the case of 2 chromosomes defined in the aberration
        chr = Chromosome.new(first_chr, true)
        chr.aberration(abr_class.new(abr))

        (@abnormal_chr[chr.name] ||= []) << chr
      end
    end
  end

  # get breakpoints for the karyotype (flat list over all abnormal chromosomes)
  def report_breakpoints
    bps = []
    @abnormal_chr.each_pair do |_name, chr_list|
      chr_list.each { |chr| bps << chr.breakpoints }
    end
    bps.delete_if { |c| c.empty? }
    bps.flatten!
    bps
  end

  # Flat list of the fragments of all abnormal chromosomes.
  def report_fragments
    frags = []
    @abnormal_chr.each_pair do |_name, chr_list|
      chr_list.each { |chr| frags << chr.fragments }
    end
    frags.delete_if { |c| c.empty? }
    frags.flatten!
    frags
  end

  # Whole-chromosome losses and gains as signed strings, losses first
  # (e.g. ["-7", "+13"]).
  def report_ploidy_change
    pd = []
    pd << @aberrations[:loss].map { |e| "-#{e}" } if @aberrations[:loss]
    pd << @aberrations[:gain].map { |e| "+#{e}" } if @aberrations[:gain]
    pd.flatten!
    pd
  end

  # Returns a text summary: the normal chromosome counts followed by the
  # aberrations and breakpoints of every abnormal chromosome.
  def summarize
    summary = "NORMAL CHROMOSOMES\n"
    @normal_chr.each_pair do |chr, count|
      summary << " #{chr}: #{count}\n"
    end

    summary << "\nABNORMAL:"
    @abnormal_chr.each_pair do |chr, list|
      summary << "\n#{chr}"
      list.each do |c|
        summary << "\n#{c.aberrations}\n"
        summary << "\n#{c.breakpoints}\n"
      end
    end
    # hand back the text itself, not the result of the last iteration
    summary
  end

  # -------------------- # INTERNAL # -------------------- #
  # NOTE(review): the helpers below are internal by intent but have always
  # been public; visibility is left unchanged.

  # Stores the shared library logger in @log.
  def config_logging
    @log = Cytogenetics.logger
    #@log.progname = self.class.name
  end

  # Caches the type Symbol => aberration class lookup table.
  def setup_abberation_objs
    @aberration_obj = Aberration.aberration_objs
  end

  # Applies whole-chromosome losses (-1 each) and gains (+1 each) to the
  # normal chromosome counts. Chromosomes without a count are skipped with a
  # warning.
  def handle_ploidy_diff
    [[:loss, -1], [:gain, 1]].each do |kind, delta|
      (@aberrations[kind] || []).each do |c|
        unless @normal_chr.has_key?(c)
          @log.warn("Cannot apply #{kind} of unknown chromosome #{c}, skipped.")
          next
        end
        @normal_chr[c] += delta
      end
    end
  end

  # determine ploidy & gender, clean up each aberration and classify it
  #
  # Raises KaryotypeError when the ploidy or sex definition is absent.
  def prep_karyotype
    @karyotype.gsub!(/\s/, "")
    clones = @karyotype.scan(/(\[\d+\])/).map { |a| a[0] }
    @log.warn("Karyotype is a collection of clones, analysis may be inaccurate.") if clones.length > 3

    @karyotype.gsub!(/\[\d+\]/, "") # don't care about numbers of cells: [5]

    (pl, sc) = @karyotype.split(",")[0..1]
    unless pl && sc
      raise KaryotypeError, "'#{@karyotype}' is not a valid karyotype. Ploidy and sex defnitions are absent"
    end
    @ploidy = KaryotypeReader.calculate_ploidy(pl, @@haploid)
    sex_chr = KaryotypeReader.determine_sex(sc)

    st = sex_chr.values.inject { |sum, v| sum + v }
    @sex = nil
    karyotype_index = 1 # sometimes the sex is not indicated and there's no case information to figure it out
    if st > 0
      # NOTE(review): this joins the hash keys, not the observed chromosomes,
      # so the value does not depend on the counts — confirm intent.
      @sex = sex_chr.keys.join("")
      karyotype_index = 2
    end

    # NOTE(review): the range ends at 23 — confirm whether "23" is meant to
    # be a chromosome name here.
    (1..23).each { |c| @normal_chr[c.to_s] = @ploidy.to_i }

    sex_chr.each_pair { |c, p| @normal_chr[c] = p.to_i }

    # deal with the most common karyotype string inconsistencies
    # (repeats are kept on purpose: gains/losses may legitimately occur more
    # than once, everything else is de-duplicated with a warning below)
    cleaned_karyotype = []
    @karyotype.split(",")[karyotype_index..-1].each do |abr|
      cleaned_karyotype.concat(KaryotypeReader.cleanup(abr))
    end
    @karyotype = cleaned_karyotype

    # classify each type of aberration in the karyotype
    @karyotype.each do |k|
      abrclass = Aberration.classify_aberration(k)
      (@aberrations[abrclass] ||= []) << k.sub(/^(\+|-)?/, "")
    end

    # classify_aberration returns Symbols while .type returns Strings, so
    # compare on the String form
    ploidy_types = [ChromosomeAberrations::ChromosomeGain.type, ChromosomeAberrations::ChromosomeLoss.type]
    @aberrations.each_pair do |abrclass, abrlist|
      next if ploidy_types.include?(abrclass.to_s)
      # aberrations other than chromosome gains/losses should be uniquely represented

      counts = abrlist.inject(Hash.new(0)) { |h, i| h[i] += 1; h }
      counts.each_pair { |k, v| @log.warn("#{k} was seen multiple times. Analyzed only once.") if v > 1 }

      @aberrations[abrclass] = abrlist.uniq
    end
  end
end
|
172
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
|
2
|
+
module Cytogenetics
|
3
|
+
|
4
|
+
# Mixin that reads chromosome band names from a text file with one band per
# line, written as <chromosome><arm><band>, e.g. "1p36.33".
module BandReader

  @bands_by_chr = {}

  # Returns the unique band names listed for chromosome +chr+.
  #
  # chr  - chromosome name as it appears in the file ("1", "X", ...).
  # file - an open File, or a path that is opened (and closed again) here.
  #
  # Returns an empty Array when the file lists no band for +chr+.
  def bands(chr, file)
    opened_here = !file.is_a?(File)
    file = File.open(file, 'r') if opened_here
    begin
      by_chr = read_file(file)
    ensure
      # only close handles this method opened; a caller-supplied File stays open
      file.close if opened_here
    end
    (by_chr[chr] || []).uniq
  end

  # Reads every line of +file+ and groups the band names by chromosome.
  # A band with a decimal part ("1p36.33") is listed both in full and as its
  # parent band ("1p36"). Lines that are not band definitions are ignored.
  #
  # Returns a Hash of chromosome name => Array of band names (may repeat).
  def read_file(file)
    band_by_chr = {}
    file.each_line do |line|
      md = line.chomp.match(/^(\d+|X|Y)([p|q].*)/)
      next if md.nil? # header, blank or otherwise unparseable line

      c = md[1]
      b = md[2]
      list = (band_by_chr[c] ||= [])
      list << "#{c}#{b}"
      parent = b[/([p|q]\d+)\.\d+/, 1]
      list << "#{c}#{parent}" if parent
    end
    band_by_chr
  end
end
|
29
|
+
|
30
|
+
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
require 'cytogenetics/karyotype_error'
|
2
|
+
|
3
|
+
module Cytogenetics
|
4
|
+
# Stateless helpers that normalise the pieces of a karyotype string.
# None of the methods modify the strings passed to them.
class KaryotypeReader

  # Stores the shared library logger in the class-level @log.
  def self.config_logging
    @log = Cytogenetics.logger
    #@log.progname = self.name
  end

  # Normalises one comma-separated karyotype element.
  #
  # abr - the raw element, e.g. "+t(13;X)(q13;p12)", "13x3", "add(7)x2".
  #
  # Returns an Array of cleaned aberration strings; it is empty when the
  # element is unclear ('?' or '**') or describes a normal copy number.
  def self.cleanup(abr)
    config_logging

    new_abr = []

    # +t(13;X)(q13;p12) doesn't need a +
    abr = abr.sub(/^[\+|-]/, "") unless abr.match(/^[\+|-][\d|X|Y]+$/)

    # not going to bother with aberrations that are unclear/unknown '?' or with '**'
    if abr.match(/\?|\*\*/)
      @log.warn("Removing aberration with unknown/unclear information: #{abr}")
      return new_abr
    end

    if (md = abr.match(/^([\d+|X|Y]+)x(\d+)/))
      # 13x2 is normal, 13x3 is a duplicate and should read +13
      chr = md[1]
      dups = md[2].to_i
      if dups.zero? # managed to lose both chromosomes in a diploid karyotype
        2.times { new_abr.push("-#{chr}") }
      elsif dups > 2 # sometimes you have 13x3, really just means 1 additional chr 13 since normal ploidy is 2
        (dups - 2).times { new_abr.push("+#{chr}") }
      elsif dups.eql?(1)
        new_abr.push("-#{chr}")
      end
    elsif (md = abr.match(/(.*)x(\d+)$/))
      # add(9)(p21)x2 or add(7)x2 should indicate that this "additional material of unk origin" happened twice
      md[2].to_i.times { new_abr.push(md[1]) }
    else # everything else
      # TODO: del(7) should be -7 but not del(7)(q12)
      new_abr.push(abr)
    end

    new_abr
  end

  # Counts the sex chromosomes named in +str+ (e.g. "XY").
  #
  # Returns a Hash {'X' => n, 'Y' => m}; both counts are 0 (and a warning is
  # logged) when +str+ consists of anything other than X and Y.
  def self.determine_sex(str)
    config_logging

    sex_chr = {'X' => 0, 'Y' => 0}

    if str.match(/^(X|Y)+$/)
      # ploidy number makes no difference since this string will tell us how many or at least what the gender should be
      str.match(/([X|Y]+)/).to_s.split(//).each { |c| sex_chr[c] += 1 }

      # Cannot assume a lone X is a male missing Y: often XX -X is listed as X,...
      #sex_chr['Y'] += 1 if (chrs.length.eql?(1) and chrs[0].eql?('X'))
    else
      @log.warn("Definition of gender incorrect (#{str})")
    end

    sex_chr
  end

  # Estimates the ploidy level from the chromosome-count part of a karyotype.
  #
  # str     - count definition: a single number ("72"), a range ("46-53" or
  #           "46~53"), optionally followed by a <...> annotation.
  # haploid - the haploid chromosome number.
  #
  # Returns 2, 3 or 4. A string without a leading number is treated as
  # diploid.
  def self.calculate_ploidy(str, haploid)
    config_logging

    str = str.sub(/<.{2,}>/, "")
    md = str.match(/\d+\((\d+-\d+)\)/)
    str = md[1] if md

    diploid = haploid * 2
    triploid = haploid * 3
    quadraploid = haploid * 4

    # typically see di- tri- quad- if more than that it should be noted
    min = diploid
    max = diploid
    if (md = str.match(/(\d+)[-|~](\d+)/)) # num and range or just range: 46-53
      min = md[1].to_i
      max = md[2].to_i
    elsif (md = str.match(/^(\d+)/)) # single num: 72
      min = md[1].to_i
      max = min
    end

    if min < haploid
      @log.warn("Ploidy determination may be bad as the min was less than haploid (#{str}). Setting to haploid.")
      min = haploid
    end

    if min.eql?(diploid) && max.eql?(diploid)
      @log.debug("Normal ploidy: #{str}")
      2
    elsif (min >= haploid && max <= diploid) || (min <= diploid && max < triploid)
      @log.debug("Relatively normal ploidy #{str}")
      2
    elsif min >= haploid && max < quadraploid
      @log.debug("Triploid #{str}")
      3
    elsif max >= quadraploid
      @log.debug("Quadraploid #{str}")
      4
    else
      raise KaryotypeError, "Failed to determine ploidy for #{str}"
    end
  end
end
|
121
|
+
end
|
data/lib/cytogenetics.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'cytogenetics/aberration'
|
2
|
+
require 'cytogenetics/breakpoint'
|
3
|
+
require 'cytogenetics/chromosome'
|
4
|
+
require 'cytogenetics/chromosome_aberrations'
|
5
|
+
require 'cytogenetics/fragment'
|
6
|
+
require 'cytogenetics/karyotype'
|
7
|
+
require 'cytogenetics/karyotype_error'
|
8
|
+
|
9
|
+
|
10
|
+
require 'cytogenetics/utils/karyotype_reader'
|
11
|
+
require 'cytogenetics/utils/band_reader'
|
12
|
+
|
13
|
+
require 'yaml'
|
14
|
+
require 'logger'
|
15
|
+
|
16
|
+
# Library entry point: holds the logger shared by all classes and offers a
# shortcut for parsing a karyotype string.
module Cytogenetics

  # Replaces the logger used throughout the library.
  def self.logger=(log)
    @clog = log
  end

  # Returns the shared logger. When none has been assigned, one writing to
  # STDOUT at FATAL level is created on first use and kept.
  def self.logger
    @clog ||= Logger.new(STDOUT).tap { |default_log| default_log.level = Logger::FATAL }
  end

  # Parses +kary_str+ and returns the resulting Karyotype.
  def self.karyotype(kary_str)
    Karyotype.new(kary_str)
  end

end
|
metadata
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cytogenetics
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Sarah Killcoyne
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-11-26 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: Karyotype parser based on ISCN specification. Note that there are still
|
15
|
+
many bugs. The ISCN language is poorly followed by most users so the parser is still
|
16
|
+
being developed.
|
17
|
+
email: sarah.killcoyne@uni.lu
|
18
|
+
executables: []
|
19
|
+
extensions: []
|
20
|
+
extra_rdoc_files: []
|
21
|
+
files:
|
22
|
+
- lib/cytogenetics/aberration.rb
|
23
|
+
- lib/cytogenetics/breakpoint.rb
|
24
|
+
- lib/cytogenetics/chromosome.rb
|
25
|
+
- lib/cytogenetics/chromosome_aberrations.rb
|
26
|
+
- lib/cytogenetics/fragment.rb
|
27
|
+
- lib/cytogenetics/karyotype.rb
|
28
|
+
- lib/cytogenetics/karyotype_error.rb
|
29
|
+
- lib/cytogenetics/utils/band_reader.rb
|
30
|
+
- lib/cytogenetics/utils/karyotype_reader.rb
|
31
|
+
- lib/cytogenetics/version.rb
|
32
|
+
- lib/cytogenetics.rb
|
33
|
+
homepage:
|
34
|
+
licenses:
|
35
|
+
- http://www.apache.org/licenses/LICENSE-2.0.html
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options: []
|
38
|
+
require_paths:
|
39
|
+
- lib
|
40
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
|
+
none: false
|
48
|
+
requirements:
|
49
|
+
- - ! '>='
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
52
|
+
requirements: []
|
53
|
+
rubyforge_project:
|
54
|
+
rubygems_version: 1.8.24
|
55
|
+
signing_key:
|
56
|
+
specification_version: 3
|
57
|
+
summary: Karyotype parser based on ISCN specification.
|
58
|
+
test_files: []
|