text_alignment 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,160 @@
1
+ #!/usr/bin/env ruby
2
+ require 'diff-lcs'
3
+
4
+ module TextAlignment; end unless defined? TextAlignment
5
+
6
# Reopen Diff::LCS::ContextChange so that its two position fields become
# writable. TextAlignment::LCSMin needs this to shift the positions of an
# sdiff computed on substrings back into whole-string coordinates.
class Diff::LCS::ContextChange
  attr_accessor :old_position
  attr_accessor :new_position
end
10
+
11
# Finds the minimal LCS window and the sdiff of two strings, str1 and str2.
# It relies on the diff-lcs gem for the computation of the LCS table.
#
# After construction:
#   sdiff       - Array of Diff::LCS::ContextChange covering both whole
#                 strings, or nil when the strings share no character
#   lcs         - number of '=' entries found inside the minimal window
#                 (0 when there is no match)
#   m1_initial, m1_final - first / last matched index in str1 (nil if none)
#   m2_initial, m2_final - first / last matched index in str2 (nil if none)
class TextAlignment::LCSMin
  attr_reader :sdiff, :lcs, :m1_initial, :m1_final, :m2_initial, :m2_final

  # Stand-in for whitespace characters of str2 while searching the window.
  PLACEHOLDER_CHAR = '_'

  # str1, str2 - the strings to align.
  #
  # Raises ArgumentError when either string is nil or empty.
  def initialize(str1, str2)
    raise ArgumentError, "nil string" if str1.nil? || str2.nil?
    raise ArgumentError, "empty string" if str1.empty? || str2.empty?

    # str1 is kept as it is.
    # str2 is kept with whitespace characters replaced by the placeholder,
    # to avoid overfitting to whitespace during the LCS computation.
    @str1 = str1
    @str2 = str2.gsub(/\s/, PLACEHOLDER_CHAR)

    # find the corresponding minimal range of the two strings
    range = _find_min_range(0, @str1.length - 1, 0, @str2.length - 1)

    # _find_min_range returns nil when there is no common character at all.
    # Indexing into that nil used to raise NoMethodError before the
    # "no match" state could be recorded.
    if range.nil?
      @m1_initial = @m1_final = @m2_initial = @m2_final = nil
      @sdiff = nil
      @lcs = 0
      return
    end

    @m1_initial, @m1_final = range[:m1_initial], range[:m1_final]
    @m2_initial, @m2_final = range[:m2_initial], range[:m2_final]

    # compute sdiff and lcs inside the window;
    # here the original str2 is used, with all whitespace preserved.
    @sdiff = Diff::LCS.sdiff(@str1[@m1_initial..@m1_final], str2[@m2_initial..@m2_final])
    @lcs = @sdiff.count { |d| d.action == '=' }

    # shift the window-relative positions to whole-string positions
    @sdiff.each do |h|
      h.old_position += @m1_initial unless h.old_position.nil?
      h.new_position += @m2_initial unless h.new_position.nil?
    end

    # Pad the sdiff so that it covers both strings completely: everything
    # before the window comes first (str1 deletions, then str2 additions),
    # everything after the window is appended.
    # NOTE(review): the padded '+' entries take their element from the
    # placeholder-substituted @str2, not from the original str2 - confirm
    # whether that is intended.
    (0...@m2_initial).reverse_each { |i| @sdiff.unshift(Diff::LCS::ContextChange.new('+', nil, nil, i, @str2[i])) }
    (0...@m1_initial).reverse_each { |i| @sdiff.unshift(Diff::LCS::ContextChange.new('-', i, @str1[i], nil, nil)) }
    (@m1_final + 1...@str1.length).each { |i| @sdiff.push(Diff::LCS::ContextChange.new('-', i, @str1[i], nil, nil)) }
    (@m2_final + 1...@str2.length).each { |i| @sdiff.push(Diff::LCS::ContextChange.new('+', nil, nil, i, @str2[i])) }
  end

  # Recursively searches for the smallest pair of index ranges (inclusive) in
  # @str1 and @str2 whose LCS is not shorter than clcs.
  #
  # m1_initial, m1_final - current window in @str1
  # m2_initial, m2_final - current window in @str2
  # clcs                 - LCS length the window has to reach at least
  #
  # Returns a Hash with the keys :m1_initial, :m1_final, :m2_initial and
  # :m2_final, or nil when the window is empty, contains no match, or its
  # LCS is shorter than clcs.
  def _find_min_range(m1_initial, m1_final, m2_initial, m2_final, clcs = 0)
    return nil if (m1_final - m1_initial < 0) || (m2_final - m2_initial < 0)

    sdiff = Diff::LCS.sdiff(@str1[m1_initial..m1_final], @str2[m2_initial..m2_final])
    lcs = sdiff.count { |d| d.action == '=' }

    return nil if lcs == 0
    return nil if lcs < clcs

    # Trim the window to the first and the last matching character. The
    # final positions are computed first because both computations rely on
    # the not-yet-updated initial offsets.
    match_last = sdiff.rindex { |d| d.action == '=' }
    m1_final = sdiff[match_last].old_position + m1_initial
    m2_final = sdiff[match_last].new_position + m2_initial

    match_first = sdiff.index { |d| d.action == '=' }
    m1_initial = sdiff[match_first].old_position + m1_initial
    m2_initial = sdiff[match_first].new_position + m2_initial

    # attempt for a shorter match: shrink the longer side by one character,
    # from the left first and then from the right, keeping the LCS length
    if (m1_final - m1_initial) > (m2_final - m2_initial)
      r = _find_min_range(m1_initial + 1, m1_final, m2_initial, m2_final, lcs)
      return r unless r.nil?
      r = _find_min_range(m1_initial, m1_final - 1, m2_initial, m2_final, lcs)
      return r unless r.nil?
    else
      r = _find_min_range(m1_initial, m1_final, m2_initial + 1, m2_final, lcs)
      return r unless r.nil?
      r = _find_min_range(m1_initial, m1_final, m2_initial, m2_final - 1, lcs)
      return r unless r.nil?
    end

    # no shorter window keeps the LCS length: the current one is minimal
    {
      m1_initial: m1_initial,
      m1_final: m1_final,
      m2_initial: m2_initial,
      m2_final: m2_final
    }
  end

  # Counts the "big" gaps in sdiff[initial..last].
  #
  # Consecutive '+' entries are accumulated as one run in gaps1 and
  # consecutive '-' entries as one run in gaps2; a run counts as big when it
  # is longer than MAX_LEN_BIG_GAP.
  #
  # sdiff   - Array of objects responding to #action
  # initial - index of the first entry to inspect
  # last    - index of the last entry to inspect (inclusive)
  #
  # Returns the total number of big gaps of both kinds.
  # Raises ArgumentError when sdiff is nil or last < initial.
  #
  # NOTE(review): MAX_LEN_BIG_GAP is not defined in this file; it has to be
  # resolvable from this class at call time - confirm where it lives.
  def num_big_gaps(sdiff, initial, last)
    raise ArgumentError, "nil sdiff" if sdiff.nil?
    raise ArgumentError, "invalid indice: #{initial}, #{last}" unless last >= initial

    state1 = :initial
    state2 = :initial
    gaps1 = []
    gaps2 = []

    (initial..last).each do |i|
      case sdiff[i].action
      when '='
        state1 = :continue
        state2 = :continue
      when '!'
        # NOTE(review): unlike the '+' branch, a substitution always opens a
        # new run in gaps1, and it leaves state2 at :continue although gaps2
        # was extended - looks asymmetric, confirm the intent.
        gaps1 << 1
        state1 = :break

        if state2 == :break
          gaps2[-1] += 1
        else
          gaps2 << 1
        end
        state2 = :continue
      when '+'
        if state1 == :break
          gaps1[-1] += 1
        else
          gaps1 << 1
        end
        state1 = :break
      when '-'
        if state2 == :break
          gaps2[-1] += 1
        else
          gaps2 << 1
        end
        state2 = :break
      end
    end

    num_big_gaps1 = gaps1.count { |g| g > MAX_LEN_BIG_GAP }
    num_big_gaps2 = gaps2.count { |g| g > MAX_LEN_BIG_GAP }
    num_big_gaps1 + num_big_gaps2
  end
end
138
+
139
+
140
# Demo entry point: aligns two texts with LCSMin and prints the result in
# cdiff form. With exactly two command line arguments, both are read as JSON
# files and their "text" members are aligned; otherwise the built-in sample
# (a citation title against the full abstract) is used.
if __FILE__ == $0
  require 'json'
  require 'text_alignment/lcs_cdiff'

  # Built-in sample. (The short 'abcde' / 'naxbyzabcdexydzem' pair that used
  # to be assigned first was overwritten immediately and has been removed.)
  str1 = "TI - Identification of a region which directs the monocytic activity of the\n colony-stimulating factor 1 (macrophage colony-stimulating factor) receptor\n promoter and binds PEBP2/CBF (AML1)."
  str2 = "Identification of a region which directs the monocytic activity of the colony-stimulating factor 1 (macrophage colony-stimulating factor) receptor promoter and binds PEBP2/CBF (AML1).\nThe receptor for the macrophage colony-stimulating factor (or colony-stimulating factor 1 [CSF-1]) is expressed from different promoters in monocytic cells and placental trophoblasts. We have demonstrated that the monocyte-specific expression of the CSF-1 receptor is regulated at the level of transcription by a tissue-specific promoter whose activity is stimulated by the monocyte/B-cell-specific transcription factor PU.1 (D.-E. Zhang, C.J. Hetherington, H.-M. Chen, and D.G. Tenen, Mol. Cell. Biol. 14:373-381, 1994). Here we report that the tissue specificity of this promoter is also mediated by sequences in a region II (bp -88 to -59), which lies 10 bp upstream from the PU.1-binding site. When analyzed by DNase footprinting, region II was protected preferentially in monocytic cells. Electrophoretic mobility shift assays confirmed that region II interacts specifically with nuclear proteins from monocytic cells. Two gel shift complexes (Mono A and Mono B) were formed with separate sequence elements within this region. Competition and supershift experiments indicate that Mono B contains a member of the polyomavirus enhancer-binding protein 2/core-binding factor (PEBP2/CBF) family, which includes the AML1 gene product, while Mono A is a distinct complex preferentially expressed in monocytic cells. Promoter constructs with mutations in these sequence elements were no longer expressed specifically in monocytes. Furthermore, multimerized region II sequence elements enhanced the activity of a heterologous thymidine kinase promoter in monocytic cells but not other cell types tested. 
These results indicate that the monocyte/B-cell-specific transcription factor PU.1 and the Mono A and Mono B protein complexes act in concert to regulate monocyte-specific transcription of the CSF-1 receptor."

  if ARGV.length == 2
    str1 = JSON.parse(File.read(ARGV[0]).strip)["text"]
    str2 = JSON.parse(File.read(ARGV[1]).strip)["text"]
  end

  lcsmin = TextAlignment::LCSMin.new(str1, str2)
  puts TextAlignment.sdiff2cdiff(lcsmin.sdiff)
end
@@ -0,0 +1,75 @@
1
module TextAlignment
  # Default character mappings: each entry is a [from, to] pair giving a
  # non-ASCII character and its ASCII spelling.
  #
  # Invisible whitespace characters are written as \u escapes so that they
  # cannot be confused with, or silently normalised to, a plain space.
  #
  # The array is intentionally left unfrozen: code in this gem has been
  # calling destructive methods (delete_if) on the array it is handed.
  MAPPINGS = [
    ["©", "(c)"],       #U+00A9 (Copyright Sign)

    ["α", "alpha"],     #U+03B1 (greek small letter alpha)
    ["β", "beta"],      #U+03B2 (greek small letter beta)
    ["γ", "gamma"],     #U+03B3 (greek small letter gamma)
    ["δ", "delta"],     #U+03B4 (greek small letter delta)
    ["ε", "epsilon"],   #U+03B5 (greek small letter epsilon)
    ["ζ", "zeta"],      #U+03B6 (greek small letter zeta)
    ["η", "eta"],       #U+03B7 (greek small letter eta)
    ["θ", "theta"],     #U+03B8 (greek small letter theta)
    ["ι", "iota"],      #U+03B9 (greek small letter iota)
    ["κ", "kappa"],     #U+03BA (greek small letter kappa)
    ["λ", "lambda"],    #U+03BB (greek small letter lambda)
    ["λ", "lamda"],     #U+03BB (greek small letter lambda, alternative spelling)
    ["μ", "mu"],        #U+03BC (greek small letter mu)
    ["ν", "nu"],        #U+03BD (greek small letter nu)
    ["ξ", "xi"],        #U+03BE (greek small letter xi)
    ["ο", "omicron"],   #U+03BF (greek small letter omicron)
    ["π", "pi"],        #U+03C0 (greek small letter pi)
    ["ρ", "rho"],       #U+03C1 (greek small letter rho)
    ["σ", "sigma"],     #U+03C3 (greek small letter sigma)
    ["τ", "tau"],       #U+03C4 (greek small letter tau)
    ["υ", "upsilon"],   #U+03C5 (greek small letter upsilon)
    ["φ", "phi"],       #U+03C6 (greek small letter phi)
    ["χ", "chi"],       #U+03C7 (greek small letter chi)
    ["ψ", "psi"],       #U+03C8 (greek small letter psi)
    ["ω", "omega"],     #U+03C9 (greek small letter omega)

    ["Α", "Alpha"],     #U+0391 (greek capital letter alpha)
    ["Β", "Beta"],      #U+0392 (greek capital letter beta)
    ["Γ", "Gamma"],     #U+0393 (greek capital letter gamma)
    ["Δ", "Delta"],     #U+0394 (greek capital letter delta)
    ["Ε", "Epsilon"],   #U+0395 (greek capital letter epsilon)
    ["Ζ", "Zeta"],      #U+0396 (greek capital letter zeta)
    ["Η", "Eta"],       #U+0397 (greek capital letter eta)
    ["Θ", "Theta"],     #U+0398 (greek capital letter theta)
    ["Ι", "Iota"],      #U+0399 (greek capital letter iota)
    ["Κ", "Kappa"],     #U+039A (greek capital letter kappa)
    ["Λ", "Lambda"],    #U+039B (greek capital letter lambda)
    ["Λ", "Lamda"],     #U+039B (greek capital letter lambda, alternative spelling)
    ["Μ", "Mu"],        #U+039C (greek capital letter mu)
    ["Ν", "Nu"],        #U+039D (greek capital letter nu)
    ["Ξ", "Xi"],        #U+039E (greek capital letter xi)
    ["Ο", "Omicron"],   #U+039F (greek capital letter omicron)
    ["Π", "Pi"],        #U+03A0 (greek capital letter pi)
    ["Ρ", "Rho"],       #U+03A1 (greek capital letter rho)
    ["Σ", "Sigma"],     #U+03A3 (greek capital letter sigma)
    ["Τ", "Tau"],       #U+03A4 (greek capital letter tau)
    ["Υ", "Upsilon"],   #U+03A5 (greek capital letter upsilon)
    ["Φ", "Phi"],       #U+03A6 (greek capital letter phi)
    ["Χ", "Chi"],       #U+03A7 (greek capital letter chi)
    ["Ψ", "Psi"],       #U+03A8 (greek capital letter psi)
    ["Ω", "Omega"],     #U+03A9 (greek capital letter omega)

    ["ϕ", "phi"],       #U+03D5 (greek phi symbol)

    ["×", "x"],         #U+00D7 (multiplication sign)
    ["•", "*"],         #U+2022 (bullet)
    ["\u2009", " "],    #U+2009 (thin space)
    ["\u200A", " "],    #U+200A (hair space)
    ["\u00A0", " "],    #U+00A0 (no-break space)
    ["\u3000", " "],    #U+3000 (ideographic space)
    ["−", "-"],         #U+2212 (minus sign)
    ["–", "-"],         #U+2013 (en dash)
    ["′", "'"],         #U+2032 (prime)
    ["‘", "'"],         #U+2018 (left single quotation mark)
    ["’", "'"],         #U+2019 (right single quotation mark)
    ["“", '"'],         #U+201C (left double quotation mark)
    ["”", '"'],         #U+201D (right double quotation mark)
    ['"', "''"]         #U+0022 (quotation mark) written as two apostrophes
  ]
end
@@ -0,0 +1,223 @@
1
+ #!/usr/bin/env ruby
2
+ require 'diff-lcs'
3
+ require 'text_alignment/lcs_min'
4
+ require 'text_alignment/find_divisions'
5
+ require 'text_alignment/lcs_comparison'
6
+ require 'text_alignment/lcs_alignment'
7
+ require 'text_alignment/glcs_alignment'
8
+ require 'text_alignment/mappings'
9
+
10
module TextAlignment; end unless defined? TextAlignment

# NOTE(review): presumably the n-gram length used for text signatures; it is
# not referenced in this file - confirm against the other components.
TextAlignment::SIGNATURE_NGRAM = 5 unless defined? TextAlignment::SIGNATURE_NGRAM

# Candidate "no-match" characters. TextAlignment::TextAlignment#initialize
# picks, for each input string, the first candidate that does not occur in it.
# Single-quoted so that the "#$" sequence can never be read as interpolation;
# frozen because the value is shared.
TextAlignment::NOMATCH_CHARS = '@^|#$%&_'.freeze unless defined? TextAlignment::NOMATCH_CHARS
14
+
15
# Aligns str1 to str2 and provides position maps for projecting character
# offsets of str1 onto str2.
#
# After construction:
#   sdiff               - the sdiff reported by TextAlignment::LCSMin
#   position_map_begin  - Hash: str1 position => str2 position, used for
#                         span beginnings
#   position_map_end    - Hash: str1 position => str2 position, used for
#                         span ends
#   common_elements     - pairs of aligned elements of str1 / str2
#   mapped_elements     - pairs of differing substrings that were mapped onto
#                         each other
#   similarity, str1_match_initial, str1_match_final, str2_match_initial,
#   str2_match_final    - values reported by TextAlignment::LCSComparison
class TextAlignment::TextAlignment
  attr_reader :sdiff
  attr_reader :position_map_begin, :position_map_end
  attr_reader :common_elements, :mapped_elements
  attr_reader :similarity
  attr_reader :str1_match_initial, :str1_match_final, :str2_match_initial, :str2_match_final

  # str1, str2 - the strings to align (they are not modified)
  # mappings   - Array of [from, to] String pairs (it is not modified).
  #              Pairs of two single characters are applied to both strings
  #              up front; the remaining pairs are handed on to the
  #              alignment.
  #
  # Raises ArgumentError when a string or the mappings are nil, and
  # RuntimeError when no unused character of TextAlignment::NOMATCH_CHARS
  # can be found.
  def initialize(str1, str2, mappings = [])
    raise ArgumentError, "nil string" if str1.nil? || str2.nil?
    raise ArgumentError, "nil mappings" if mappings.nil?

    ## preprocessing: work on copies, the caller's strings stay untouched
    str1 = str1.dup
    str2 = str2.dup

    ## find the first nomatch character that does not occur in str2
    @nomatch_char1 = TextAlignment::NOMATCH_CHARS.each_char.find { |c| str2.index(c).nil? }
    raise RuntimeError, "Cannot find nomatch character" if @nomatch_char1.nil?

    ## find a second, different nomatch character that does not occur in str1
    @nomatch_char2 = TextAlignment::NOMATCH_CHARS.each_char.find { |c| c != @nomatch_char1 && str1.index(c).nil? }
    raise RuntimeError, "Cannot find nomatch character" if @nomatch_char2.nil?

    # Split off the single character mappings without touching the caller's
    # array. It used to be pruned in place with delete_if, which stripped
    # these entries from a shared list such as TextAlignment::MAPPINGS and
    # silently disabled them for every later alignment.
    character_mappings, remaining_mappings = mappings.partition { |m| m[0].length == 1 && m[1].length == 1 }

    characters_from = character_mappings.map { |m| m[0] }.join
    characters_to = character_mappings.map { |m| m[1] }.join
    # a bare '-' would be read as a range operator by String#tr
    characters_to.gsub!(/-/, '\-')

    str1.tr!(characters_from, characters_to)
    str2.tr!(characters_from, characters_to)

    # ASCII folding of [single character, multi-character] mappings, the
    # step the two nomatch characters were meant to pad, is currently
    # disabled; such mappings are passed on like all the others.

    _compute_mixed_alignment(str1, str2, remaining_mappings)
  end

  # Projects one span of str1 onto str2.
  #
  # span - Hash with the keys :begin and :end (positions in str1)
  #
  # Returns a new Hash with the keys :begin and :end.
  def transform_a_span(span)
    {:begin=>@position_map_begin[span[:begin]], :end=>@position_map_end[span[:end]]}
  end

  # Projects every span of the given collection; returns a new Array.
  def transform_spans(spans)
    spans.map{|span| transform_a_span(span)}
  end

  # Projects the #begin / #end attributes of every denotation, in place.
  # Returns nil when denotations is nil.
  def transform_denotations!(denotations)
    denotations.map!{|d| d.begin = @position_map_begin[d.begin]; d.end = @position_map_end[d.end]; d} unless denotations.nil?
  end

  # Projects the :span of every denotation given in hash form.
  #
  # Returns a new Array of copies whose :span is replaced, or nil when
  # hdenotations is nil. The elements handed in are left untouched: the
  # former shallow Array copy still overwrote :span inside the caller's own
  # hashes.
  def transform_hdenotations(hdenotations)
    return nil if hdenotations.nil?

    hdenotations.map do |d|
      copy = d.dup
      copy[:span] = transform_a_span(d[:span])
      copy
    end
  end

  private

  # Computes the sdiff of the two strings and derives the position maps and
  # the lists of common / mapped elements from it.
  #
  # Runs of non-matching entries are collected in deletion (positions in
  # str1) and addition (positions in str2) and resolved whenever the next
  # match - or the end of the sdiff - is reached.
  def _compute_mixed_alignment(str1, str2, mappings = [])
    lcsmin = TextAlignment::LCSMin.new(str1, str2)
    lcs = lcsmin.lcs
    @sdiff = lcsmin.sdiff

    cmp = TextAlignment::LCSComparison.new(str1, str2, lcs, @sdiff)
    @similarity = cmp.similarity
    @str1_match_initial = cmp.str1_match_initial
    @str1_match_final = cmp.str1_match_final
    @str2_match_initial = cmp.str2_match_initial
    @str2_match_final = cmp.str2_match_final

    posmap_begin, posmap_end = {}, {}
    @common_elements, @mapped_elements = [], []

    addition, deletion = [], []

    @sdiff.each do |h|
      case h.action
      when '='
        p1, p2 = h.old_position, h.new_position

        @common_elements << [str1[p1], str2[p2]]
        posmap_begin[p1], posmap_end[p1] = p2, p2

        if !addition.empty? && deletion.empty?
          # text only present in str2: a span ending here ends before it
          posmap_end[p1] = p2 - addition.length unless p1 == 0
        elsif addition.empty? && !deletion.empty?
          # text only present in str1: collapse it onto the match position
          deletion.each{|p| posmap_begin[p], posmap_end[p] = p2, p2}
        elsif !addition.empty? && !deletion.empty?
          # NOTE(review): this test uses || while the corresponding test
          # after the loop uses && - confirm which one is intended.
          if addition.length > 1 || deletion.length > 1
            galign = TextAlignment::GLCSAlignment.new(str1[deletion[0] .. deletion[-1]], str2[addition[0] .. addition[-1]], mappings)
            galign.position_map_begin.each {|k, v| posmap_begin[k + deletion[0]] = v.nil? ? nil : v + addition[0]}
            galign.position_map_end.each {|k, v| posmap_end[k + deletion[0]] = v.nil? ? nil : v + addition[0]}
            # the sub-alignment may have written an entry for p1: restore it
            posmap_begin[p1], posmap_end[p1] = p2, p2
            @common_elements += galign.common_elements
            @mapped_elements += galign.mapped_elements
          else
            posmap_begin[deletion[0]], posmap_end[deletion[0]] = addition[0], addition[0]
            deletion[1..-1].each{|p| posmap_begin[p], posmap_end[p] = nil, nil}
            @mapped_elements << [str1[deletion[0], deletion.length], str2[addition[0], addition.length]]
          end
        end

        addition.clear; deletion.clear

      when '!'
        deletion << h.old_position
        addition << h.new_position
      when '-'
        deletion << h.old_position
      when '+'
        addition << h.new_position
      end
    end

    # the position right behind the last character of each string
    p1, p2 = str1.length, str2.length
    posmap_begin[p1], posmap_end[p1] = p2, p2

    # resolve whatever is still pending after the last match
    if !addition.empty? && deletion.empty?
      posmap_end[p1] = p2 - addition.length unless p1 == 0
    elsif addition.empty? && !deletion.empty?
      deletion.each{|p| posmap_begin[p], posmap_end[p] = p2, p2}
    elsif !addition.empty? && !deletion.empty?
      # NOTE(review): && here, || inside the loop - see the note above.
      if addition.length > 1 && deletion.length > 1
        galign = TextAlignment::GLCSAlignment.new(str1[deletion[0] .. deletion[-1]], str2[addition[0] .. addition[-1]], mappings)
        galign.position_map_begin.each {|k, v| posmap_begin[k + deletion[0]] = v.nil? ? nil : v + addition[0]}
        galign.position_map_end.each {|k, v| posmap_end[k + deletion[0]] = v.nil? ? nil : v + addition[0]}
        posmap_begin[p1], posmap_end[p1] = p2, p2
        @common_elements += galign.common_elements
        @mapped_elements += galign.mapped_elements
      else
        posmap_begin[deletion[0]], posmap_end[deletion[0]] = addition[0], addition[0]
        deletion[1..-1].each{|p| posmap_begin[p], posmap_end[p] = nil, nil}
        @mapped_elements << [str1[deletion[0], deletion.length], str2[addition[0], addition.length]]
      end
    end

    @position_map_begin = posmap_begin.sort.to_h
    @position_map_end = posmap_end.sort.to_h
  end
end
184
+
185
# Demo entry point: aligns two texts with the default mappings and prints the
# resulting sdiff in cdiff form. When exactly two command line arguments are
# given, they are taken as paths of plain text files to align; otherwise the
# built-in sample (a citation title against the full abstract) is used.
if $0 == __FILE__
  require 'json'
  require 'text_alignment/lcs_cdiff'

  str1 = "TI - Identification of a region which directs the monocytic activity of the\n colony-stimulating factor 1 (macrophage colony-stimulating factor) receptor\n promoter and binds PEBP2/CBF (AML1)."
  str2 = "Identification of a region which directs the monocytic activity of the colony-stimulating factor 1 (macrophage colony-stimulating factor) receptor promoter and binds PEBP2/CBF (AML1).\nThe receptor for the macrophage colony-stimulating factor (or colony-stimulating factor 1 [CSF-1]) is expressed from different promoters in monocytic cells and placental trophoblasts. We have demonstrated that the monocyte-specific expression of the CSF-1 receptor is regulated at the level of transcription by a tissue-specific promoter whose activity is stimulated by the monocyte/B-cell-specific transcription factor PU.1 (D.-E. Zhang, C.J. Hetherington, H.-M. Chen, and D.G. Tenen, Mol. Cell. Biol. 14:373-381, 1994). Here we report that the tissue specificity of this promoter is also mediated by sequences in a region II (bp -88 to -59), which lies 10 bp upstream from the PU.1-binding site. When analyzed by DNase footprinting, region II was protected preferentially in monocytic cells. Electrophoretic mobility shift assays confirmed that region II interacts specifically with nuclear proteins from monocytic cells. Two gel shift complexes (Mono A and Mono B) were formed with separate sequence elements within this region. Competition and supershift experiments indicate that Mono B contains a member of the polyomavirus enhancer-binding protein 2/core-binding factor (PEBP2/CBF) family, which includes the AML1 gene product, while Mono A is a distinct complex preferentially expressed in monocytic cells. Promoter constructs with mutations in these sequence elements were no longer expressed specifically in monocytes. Furthermore, multimerized region II sequence elements enhanced the activity of a heterologous thymidine kinase promoter in monocytic cells but not other cell types tested. 
These results indicate that the monocyte/B-cell-specific transcription factor PU.1 and the Mono A and Mono B protein complexes act in concert to regulate monocyte-specific transcription of the CSF-1 receptor."

  # Two arguments: replace the sample with the contents of the two files.
  # (Reading JSON documents with "text" and denotations, and projecting the
  # denotations with transform_hdenotations, is left as a manual experiment.)
  str1, str2 = File.read(ARGV[0]), File.read(ARGV[1]) if ARGV.length == 2

  alignment = TextAlignment::TextAlignment.new(str1, str2, TextAlignment::MAPPINGS)

  puts TextAlignment.sdiff2cdiff(alignment.sdiff)
end