text_alignment 0.2.9 → 0.3.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,59 +3,57 @@ require 'diff-lcs'
3
3
 
4
4
  module TextAlignment; end unless defined? TextAlignment
5
5
 
6
- module TextAlignment
7
- NIL_CHARACTER = '_'
8
- end
6
+ TextAlignment::NIL_CHARACTER = '_' unless defined? TextAlignment::NIL_CHARACTER
9
7
 
10
8
  class << TextAlignment
11
9
 
12
- def cdiff(str1, str2)
13
- raise ArgumentError, "nil string" if str1.nil? || str2.nil?
14
- raise "a nil character appears in the input string" if str1.index(TextAlignment::NIL_CHARACTER) || str2.index(TextAlignment::NIL_CHARACTER)
15
- sdiff2cdiff(Diff::LCS.sdiff(str1, str2))
16
- end
17
-
18
- def sdiff2cdiff (sdiff)
19
- raise ArgumentError, "nil sdiff" if sdiff.nil?
20
-
21
- cdiff_str1, cdiff_str2 = '', ''
22
-
23
- sdiff.each do |h|
24
- case h.action
25
- when '='
26
- cdiff_str1 += h.old_element
27
- cdiff_str2 += h.new_element
28
- when '!'
29
- cdiff_str1 += h.old_element + TextAlignment::NIL_CHARACTER
30
- cdiff_str2 += TextAlignment::NIL_CHARACTER + h.new_element
31
- when '-'
32
- cdiff_str1 += h.old_element
33
- cdiff_str2 += TextAlignment::NIL_CHARACTER
34
- when '+'
35
- cdiff_str1 += TextAlignment::NIL_CHARACTER
36
- cdiff_str2 += h.new_element
37
- end
38
- end
39
-
40
- cdiff_str1.gsub(/\n/, ' ') + "\n>>>>><<<<<\n" + cdiff_str2.gsub(/\n/, ' ')
41
- end
10
+ def cdiff(str1, str2)
11
+ raise ArgumentError, "nil string" if str1.nil? || str2.nil?
12
+ raise "a nil character appears in the input string" if str1.index(TextAlignment::NIL_CHARACTER) || str2.index(TextAlignment::NIL_CHARACTER)
13
+ sdiff2cdiff(Diff::LCS.sdiff(str1, str2))
14
+ end
15
+
16
+ def sdiff2cdiff (sdiff)
17
+ raise ArgumentError, "nil sdiff" if sdiff.nil?
18
+
19
+ cdiff_str1, cdiff_str2 = '', ''
20
+
21
+ sdiff.each do |h|
22
+ case h.action
23
+ when '='
24
+ cdiff_str1 += h.old_element
25
+ cdiff_str2 += h.new_element
26
+ when '!'
27
+ cdiff_str1 += h.old_element + TextAlignment::NIL_CHARACTER
28
+ cdiff_str2 += TextAlignment::NIL_CHARACTER + h.new_element
29
+ when '-'
30
+ cdiff_str1 += h.old_element
31
+ cdiff_str2 += TextAlignment::NIL_CHARACTER
32
+ when '+'
33
+ cdiff_str1 += TextAlignment::NIL_CHARACTER
34
+ cdiff_str2 += h.new_element
35
+ end
36
+ end
37
+
38
+ cdiff_str1.gsub(/\n/, ' ') + "\n>>>>><<<<<\n" + cdiff_str2.gsub(/\n/, ' ')
39
+ end
42
40
 
43
41
  end
44
42
 
45
43
  if __FILE__ == $0
46
- require 'json'
47
- str1 = 'abcde'
48
- str2 = 'naxbyzabcdexydzem'
49
-
50
- if ARGV.length == 2
51
- str1 = JSON.parse(File.read(ARGV[0]).strip)["text"]
52
- str2 = JSON.parse(File.read(ARGV[1]).strip)["text"]
53
- end
54
-
55
- puts "string 1: #{str1}"
56
- puts "-----"
57
- puts "string 2: #{str2}"
58
- puts "-----"
59
- puts "[cdiff]"
60
- puts TextAlignment::cdiff(str1, str2)
44
+ require 'json'
45
+ str1 = 'abcde'
46
+ str2 = 'naxbyzabcdexydzem'
47
+
48
+ if ARGV.length == 2
49
+ str1 = JSON.parse(File.read(ARGV[0]).strip)["text"]
50
+ str2 = JSON.parse(File.read(ARGV[1]).strip)["text"]
51
+ end
52
+
53
+ puts "string 1: #{str1}"
54
+ puts "-----"
55
+ puts "string 2: #{str2}"
56
+ puts "-----"
57
+ puts "[cdiff]"
58
+ puts TextAlignment::cdiff(str1, str2)
61
59
  end
@@ -4,60 +4,60 @@ require 'text_alignment/lcs_min'
4
4
  module TextAlignment; end unless defined? TextAlignment
5
5
 
6
6
  class TextAlignment::LCSComparison
7
- # The similarity ratio of the given two strings after stripping unmatched prefixes and suffixes
8
- attr_reader :similarity
9
-
10
- # The initial and final matching positions of str1 and str2
11
- attr_reader :str1_match_initial, :str1_match_final, :str2_match_initial, :str2_match_final
12
-
13
- def initialize(str1, str2, lcs = nil, sdiff = nil)
14
- raise ArgumentError, "nil string" if str1 == nil || str2 == nil
15
- @str1, @str2 = str1, str2
16
- _lcs_comparison(str1, str2, lcs, sdiff)
17
- end
18
-
19
- private
20
-
21
- def _lcs_comparison(str1, str2, lcs = nil, sdiff = nil)
22
- if lcs.nil?
23
- lcsmin = TextAlignment::LCSMin.new(str1, str2)
24
- lcs = lcsmin.lcs
25
- sdiff = lcsmin.sdiff
26
- end
27
-
28
- if lcs > 0
29
- match_initial = sdiff.index{|d| d.action == '='}
30
- match_final = sdiff.rindex{|d| d.action == '='}
31
-
32
- @str1_match_initial = sdiff[match_initial].old_position
33
- @str2_match_initial = sdiff[match_initial].new_position
34
- @str1_match_final = sdiff[match_final].old_position
35
- @str2_match_final = sdiff[match_final].new_position
36
- @similarity = 2 * lcs / ((@str1_match_final - @str1_match_initial + 1) + (@str2_match_final - @str2_match_initial + 1)).to_f
37
- else
38
- @str1_match_initial = 0
39
- @str2_match_initial = 0
40
- @str1_match_final = 0
41
- @str2_match_final = 0
42
- @similarity = 0
43
- end
44
- end
7
+ # The similarity ratio of the given two strings after stripping unmatched prefixes and suffixes
8
+ attr_reader :similarity
9
+
10
+ # The initial and final matching positions of str1 and str2
11
+ attr_reader :str1_match_initial, :str1_match_final, :str2_match_initial, :str2_match_final
12
+
13
+ def initialize(str1, str2, lcs = nil, sdiff = nil)
14
+ raise ArgumentError, "nil string" if str1 == nil || str2 == nil
15
+ @str1, @str2 = str1, str2
16
+ _lcs_comparison(str1, str2, lcs, sdiff)
17
+ end
18
+
19
+ private
20
+
21
+ def _lcs_comparison(str1, str2, lcs = nil, sdiff = nil)
22
+ if lcs.nil?
23
+ lcsmin = TextAlignment::LCSMin.new(str1, str2)
24
+ lcs = lcsmin.lcs
25
+ sdiff = lcsmin.sdiff
26
+ end
27
+
28
+ if lcs > 0
29
+ match_initial = sdiff.index{|d| d.action == '='}
30
+ match_final = sdiff.rindex{|d| d.action == '='}
31
+
32
+ @str1_match_initial = sdiff[match_initial].old_position
33
+ @str2_match_initial = sdiff[match_initial].new_position
34
+ @str1_match_final = sdiff[match_final].old_position
35
+ @str2_match_final = sdiff[match_final].new_position
36
+ @similarity = 2 * lcs / ((@str1_match_final - @str1_match_initial + 1) + (@str2_match_final - @str2_match_initial + 1)).to_f
37
+ else
38
+ @str1_match_initial = 0
39
+ @str2_match_initial = 0
40
+ @str1_match_final = 0
41
+ @str2_match_final = 0
42
+ @similarity = 0
43
+ end
44
+ end
45
45
  end
46
46
 
47
47
  if __FILE__ == $0
48
- require 'json'
49
- str1 = 'naxbyzabcdexydzem'
50
- str2 = 'abcde'
51
- if ARGV.length == 2
52
- str1 = JSON.parse(File.read(ARGV[0]).strip)["text"]
53
- str2 = JSON.parse(File.read(ARGV[1]).strip)["text"]
54
- end
55
- comparison = TextAlignment::LCSComparison.new(str1, str2)
56
- puts "Similarity: #{comparison.similarity}"
57
- puts "String 1 match: (#{comparison.str1_match_initial}, #{comparison.str1_match_final})"
58
- puts "String 2 match: (#{comparison.str2_match_initial}, #{comparison.str2_match_final})"
59
- puts "-----"
60
- puts '[' + str1[comparison.str1_match_initial .. comparison.str1_match_final] + ']'
61
- puts "-----"
62
- puts '[' + str2[comparison.str2_match_initial .. comparison.str2_match_final] + ']'
48
+ require 'json'
49
+ str1 = 'naxbyzabcdexydzem'
50
+ str2 = 'abcde'
51
+ if ARGV.length == 2
52
+ str1 = JSON.parse(File.read(ARGV[0]).strip)["text"]
53
+ str2 = JSON.parse(File.read(ARGV[1]).strip)["text"]
54
+ end
55
+ comparison = TextAlignment::LCSComparison.new(str1, str2)
56
+ puts "Similarity: #{comparison.similarity}"
57
+ puts "String 1 match: (#{comparison.str1_match_initial}, #{comparison.str1_match_final})"
58
+ puts "String 2 match: (#{comparison.str2_match_initial}, #{comparison.str2_match_final})"
59
+ puts "-----"
60
+ puts '[' + str1[comparison.str1_match_initial .. comparison.str1_match_final] + ']'
61
+ puts "-----"
62
+ puts '[' + str2[comparison.str2_match_initial .. comparison.str2_match_final] + ']'
63
63
  end
@@ -5,156 +5,162 @@ module TextAlignment; end unless defined? TextAlignment
5
5
 
6
6
  # change the class definition of ContextChange to allow update of the two instance variables
7
7
  class Diff::LCS::ContextChange
8
- attr_accessor :old_position, :new_position
8
+ attr_accessor :old_position, :new_position
9
9
  end
10
10
 
11
11
  # It finds minimal lcs and sdiff of the given strings, str1 and str2.
12
12
  # It relies on the diff-lcs gem for the computation of lcs table.
13
13
  class TextAlignment::LCSMin
14
- attr_reader :sdiff, :lcs, :m1_initial, :m1_final, :m2_initial, :m2_final
15
-
16
- PLACEHOLDER_CHAR = '_'
17
-
18
- def initialize (str1, str2)
19
- raise ArgumentError, "nil string" if str1.nil? || str2.nil?
20
- raise ArgumentError, "empty string" if str1.empty? || str2.empty?
21
-
22
- # str1 is copied as it is.
23
- # str2 is copied with w/s characters replaced with the placeholder characters,
24
- # to avoid overfitting to w/s characters during LCS computation.
25
- @str1 = str1
26
- @str2 = str2.gsub(/\s/, PLACEHOLDER_CHAR)
27
-
28
- # find the corresponding minimal range of the two strings
29
- r = _find_min_range(0, @str1.length - 1, 0, @str2.length - 1)
30
- @m1_initial, @m1_final, @m2_initial, @m2_final = r[:m1_initial], r[:m1_final], r[:m2_initial], r[:m2_final]
31
-
32
- if @m1_initial.nil?
33
- @sdiff = nil
34
- @lcs = 0
35
- else
36
- # compute sdiff and lcs
37
- # here the original str2 is used with all the w/s characters preserved.
38
- @sdiff = Diff::LCS.sdiff(@str1[@m1_initial..@m1_final], str2[@m2_initial..@m2_final])
39
- @lcs = @sdiff.count{|d| d.action == '='}
40
-
41
- # adjust the position values of sdiff
42
- @sdiff.each do |h|
43
- h.old_position += @m1_initial unless h.old_position.nil?
44
- h.new_position += @m2_initial unless h.new_position.nil?
45
- end
46
-
47
- (0 ... @m2_initial).reverse_each{|i| @sdiff.unshift(Diff::LCS::ContextChange.new('+', nil, nil, i, @str2[i]))}
48
- (0 ... @m1_initial).reverse_each{|i| @sdiff.unshift(Diff::LCS::ContextChange.new('-', i, @str1[i], nil, nil))}
49
- (@m1_final + 1 ... @str1.length).each{|i| @sdiff.push(Diff::LCS::ContextChange.new('-', i, @str1[i], nil, nil))}
50
- (@m2_final + 1 ... @str2.length).each{|i| @sdiff.push(Diff::LCS::ContextChange.new('+', nil, nil, i, @str2[i]))}
51
- end
52
- end
53
-
54
- def _find_min_range (m1_initial, m1_final, m2_initial, m2_final, clcs = 0)
55
- return nil if (m1_final - m1_initial < 0) || (m2_final - m2_initial < 0)
56
- sdiff = Diff::LCS.sdiff(@str1[m1_initial..m1_final], @str2[m2_initial..m2_final])
57
- lcs = sdiff.count{|d| d.action == '='}
58
-
59
- return nil if lcs == 0
60
- return nil if lcs < clcs
61
-
62
- match_last = sdiff.rindex{|d| d.action == '='}
63
- m1_final = sdiff[match_last].old_position + m1_initial
64
- m2_final = sdiff[match_last].new_position + m2_initial
65
-
66
- match_first = sdiff.index{|d| d.action == '='}
67
- m1_initial = sdiff[match_first].old_position + m1_initial
68
- m2_initial = sdiff[match_first].new_position + m2_initial
69
-
70
- # attempt for shorter match
71
- if ((m1_final - m1_initial) > (m2_final - m2_initial))
72
- r = _find_min_range(m1_initial + 1, m1_final, m2_initial, m2_final, lcs)
73
- return r unless r.nil?
74
- r = _find_min_range(m1_initial, m1_final - 1, m2_initial, m2_final, lcs)
75
- return r unless r.nil?
76
- else
77
- r = _find_min_range(m1_initial, m1_final, m2_initial + 1, m2_final, lcs)
78
- return r unless r.nil?
79
- r = _find_min_range(m1_initial, m1_final, m2_initial, m2_final - 1, lcs)
80
- return r unless r.nil?
81
- end
82
-
83
- return {
84
- m1_initial: m1_initial,
85
- m1_final: m1_final,
86
- m2_initial: m2_initial,
87
- m2_final: m2_final
88
- }
89
- end
90
-
91
- def num_big_gaps (sdiff, initial, last)
92
- raise ArgumentError, "nil sdiff" if sdiff.nil?
93
- raise ArgumentError, "invalid indice: #{initial}, #{last}" unless last >= initial
94
-
95
- state1 = :initial
96
- state2 = :initial
97
- gaps1 = []
98
- gaps2 = []
99
-
100
- (initial .. last).each do |i|
101
- case sdiff[i].action
102
- when '='
103
- state1 = :continue
104
- state2 = :continue
105
- when '!'
106
- gaps1 << 1
107
- state1 = :break
108
-
109
- if state2 == :break
110
- gaps2[-1] += 1
111
- else
112
- gaps2 << 1
113
- end
114
- state2 = :continue
115
- when '+'
116
- if state1 == :break
117
- gaps1[-1] += 1
118
- else
119
- gaps1 << 1
120
- end
121
- state1 = :break
122
- when '-'
123
- if state2 == :break
124
- gaps2[-1] += 1
125
- else
126
- gaps2 << 1
127
- end
128
- state2 = :break
129
- end
130
- end
131
-
132
- num_big_gaps1 = gaps1.select{|g| g > MAX_LEN_BIG_GAP}.length
133
- num_big_gaps2 = gaps2.select{|g| g > MAX_LEN_BIG_GAP}.length
134
- num_big_gaps1 + num_big_gaps2
135
- end
14
+ attr_reader :sdiff, :lcs, :m1_initial, :m1_final, :m2_initial, :m2_final
15
+
16
+ PLACEHOLDER_CHAR = '_'
17
+
18
+ def initialize (str1, str2)
19
+ raise ArgumentError, "nil string" if str1.nil? || str2.nil?
20
+ raise ArgumentError, "empty string" if str1.empty? || str2.empty?
21
+
22
+ # str1 is copied as it is.
23
+ # str2 is copied with w/s characters replaced with the placeholder characters,
24
+ # to avoid overfitting to w/s characters during LCS computation.
25
+ @str1 = str1
26
+ @str2 = str2.gsub(/\s/, PLACEHOLDER_CHAR)
27
+
28
+ # find the corresponding minimal range of the two strings
29
+ r = _find_min_range(0, @str1.length - 1, 0, @str2.length - 1)
30
+ if r.nil?
31
+ @sdiff = nil
32
+ @lcs = 0
33
+ return
34
+ end
35
+
36
+ @m1_initial, @m1_final, @m2_initial, @m2_final = r[:m1_initial], r[:m1_final], r[:m2_initial], r[:m2_final]
37
+
38
+ if @m1_initial.nil?
39
+ @sdiff = nil
40
+ @lcs = 0
41
+ else
42
+ # compute sdiff and lcs
43
+ # here the original str2 is used with all the w/s characters preserved.
44
+ @sdiff = Diff::LCS.sdiff(@str1[@m1_initial..@m1_final], str2[@m2_initial..@m2_final])
45
+ @lcs = @sdiff.count{|d| d.action == '='}
46
+
47
+ # adjust the position values of sdiff
48
+ @sdiff.each do |h|
49
+ h.old_position += @m1_initial unless h.old_position.nil?
50
+ h.new_position += @m2_initial unless h.new_position.nil?
51
+ end
52
+
53
+ (0 ... @m2_initial).reverse_each{|i| @sdiff.unshift(Diff::LCS::ContextChange.new('+', nil, nil, i, @str2[i]))}
54
+ (0 ... @m1_initial).reverse_each{|i| @sdiff.unshift(Diff::LCS::ContextChange.new('-', i, @str1[i], nil, nil))}
55
+ (@m1_final + 1 ... @str1.length).each{|i| @sdiff.push(Diff::LCS::ContextChange.new('-', i, @str1[i], nil, nil))}
56
+ (@m2_final + 1 ... @str2.length).each{|i| @sdiff.push(Diff::LCS::ContextChange.new('+', nil, nil, i, @str2[i]))}
57
+ end
58
+ end
59
+
60
+ def _find_min_range (m1_initial, m1_final, m2_initial, m2_final, clcs = 0)
61
+ return nil if (m1_final - m1_initial < 0) || (m2_final - m2_initial < 0)
62
+ sdiff = Diff::LCS.sdiff(@str1[m1_initial..m1_final], @str2[m2_initial..m2_final])
63
+ lcs = sdiff.count{|d| d.action == '='}
64
+
65
+ return nil if lcs == 0
66
+ return nil if lcs < clcs
67
+
68
+ match_last = sdiff.rindex{|d| d.action == '='}
69
+ m1_final = sdiff[match_last].old_position + m1_initial
70
+ m2_final = sdiff[match_last].new_position + m2_initial
71
+
72
+ match_first = sdiff.index{|d| d.action == '='}
73
+ m1_initial = sdiff[match_first].old_position + m1_initial
74
+ m2_initial = sdiff[match_first].new_position + m2_initial
75
+
76
+ # attempt for shorter match
77
+ if ((m1_final - m1_initial) > (m2_final - m2_initial))
78
+ r = _find_min_range(m1_initial + 1, m1_final, m2_initial, m2_final, lcs)
79
+ return r unless r.nil?
80
+ r = _find_min_range(m1_initial, m1_final - 1, m2_initial, m2_final, lcs)
81
+ return r unless r.nil?
82
+ else
83
+ r = _find_min_range(m1_initial, m1_final, m2_initial + 1, m2_final, lcs)
84
+ return r unless r.nil?
85
+ r = _find_min_range(m1_initial, m1_final, m2_initial, m2_final - 1, lcs)
86
+ return r unless r.nil?
87
+ end
88
+
89
+ return {
90
+ m1_initial: m1_initial,
91
+ m1_final: m1_final,
92
+ m2_initial: m2_initial,
93
+ m2_final: m2_final
94
+ }
95
+ end
96
+
97
+ def num_big_gaps (sdiff, initial, last)
98
+ raise ArgumentError, "nil sdiff" if sdiff.nil?
99
+ raise ArgumentError, "invalid indice: #{initial}, #{last}" unless last >= initial
100
+
101
+ state1 = :initial
102
+ state2 = :initial
103
+ gaps1 = []
104
+ gaps2 = []
105
+
106
+ (initial .. last).each do |i|
107
+ case sdiff[i].action
108
+ when '='
109
+ state1 = :continue
110
+ state2 = :continue
111
+ when '!'
112
+ gaps1 << 1
113
+ state1 = :break
114
+
115
+ if state2 == :break
116
+ gaps2[-1] += 1
117
+ else
118
+ gaps2 << 1
119
+ end
120
+ state2 = :continue
121
+ when '+'
122
+ if state1 == :break
123
+ gaps1[-1] += 1
124
+ else
125
+ gaps1 << 1
126
+ end
127
+ state1 = :break
128
+ when '-'
129
+ if state2 == :break
130
+ gaps2[-1] += 1
131
+ else
132
+ gaps2 << 1
133
+ end
134
+ state2 = :break
135
+ end
136
+ end
137
+
138
+ num_big_gaps1 = gaps1.select{|g| g > MAX_LEN_BIG_GAP}.length
139
+ num_big_gaps2 = gaps2.select{|g| g > MAX_LEN_BIG_GAP}.length
140
+ num_big_gaps1 + num_big_gaps2
141
+ end
136
142
 
137
143
  end
138
144
 
139
145
 
140
146
  if __FILE__ == $0
141
- require 'json'
142
- require 'text_alignment/lcs_cdiff'
147
+ require 'json'
148
+ require 'text_alignment/lcs_cdiff'
143
149
 
144
- str2 = 'abcde'
145
- str1 = 'naxbyzabcdexydzem'
150
+ str2 = 'abcde'
151
+ str1 = 'naxbyzabcdexydzem'
146
152
 
147
- str1 = "TI - Identification of a region which directs the monocytic activity of the\n colony-stimulating factor 1 (macrophage colony-stimulating factor) receptor\n promoter and binds PEBP2/CBF (AML1)."
148
- str2 = "Identification of a region which directs the monocytic activity of the colony-stimulating factor 1 (macrophage colony-stimulating factor) receptor promoter and binds PEBP2/CBF (AML1).\nThe receptor for the macrophage colony-stimulating factor (or colony-stimulating factor 1 [CSF-1]) is expressed from different promoters in monocytic cells and placental trophoblasts. We have demonstrated that the monocyte-specific expression of the CSF-1 receptor is regulated at the level of transcription by a tissue-specific promoter whose activity is stimulated by the monocyte/B-cell-specific transcription factor PU.1 (D.-E. Zhang, C.J. Hetherington, H.-M. Chen, and D.G. Tenen, Mol. Cell. Biol. 14:373-381, 1994). Here we report that the tissue specificity of this promoter is also mediated by sequences in a region II (bp -88 to -59), which lies 10 bp upstream from the PU.1-binding site. When analyzed by DNase footprinting, region II was protected preferentially in monocytic cells. Electrophoretic mobility shift assays confirmed that region II interacts specifically with nuclear proteins from monocytic cells. Two gel shift complexes (Mono A and Mono B) were formed with separate sequence elements within this region. Competition and supershift experiments indicate that Mono B contains a member of the polyomavirus enhancer-binding protein 2/core-binding factor (PEBP2/CBF) family, which includes the AML1 gene product, while Mono A is a distinct complex preferentially expressed in monocytic cells. Promoter constructs with mutations in these sequence elements were no longer expressed specifically in monocytes. Furthermore, multimerized region II sequence elements enhanced the activity of a heterologous thymidine kinase promoter in monocytic cells but not other cell types tested. These results indicate that the monocyte/B-cell-specific transcription factor PU.1 and the Mono A and Mono B protein complexes act in concert to regulate monocyte-specific transcription of the CSF-1 receptor."
149
- # str2 = "Identification of a region which directs the monocytic activity of the colony-stimulating factor 1 (macrophage colony-stimulating factor) receptor promoter and binds PEBP2/CBF (AML1).\nThe receptor for the macrophage colony-stimulating factor (or colony-stimulating factor 1 [CSF-1]) is expressed from different promoters in monocytic cells and placental trophoblasts."
153
+ str1 = "TI - Identification of a region which directs the monocytic activity of the\n colony-stimulating factor 1 (macrophage colony-stimulating factor) receptor\n promoter and binds PEBP2/CBF (AML1)."
154
+ str2 = "Identification of a region which directs the monocytic activity of the colony-stimulating factor 1 (macrophage colony-stimulating factor) receptor promoter and binds PEBP2/CBF (AML1).\nThe receptor for the macrophage colony-stimulating factor (or colony-stimulating factor 1 [CSF-1]) is expressed from different promoters in monocytic cells and placental trophoblasts. We have demonstrated that the monocyte-specific expression of the CSF-1 receptor is regulated at the level of transcription by a tissue-specific promoter whose activity is stimulated by the monocyte/B-cell-specific transcription factor PU.1 (D.-E. Zhang, C.J. Hetherington, H.-M. Chen, and D.G. Tenen, Mol. Cell. Biol. 14:373-381, 1994). Here we report that the tissue specificity of this promoter is also mediated by sequences in a region II (bp -88 to -59), which lies 10 bp upstream from the PU.1-binding site. When analyzed by DNase footprinting, region II was protected preferentially in monocytic cells. Electrophoretic mobility shift assays confirmed that region II interacts specifically with nuclear proteins from monocytic cells. Two gel shift complexes (Mono A and Mono B) were formed with separate sequence elements within this region. Competition and supershift experiments indicate that Mono B contains a member of the polyomavirus enhancer-binding protein 2/core-binding factor (PEBP2/CBF) family, which includes the AML1 gene product, while Mono A is a distinct complex preferentially expressed in monocytic cells. Promoter constructs with mutations in these sequence elements were no longer expressed specifically in monocytes. Furthermore, multimerized region II sequence elements enhanced the activity of a heterologous thymidine kinase promoter in monocytic cells but not other cell types tested. These results indicate that the monocyte/B-cell-specific transcription factor PU.1 and the Mono A and Mono B protein complexes act in concert to regulate monocyte-specific transcription of the CSF-1 receptor."
155
+ # str2 = "Identification of a region which directs the monocytic activity of the colony-stimulating factor 1 (macrophage colony-stimulating factor) receptor promoter and binds PEBP2/CBF (AML1).\nThe receptor for the macrophage colony-stimulating factor (or colony-stimulating factor 1 [CSF-1]) is expressed from different promoters in monocytic cells and placental trophoblasts."
150
156
 
151
- if ARGV.length == 2
152
- str1 = JSON.parse(File.read(ARGV[0]).strip)["text"]
153
- str2 = JSON.parse(File.read(ARGV[1]).strip)["text"]
154
- end
157
+ if ARGV.length == 2
158
+ str1 = JSON.parse(File.read(ARGV[0]).strip)["text"]
159
+ str2 = JSON.parse(File.read(ARGV[1]).strip)["text"]
160
+ end
155
161
 
156
- lcsmin = TextAlignment::LCSMin.new(str1, str2)
157
- # puts lcs
158
- # sdiff.each {|h| p h}
159
- puts TextAlignment.sdiff2cdiff(lcsmin.sdiff)
162
+ lcsmin = TextAlignment::LCSMin.new(str1, str2)
163
+ # puts lcs
164
+ # sdiff.each {|h| p h}
165
+ puts TextAlignment.sdiff2cdiff(lcsmin.sdiff)
160
166
  end