text_alignment 0.9.1 → 0.10.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bc60f2422e09058c8abc037a5f4c7e28a2c26c4b0defa3e157a478f6c691e85e
4
- data.tar.gz: 3732d51c46d0597cec005396c13e5aa7c84c766232f5de0c5b90e789a2fa77f1
3
+ metadata.gz: 710fa3dfe07e268c62de77c67abb52e1893a36a9c0fbdfbf7400328ab7f8358a
4
+ data.tar.gz: 31a086666978c4776a60b0ce19fde48beeb8a5bce599b7572f20089cfb5c7500
5
5
  SHA512:
6
- metadata.gz: '0095e5682996e5ccb6d6cc7529c40901656f169670e49d26331acad139964b528a6b3ae9c48f32844fbe2a8737f0ab66fdc4f4da51dc37808bed65e7a7447f37'
7
- data.tar.gz: b8e00566dbcba94fbfd1d84bd7d10ac6ba7677124aa8a0676797223d4969e76917ea21013cb509762a46d14324eb28e38b1d6ad7dc26cd0fcb2a30af573e6612
6
+ metadata.gz: d0a752d2203f65e48b4f10ff29eb29a77504e7cef4da1dd3602e2ea5107a99b7edff4c3609c79c7dcba0f9cc458780dc4788ff8123a781a4d77ae89236170bf5
7
+ data.tar.gz: '094ff5f662ccdc2fc14f81549126362be49fd64610f994c6ba5c4457fbbbececc1931257589ebde49211c2bce744ca033bfe3673c8af5c8a01af7679bfb626cc'
@@ -26,8 +26,9 @@ def read_text(filename)
26
26
  end
27
27
  end
28
28
 
29
- def align_denotations(denotations, source_text, target_text, debug = false)
30
- alignment = TextAlignment::TextAlignment.new(source_text, target_text, denotations)
29
+ def align_denotations(denotations, source_text, target_text, debug = false, cm = nil)
30
+ alignment = TextAlignment::TextAlignment.new(source_text, target_text, denotations, cm)
31
+ cm = alignment.cultivation_map
31
32
  new_denotations = alignment.transform_hdenotations(denotations)
32
33
 
33
34
  if debug
@@ -47,7 +48,7 @@ def align_denotations(denotations, source_text, target_text, debug = false)
47
48
  warn
48
49
 
49
50
  # return target annotations
50
- new_denotations
51
+ [new_denotations, cm]
51
52
  end
52
53
 
53
54
  def align_mannotations(source_annotations, target_text, debug = false)
@@ -58,11 +59,13 @@ def align_mannotations(source_annotations, target_text, debug = false)
58
59
  idnum_attributes = 0
59
60
  idnum_modifications = 0
60
61
 
62
+ cm = nil
61
63
  source_annotations.each_with_index do |annotations, i|
62
64
  if annotations.has_key?(:denotations) && !annotations[:denotations].empty?
63
65
  ididx = {}
64
66
  warn "[#{i}]-=-=-=-=-"
65
- denotations = align_denotations(annotations[:denotations], annotations[:text], target_text, debug)
67
+ denotations, cm = align_denotations(annotations[:denotations], annotations[:text], target_text, debug, cm)
68
+
66
69
  denotations.each do |d|
67
70
  reid = 'T' + (idnum_denotations += 1).to_s
68
71
  ididx[d[:id]] = reid
@@ -114,8 +117,9 @@ target_text = read_text(ARGV[1])
114
117
  target_annotations = if source_annotations.class == Array
115
118
  align_mannotations(source_annotations, target_text, false)
116
119
  else
117
- denotations = align_denotations(source_annotations[:denotations], source_annotations[:text], target_text, false)
120
+ denotations, cm = align_denotations(source_annotations[:denotations], source_annotations[:text], target_text, false)
121
+ # denotations = align_denotations(source_annotations[:denotations], source_annotations[:text], target_text, true)
118
122
  source_annotations.merge({text:target_text, denotations:denotations})
119
123
  end
120
124
 
121
- # puts target_annotations.to_json
125
+ puts target_annotations.to_json
@@ -6,76 +6,65 @@ module TextAlignment; end unless defined? TextAlignment
6
6
 
7
7
  class TextAlignment::AnchorFinder
8
8
 
9
- def initialize(source_str, target_str, _size_ngram = nil, _size_window = nil, _text_similiarity_threshold = nil)
10
- @size_ngram = _size_ngram || TextAlignment::SIZE_NGRAM
11
- @size_window = _size_window || TextAlignment::SIZE_WINDOW
12
- @sim_threshold = _text_similiarity_threshold || TextAlignment::TEXT_SIMILARITY_THRESHOLD
13
-
14
- @reverse = (target_str.length < source_str.length)
15
-
16
- @s1, @s2 = if @reverse
9
+ def initialize(source_str, target_str, cultivation_map)
10
+ @s1, @s2 = if reverse?(source_str, target_str)
17
11
  [target_str.downcase, source_str.downcase]
18
12
  else
19
13
  [source_str.downcase, target_str.downcase]
20
14
  end
21
15
 
22
- # current position in s1
23
- @beg_s1 = 0
24
- @end_s1_prev = 0
25
- @end_s2_prev = 0
26
- end
27
-
28
- def get_next_anchor
29
- # find the position of an anchor ngram in s1 and s2
30
- while @beg_s1 < (@s1.length - @size_ngram)
31
- if [' ', "\n", "\t"].include? @s1[@beg_s1]
32
- @beg_s1 += 1
33
- next
34
- end
35
- anchor = @s1[@beg_s1, @size_ngram]
16
+ @cultivation_map = cultivation_map
36
17
 
37
- # search_position = 0
38
- search_position = @end_s2_prev
39
- while @beg_s2 = @s2.index(anchor, search_position)
40
- # if both the begining points are sufficiantly close to the end points of the last match
41
- break if @beg_s1 > 0 && @beg_s2 > 0 && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 >= @end_s2_prev) && (@beg_s2 - @end_s2_prev < 5)
18
+ @size_ngram = TextAlignment::SIZE_NGRAM
19
+ @size_window = TextAlignment::SIZE_WINDOW
20
+ @sim_threshold = TextAlignment::TEXT_SIMILARITY_THRESHOLD
21
+ @pos_s1_final_possible_begin = @s1.length - @size_ngram - 1
42
22
 
43
- left_window_s1, left_window_s2 = get_left_windows
44
- break if left_window_s1 && (text_similarity(left_window_s1, left_window_s2) > @sim_threshold)
23
+ # positions of last match
24
+ @pos_s1_last_match = 0
25
+ @pos_s2_last_match = 0
26
+ end
45
27
 
46
- right_window_s1, right_window_s2 = get_right_windows
47
- break if right_window_s2 && (text_similarity(right_window_s1, right_window_s2) > @sim_threshold)
28
+ def reverse?(source_str = nil, target_str = nil)
29
+ unless source_str.nil?
30
+ @reverse_p = target_str.length < source_str.length
31
+ end
32
+ @reverse_p
33
+ end
48
34
 
49
- search_position = @beg_s2 + 1
50
- end
35
+ def get_next_anchor
36
+ # To find the beginning positions of an anchor ngram in s1 and s2, beginning from the last positions matched
37
+ beg_s2 = for beg_s1 in @pos_s1_last_match .. @pos_s1_final_possible_begin
51
38
 
52
- break unless @beg_s2.nil?
39
+ # To skip whitespace letters
40
+ next if [' ', "\n", "\t"].include? @s1[beg_s1]
53
41
 
54
- @beg_s1 += 1
42
+ _beg_s2 = get_beg_s2(beg_s1)
43
+ break _beg_s2 unless _beg_s2.nil?
55
44
  end
56
45
 
57
- return nil if @beg_s1 >= (@s1.length - @size_ngram)
46
+ # To return nil when it fails to find an anchor
47
+ return nil if beg_s2.class == Range
58
48
 
59
- # extend the block
60
- b1 = @beg_s1
61
- b2 = @beg_s2
62
- while b1 >= @end_s1_prev && b2 >= @end_s2_prev && @s1[b1] == @s2[b2]
49
+ # To extend the block to the left
50
+ b1 = beg_s1
51
+ b2 = beg_s2
52
+ while b1 >= @pos_s1_last_match && b2 >= @pos_s2_last_match && @s1[b1] == @s2[b2]
63
53
  b1 -= 1; b2 -= 1
64
54
  end
65
-
66
55
  b1 += 1; b2 += 1
67
56
 
68
- e1 = @beg_s1 + @size_ngram
69
- e2 = @beg_s2 + @size_ngram
57
+ # To extend the block to the right
58
+ e1 = beg_s1 + @size_ngram
59
+ e2 = beg_s2 + @size_ngram
70
60
  while @s1[e1] && @s1[e1] == @s2[e2]
71
61
  e1 += 1; e2 += 1
72
62
  end
73
63
 
74
- @end_s1_prev = e1
75
- @end_s2_prev = e2
76
- @beg_s1 = e1
64
+ @pos_s1_last_match = e1
65
+ @pos_s2_last_match = e2
77
66
 
78
- if @reverse
67
+ if reverse?
79
68
  {source:{begin:b2 , end:e2}, target:{begin:b1, end:e1}}
80
69
  else
81
70
  {source:{begin:b1 , end:e1}, target:{begin:b2, end:e2}}
@@ -84,14 +73,92 @@ class TextAlignment::AnchorFinder
84
73
 
85
74
  private
86
75
 
87
- def get_left_windows
88
- # commend below with the assumption that the beginning of a document gives a significant locational information
89
- # return if @beg_s1 < @size_window || @beg_s2 < @size_window
76
+ def get_beg_s2(beg_s1)
77
+ # to get the anchor to search for in s2
78
+ anchor = @s1[beg_s1, @size_ngram]
79
+
80
+ # comment out below with the assumption that texts are in the same order
81
+ # search_position = 0
82
+ search_position = @pos_s2_last_match
83
+
84
+ beg_s2_candidates = find_beg_s2_candidates(anchor, search_position)
85
+ return nil if beg_s2_candidates.empty?
86
+
87
+ find_valid_beg_s2(beg_s1, beg_s2_candidates)
88
+ end
89
+
90
+ # To find beg_s2 which match to the anchor
91
+ # return an empty array if the anchor is too frequent
92
+ def find_beg_s2_candidates(anchor, search_position)
93
+ candidates = []
94
+ while _beg_s2 = @s2.index(anchor, search_position)
95
+ search_again_position = @cultivation_map.search_again_position(_beg_s2)
96
+ unless search_again_position.nil?
97
+ search_position = search_again_position
98
+ next
99
+ end
100
+
101
+ candidates << _beg_s2
102
+
103
+ # for speed, skip anchor of high frequency
104
+ if candidates.length > 5
105
+ candidates.clear
106
+ break
107
+ end
108
+
109
+ search_position = _beg_s2 + 1
110
+ end
111
+ candidates
112
+ end
113
+
# Picks, among the candidate positions in s2, the one that corresponds to the
# anchor that begins at beg_s1 in s1.
#
# A candidate passes when
#   (a) it closely follows the end of the previous match in both strings, or
#   (b) the text to its left is similar enough to the text left of beg_s1, or
#   (c) the text to its right is similar enough to the text right of beg_s1
# where "similar enough" means text_similarity above @sim_threshold.
#
# The context window is widened step by step (10, 20, 30); a wider window is
# tried only while more than one candidate passes at the current width.
#
# beg_s1            - beginning position of the anchor in s1
# beg_s2_candidates - array of candidate beginning positions in s2
#
# Returns the chosen position in s2, or nil when no candidate passes.
# NOTE(review): if several candidates still pass at the widest window, the
# first passing one is returned - confirm that this is the intended fallback.
def find_valid_beg_s2(beg_s1, beg_s2_candidates)
  valid_beg_s2 = nil

  (10 .. 30).step(10).each do |size_window|
    valid_beg_s2 = nil

    r = beg_s2_candidates.each do |beg_s2|
      # both beginning points are sufficiently close to the end points of the last match
      if beg_s1 > 0 && beg_s2 > 0 && (beg_s1 - @pos_s1_last_match < 5) && (beg_s2 - @pos_s2_last_match < 5)
        break unless valid_beg_s2.nil?
        valid_beg_s2 = beg_s2
        next
      end

      # the current window size has to be handed to the helpers; otherwise
      # every pass of the outer loop would repeat the very same comparison
      left_window_s1, left_window_s2 = get_left_windows(beg_s1, beg_s2, size_window)
      if left_window_s1 && (text_similarity(left_window_s1, left_window_s2) > @sim_threshold)
        break unless valid_beg_s2.nil?
        valid_beg_s2 = beg_s2
        next
      end

      right_window_s1, right_window_s2 = get_right_windows(beg_s1, beg_s2, size_window)
      if right_window_s2 && (text_similarity(right_window_s1, right_window_s2) > @sim_threshold)
        break unless valid_beg_s2.nil?
        valid_beg_s2 = beg_s2
        next
      end
    end

    # r == nil means that the inner loop was broken (multiple candidates had passed the tests)
    # r != nil means that the inner loop was completed (with or w/o a valid beg_s2 found)
    break unless r.nil?
  end

  valid_beg_s2
end
151
+
152
+ def get_left_windows(beg_s1, beg_s2, size_window = nil)
153
+ size_window ||= @size_window
154
+
155
+ # comment out below with the assumption that the beginning of a document gives a significant locational information
156
+ # return if @beg_s1 < size_window || @beg_s2 < size_window
90
157
 
91
158
  window_s1 = ''
92
- loc = @beg_s1 - 1
159
+ loc = beg_s1 - 1
93
160
  count = 0
94
- while count < @size_window && loc >= 0
161
+ while count < size_window && loc >= 0
95
162
  if @s1[loc] =~ /[0-9a-zA-Z]/
96
163
  window_s1 += @s1[loc]
97
164
  count += 1
@@ -100,9 +167,9 @@ class TextAlignment::AnchorFinder
100
167
  end
101
168
 
102
169
  window_s2 = ''
103
- loc = @beg_s2 - 1
170
+ loc = beg_s2 - 1
104
171
  count = 0
105
- while count < @size_window && loc >= 0
172
+ while count < size_window && loc >= 0
106
173
  if @s2[loc] =~ /[0-9a-zA-Z]/
107
174
  window_s2 += @s2[loc]
108
175
  count += 1
@@ -113,15 +180,17 @@ class TextAlignment::AnchorFinder
113
180
  [window_s1, window_s2]
114
181
  end
115
182
 
116
- def get_right_windows
183
+ def get_right_windows(beg_s1, beg_s2, size_window = nil)
184
+ size_window ||= @size_window
185
+
117
186
  # comment out below with the assumption that the end of a document gives significant locational information
118
- # return if (@beg_s1 + @size_ngram > (@s1.length - @size_window)) || (@beg_s2 + @size_ngram > (@s2.length - @size_window))
187
+ # return if (@beg_s1 + @size_ngram > (@s1.length - size_window)) || (@beg_s2 + @size_ngram > (@s2.length - size_window))
119
188
 
120
189
  window_s1 = ''
121
- loc = @beg_s1 + @size_ngram
190
+ loc = beg_s1 + @size_ngram
122
191
  len_s1 = @s1.length
123
192
  count = 0
124
- while count < @size_window && loc < len_s1
193
+ while count < size_window && loc < len_s1
125
194
  if @s1[loc] =~ /[0-9a-zA-Z]/
126
195
  window_s1 += @s1[loc]
127
196
  count += 1
@@ -130,10 +199,10 @@ class TextAlignment::AnchorFinder
130
199
  end
131
200
 
132
201
  window_s2 = ''
133
- loc = @beg_s2 + @size_ngram
202
+ loc = beg_s2 + @size_ngram
134
203
  len_s2 = @s2.length
135
204
  count = 0
136
- while count < @size_window && loc < len_s2
205
+ while count < size_window && loc < len_s2
137
206
  if @s2[loc] =~ /[0-9a-zA-Z]/
138
207
  window_s2 += @s2[loc]
139
208
  count += 1
@@ -148,5 +217,4 @@ class TextAlignment::AnchorFinder
148
217
  return 0 if str1.nil? || str2.nil?
149
218
  String::Similarity.cosine(str1, str2, ngram:ngram_order)
150
219
  end
151
-
152
- end
220
+ end
@@ -1,6 +1,6 @@
1
1
  module TextAlignment; end unless defined? TextAlignment
2
2
 
3
- TextAlignment::MAPPINGS = [
3
+ TextAlignment::CHAR_MAPPING = [
4
4
  ["©", "(c)"], #U+00A9 (Copyright Sign)
5
5
 
6
6
  ["α", "alpha"], #U+03B1 (greek small letter alpha)
@@ -61,9 +61,10 @@ TextAlignment::MAPPINGS = [
61
61
  ["•", "*"], #U+2022 (bullet)
62
62
  [" ", " "], #U+2009 (thin space)
63
63
  [" ", " "], #U+200A (hair space)
64
- [" ", " "], #U+00A0 (no-break space)
64
+ [" ", " "], #U+00A0 (Non-Breaking space)
65
65
  [" ", " "], #U+3000 (ideographic space)
66
- ["", "-"], #U+2211 (Non-Breaking Hyphen)
66
+ ["‐", "-"], #U+2010 (Hyphen)
67
+ ["‑", "-"], #U+2011 (Non-Breaking Hyphen)
67
68
  ["−", "-"], #U+2212 (minus sign)
68
69
  ["–", "-"], #U+2013 (en dash)
69
70
  ["′", "'"], #U+2032 (prime)
@@ -75,98 +76,112 @@ TextAlignment::MAPPINGS = [
75
76
  ]
76
77
 
77
78
 
78
- TextAlignment::PADDING_LETTERS = ['@', '^', '|', '#', '$', '%', '&', '_'] unless defined? TextAlignment::PADDING_LETTERS
79
+ class TextAlignment::CharMapping
80
+ attr_reader :str
79
81
 
82
+ def initialize(_str, char_mapping = nil)
83
+ char_mapping ||= TextAlignment::CHAR_MAPPING
84
+ @str, offset_mapping = enmap_str(_str, char_mapping)
85
+ @index_enmap = offset_mapping.to_h
86
+ @index_demap = offset_mapping.map{|m| m.reverse}.to_h
87
+ end
88
+
89
+ def enmap_position(position)
90
+ @index_enmap[position]
91
+ end
80
92
 
81
- class << TextAlignment
82
- def single_character_mapping_preprocessing(_str1, _str2, _mappings = nil)
83
- _mappings ||= TextAlignment::MAPPINGS
93
+ def demap_position(position)
94
+ @index_demap[position]
95
+ end
84
96
 
85
- character_mappings = _mappings.select{|m| m[0].length == 1 && m[1].length == 1}
86
- if character_mappings.empty?
87
- [_str1, _str2, _mappings]
88
- else
89
- characters_from = character_mappings.collect{|m| m[0]}.join
90
- characters_to = character_mappings.collect{|m| m[1]}.join
91
- characters_to.gsub!(/-/, '\-')
# Translates the span boundaries of each denotation with enmap_position.
#
# _denotations - array of hashes, each with a :span hash holding :begin and
#                :end, or nil
#
# Returns a new array of new hashes (the input is left untouched), or nil
# when _denotations is nil - callers may legitimately have no denotations.
def enmap_denotations(_denotations)
  return nil if _denotations.nil?

  _denotations.map do |d|
    span = d[:span]
    # Hash#merge already returns a fresh hash, so the original d is not modified
    d.merge(span: {begin: enmap_position(span[:begin]), end: enmap_position(span[:end])})
  end
end
92
102
 
93
- str1 = _str1.tr(characters_from, characters_to)
94
- str2 = _str2.tr(characters_from, characters_to)
103
+ private
95
104
 
96
- mappings = _mappings.select{|m| m[0].length > 1 || m[1].length > 1}
105
+ def enmap_str(_str, char_mapping)
106
+ str = _str.dup
97
107
 
98
- [str1, str2, mappings]
108
+ # To execute the single letter mapping
109
+ char_mapping.each do |one, long|
110
+ str.gsub!(one, long) if long.length == 1
99
111
  end
100
- end
101
112
 
102
- def long_to_one_mapping_preprocessing(_str1, _str2, _mappings = nil)
103
- _mappings ||= TextAlignment::MAPPINGS
104
-
105
- long_to_one_mappings = _mappings.select{|m| m[0].length == 1 && m[1].length > 1}
106
- if long_to_one_mappings.empty?
107
- [_str1, _str2, _mappings]
108
- else
109
- ## long to one character mappings
110
- pletters = TextAlignment::PADDING_LETTERS
111
-
112
- # find the padding letter for str1
113
- @padding_letter1 = begin
114
- i = pletters.index{|l| _str2.index(l).nil?}
115
- raise RuntimeError, "Could not find a padding letter for str1" if i.nil?
116
- TextAlignment::PADDING_LETTERS[i]
117
- end
113
+ # To get the (location, length) index for replacements
114
+ loc_len = []
115
+ char_mapping.each do |one, long|
116
+ next if long.length == 1
118
117
 
119
- # find the padding letter for str2
120
- @padding_letter2 = begin
121
- i = pletters.index{|l| l != @padding_letter1 && _str1.index(l).nil?}
122
- raise RuntimeError, "Could not find a padding letter for str2" if i.nil?
123
- TextAlignment::PADDING_LETTERS[i]
118
+ init_next = 0
119
+ while loc = str.index(long, init_next)
120
+ loc_len << [loc, long.length]
121
+ init_next = loc + long.length
124
122
  end
125
123
 
126
- str1 = str2 = nil
127
- long_to_one_mappings.each do |f|
128
- from = f[1]
129
-
130
- str1 = if _str2.index(f[0])
131
- to = f[0] + (@padding_letter1 * (f[1].length - 1))
132
- _str1.gsub(from, to)
133
- else
134
- _str1
135
- end
136
-
137
- str2 = if _str1.index(f[0])
138
- to = f[0] + (@padding_letter2 * (f[1].length - 1))
139
- _str2.gsub(from, to)
140
- else
141
- _str2
142
- end
143
- end
144
- mappings = _mappings.select{|m| m[0].length > 1 || m[1].length == 1}
124
+ # a workaround to avoid messing-up due to embedding
125
+ str.gsub!(long, one * long.length)
126
+ end
145
127
 
146
- [str1, str2, mappings]
128
+ # To get the (location, length) index for consecutive whitespace sequences
129
+ init_next = 0
130
+ while loc = str.index(/\s{2,}/, init_next)
131
+ len = $~[0].length
132
+ loc_len << [loc, len]
133
+ init_next = loc + len
147
134
  end
148
- end
149
135
 
150
- def compute_similarity(_s1, _s2, sdiff)
151
- return 0 if sdiff.nil?
136
+ loc_len.sort!{|a, b| a[0] <=> b[0]}
152
137
 
153
- # compute the lcs only with non-whitespace letters
154
- lcs = sdiff.count{|d| d.action == '=' && d.old_element =~ /\S/ && d.new_element =~ /\S/}
155
- return 0 if lcs == 0
138
+ # To get the offset_mapping before and after replacement
139
+ offset_mapping = []
140
+ init_next = 0
141
+ j = 0
156
142
 
157
- s1 = if @padding_letter1
158
- _s1.tr(@padding_letter1, ' ')
159
- else
160
- _s1
143
+ loc_len.each do |loc, len|
144
+ offset_mapping += (init_next .. loc).map do |i|
145
+ j += 1
146
+ [i, j - 1]
147
+ end
148
+ init_next = loc + len
149
+ end
150
+
151
+ offset_mapping += (init_next .. str.length).map do |i|
152
+ j += 1
153
+ [i, j - 1]
161
154
  end
162
155
 
163
- s2 = if @padding_letter2
164
- _s2.tr(@padding_letter2, ' ')
165
- else
166
- _s2
156
+ # To execute the long letter mapping
157
+ char_mapping.each do |one, long|
158
+ str.gsub!(one * long.length, one) if long.length > 1
167
159
  end
168
160
 
169
- similarity = lcs.to_f / [s1.scan(/\S/).count, s2.scan(/\S/).count].min
161
+ # To replace multi whitespace sequences to a space
162
+ str.gsub!(/\s{2,}/, ' ')
163
+
164
+ [str, offset_mapping]
165
+ end
166
+ end
167
+
168
+ if __FILE__ == $0
169
+ require 'json'
170
+
171
+ unless ARGV.length == 1
172
+ warn "#{$0} an_annotation_json_file.json"
173
+ exit
170
174
  end
175
+ annotations = JSON.parse File.read(ARGV[0]).strip, symbolize_names: true
176
+ denotations = annotations[:denotations]
177
+ if denotations.nil? && annotations[:tracks]
178
+ denotations = annotations[:tracks].first[:denotations]
179
+ end
180
+
181
+ str_mapping = TextAlignment::CharMapping.new(annotations[:text])
182
+ str_mapped = str_mapping.str
183
+ denotations_mapped = str_mapping.enmap_denotations(denotations)
184
+ new_annotations = {text:str_mapped, denotations:denotations_mapped}
171
185
 
186
+ puts new_annotations.to_json
172
187
  end
@@ -0,0 +1,19 @@
1
+ module TextAlignment; end unless defined? TextAlignment
2
+
3
+ class TextAlignment::CultivationMap
4
+ attr_reader :map
5
+
6
+ def initialize
7
+ @map = {}
8
+ end
9
+
10
+ def cultivate(regions)
11
+ regions.each do |b, e|
12
+ (b ... e).each{|p| @map[p] = e}
13
+ end
14
+ end
15
+
16
+ def search_again_position(position)
17
+ @map[position]
18
+ end
19
+ end
@@ -5,7 +5,7 @@ require 'text_alignment/find_divisions'
5
5
  require 'text_alignment/lcs_comparison'
6
6
  require 'text_alignment/lcs_alignment'
7
7
  require 'text_alignment/glcs_alignment'
8
- require 'text_alignment/mappings'
8
+ require 'text_alignment/char_mapping'
9
9
 
10
10
  module TextAlignment; end unless defined? TextAlignment
11
11
 
@@ -106,7 +106,7 @@ if __FILE__ == $0
106
106
 
107
107
  dictionary = [["β", "beta"]]
108
108
  # align = TextAlignment::TextAlignment.new(str1, str2)
109
- align = TextAlignment::TextAlignment.new(str1, str2, TextAlignment::MAPPINGS)
109
+ align = TextAlignment::TextAlignment.new(str1, str2, TextAlignment::CHAR_MAPPING)
110
110
  p align.common_elements
111
111
  p align.mapped_elements
112
112
  end
@@ -6,7 +6,7 @@ require 'text_alignment/lcs_comparison'
6
6
  require 'text_alignment/lcs_alignment'
7
7
  require 'text_alignment/lcs_cdiff'
8
8
  require 'text_alignment/glcs_alignment'
9
- require 'text_alignment/mappings'
9
+ require 'text_alignment/char_mapping'
10
10
 
11
11
  module TextAlignment; end unless defined? TextAlignment
12
12
 
@@ -20,7 +20,9 @@ class TextAlignment::MixedAlignment
# Sets up a mixed alignment between two strings.
#
# _str1, _str2 - the strings to align; neither may be nil
# _mappings    - optional array of character mappings; when omitted,
#                TextAlignment::CHAR_MAPPING is used
#
# Raises ArgumentError when either string is nil.
# The actual computation is delegated to _compute_mixed_alignment.
def initialize(_str1, _str2, _mappings = nil)
  raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?

  # honour the caller-supplied mappings; fall back to the default table only
  # when none were given
  mappings = _mappings || TextAlignment::CHAR_MAPPING

  # work on copies so that the caller's strings are never touched
  str1 = _str1.dup
  str2 = _str2.dup

  _compute_mixed_alignment(str1, str2, mappings)
end
@@ -63,7 +65,7 @@ class TextAlignment::MixedAlignment
63
65
  end
64
66
 
65
67
  cmp = TextAlignment::LCSComparison.new(str1, str2, lcs, @sdiff)
66
- @similarity = TextAlignment::compute_similarity(str1, str2, @sdiff)
68
+ @similarity = compute_similarity(str1, str2, @sdiff)
67
69
  @str1_match_initial = cmp.str1_match_initial
68
70
  @str1_match_final = cmp.str1_match_final
69
71
  @str2_match_initial = cmp.str2_match_initial
@@ -139,4 +141,14 @@ class TextAlignment::MixedAlignment
139
141
  @position_map_end = posmap_end.sort.to_h
140
142
  end
141
143
 
144
+ def compute_similarity(s1, s2, sdiff)
145
+ return 0 if sdiff.nil?
146
+
147
+ # compute the lcs only with non-whitespace letters
148
+ lcs = sdiff.count{|d| d.action == '=' && d.old_element =~ /\S/ && d.new_element =~ /\S/}
149
+ return 0 if lcs == 0
150
+
151
+ similarity = lcs.to_f / [s1.scan(/\S/).count, s2.scan(/\S/).count].min
152
+ end
153
+
142
154
  end
@@ -2,6 +2,7 @@
2
2
  require 'text_alignment/constants'
3
3
  require 'text_alignment/anchor_finder'
4
4
  require 'text_alignment/mixed_alignment'
5
+ require 'text_alignment/cultivation_map'
5
6
 
6
7
  module TextAlignment; end unless defined? TextAlignment
7
8
 
@@ -9,23 +10,206 @@ class TextAlignment::TextAlignment
9
10
  attr_reader :block_alignment
10
11
  attr_reader :similarity
11
12
  attr_reader :lost_annotations
13
+ attr_reader :cultivation_map
12
14
 
13
- def initialize(_str1, _str2, denotations = nil, _size_ngram = nil, _size_window = nil, _text_similiarity_threshold = nil)
15
+ def initialize(_str1, _str2, _denotations = nil, _cultivation_map = nil)
14
16
  raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
15
17
 
16
- @block_alignment = {source_text: _str1, target_text: _str2, denotations: denotations}
18
+ @block_alignment = {source_text: _str1, target_text: _str2, denotations: _denotations}
17
19
  @original_str1 = _str1
18
20
  @original_str2 = _str2
19
21
 
20
- str1, str2, @mappings = TextAlignment::single_character_mapping_preprocessing(_str1, _str2)
22
+ @str1_mapping = TextAlignment::CharMapping.new(_str1)
23
+ @str2_mapping = TextAlignment::CharMapping.new(_str2)
21
24
 
22
- if r = whole_block_alignment(str1, str2)
23
- @block_alignment[:blocks] = r
24
- return
25
+ str1 = @str1_mapping.str
26
+ denotations = @str1_mapping.enmap_denotations(_denotations)
27
+
28
+ str2 = @str2_mapping.str
29
+
30
+ @cultivation_map = _cultivation_map || TextAlignment::CultivationMap.new
31
+
32
+ @block_alignment[:blocks] = if r = whole_block_alignment(str1, str2, @cultivation_map)
33
+ # whole block alignment
34
+ r
35
+ else
36
+ find_block_alignment(str1, str2, denotations, @cultivation_map)
37
+ end
38
+
39
+ newly_cultivated_regions = @block_alignment[:blocks].collect do |b|
40
+ if b[:alignment] == :block || b[:alignment] == :term
41
+ [b[:target][:begin], b[:target][:end]]
42
+ else
43
+ nil
44
+ end
45
+ end.compact
46
+ newly_cultivated_regions_condensed = newly_cultivated_regions.inject([]) do |condensed, region|
47
+ if condensed.empty? || (condensed.last.last + 1 < region.first)
48
+ condensed.push region
49
+ else
50
+ condensed.last[1] = region.last
51
+ end
52
+ condensed
53
+ end
54
+
55
+ @cultivation_map.cultivate(newly_cultivated_regions_condensed)
56
+ end
57
+
58
+ def transform_begin_position(_begin_position)
59
+ begin_position = @str1_mapping.enmap_position(_begin_position)
60
+
61
+ i = @block_alignment[:blocks].index{|b| b[:source][:end] > begin_position}
62
+ block = @block_alignment[:blocks][i]
63
+
64
+ b = if block[:alignment] == :block || block[:alignment] == :term
65
+ begin_position + block[:delta]
66
+ elsif block[:alignment] == :empty
67
+ if begin_position == block[:source][:begin]
68
+ block[:target][:begin]
69
+ else
70
+ nil
71
+ end
72
+ else
73
+ r = block[:alignment].transform_begin_position(begin_position - block[:source][:begin])
74
+ r.nil? ? nil : r + block[:target][:begin]
25
75
  end
26
76
 
77
+ @str2_mapping.demap_position(b)
78
+ end
79
+
80
+ def transform_end_position(_end_position)
81
+ end_position = @str1_mapping.enmap_position(_end_position)
82
+
83
+ i = @block_alignment[:blocks].index{|b| b[:source][:end] >= end_position}
84
+ block = @block_alignment[:blocks][i]
85
+
86
+ e = if block[:alignment] == :block || block[:alignment] == :term
87
+ end_position + block[:delta]
88
+ elsif block[:alignment] == :empty
89
+ if end_position == block[:source][:end]
90
+ block[:target][:end]
91
+ else
92
+ nil
93
+ end
94
+ else
95
+ r = block[:alignment].transform_end_position(end_position - block[:source][:begin])
96
+ r.nil? ? nil : r + block[:target][:begin]
97
+ end
98
+
99
+ @str2_mapping.demap_position(e)
100
+ end
101
+
102
+ def transform_a_span(span)
103
+ {begin: transform_begin_position(span[:begin]), end: transform_end_position(span[:end])}
104
+ end
105
+
106
+ def transform_spans(spans)
107
+ spans.map{|span| transform_a_span(span)}
108
+ end
109
+
110
+ def transform_denotations!(denotations)
111
+ return nil if denotations.nil?
112
+ @lost_annotations = []
113
+
114
+ denotations.each do |d|
115
+ source = {begin:d.begin, end:d.end}
116
+ d.begin = transform_begin_position(d.begin);
117
+ d.end = transform_end_position(d.end);
118
+ raise "invalid transform" unless !d.begin.nil? && !d.end.nil? && d.begin >= 0 && d.end > d.begin && d.end <= @original_str2.length
119
+ rescue
120
+ @lost_annotations << {source: source, target:{begin:d.begin, end:d.end}}
121
+ d.begin = nil
122
+ d.end = nil
123
+ end
124
+
125
+ @lost_annotations
126
+ end
127
+
128
+ def transform_hdenotations(hdenotations)
129
+ return nil if hdenotations.nil?
130
+ @lost_annotations = []
131
+
132
+ r = hdenotations.collect do |d|
133
+ t = transform_a_span(d[:span])
134
+ raise "invalid transform" unless !t[:begin].nil? && !t[:end].nil? && t[:begin] >= 0 && t[:end] > t[:begin] && t[:end] <= @original_str2.length
135
+ new_d = d.dup.merge({span:t})
136
+ rescue
137
+ @lost_annotations << {source: d[:span], target:t}
138
+ nil
139
+ end.compact
140
+
141
+ r
142
+ end
143
+
144
+ def alignment_show
145
+ stext = @block_alignment[:source_text]
146
+ ttext = @block_alignment[:target_text]
147
+
148
+ show = ''
149
+ @block_alignment[:blocks].each do |a|
150
+ show += case a[:alignment]
151
+ when :block
152
+ "===== common (block) ===== [#{a[:source][:begin]} - #{a[:source][:end]}] [#{a[:target][:begin]} - #{a[:target][:end]}]\n" +
153
+ stext[a[:source][:begin] ... a[:source][:end]] + "\n\n"
154
+ when :term
155
+ "===== common (term) ===== [#{a[:source][:begin]} - #{a[:source][:end]}] [#{a[:target][:begin]} - #{a[:target][:end]}]\n" +
156
+ stext[a[:source][:begin] ... a[:source][:end]] + "\n\n"
157
+ when :empty
158
+ "xxxxx disparate texts (similarity: #{a[:similarity]})\n" +
159
+ "<<<<< string 1 [#{a[:source][:begin]} - #{a[:source][:end]}]\n" +
160
+ stext[a[:source][:begin] ... a[:source][:end]] + "\n\n" +
161
+ ">>>>> string 2 " +
162
+ if a[:target]
163
+ "[#{a[:target][:begin]} - #{a[:target][:end]}]\n" +
164
+ ttext[a[:target][:begin] ... a[:target][:end]] + "\n\n"
165
+ else
166
+ "[-]\n\n"
167
+ end
168
+ else
169
+ astr1 = ''
170
+ astr2 = ''
171
+
172
+ base = a[:source][:begin]
173
+ astr1 = a[:alignment].sdiff.map do |c|
174
+ case c.action
175
+ when '='
176
+ stext[c.old_position + base]
177
+ when '+'
178
+ '_'
179
+ when '-'
180
+ stext[c.old_position + base]
181
+ when '!'
182
+ stext[c.old_position + base] + '_'
183
+ end
184
+ end.join('')
185
+
186
+ base = a[:target][:begin]
187
+ astr2 = a[:alignment].sdiff.map do |c|
188
+ case c.action
189
+ when '='
190
+ ttext[c.new_position + base]
191
+ when '+'
192
+ ttext[c.new_position + base]
193
+ when '-'
194
+ '_'
195
+ when '!'
196
+ '_' + ttext[c.new_position + base]
197
+ end
198
+ end.join('')
199
+
200
+ "***** local mismatch [#{a[:source][:begin]} - #{a[:source][:end]}] [#{a[:target][:begin]} - #{a[:target][:end]}] (similarity: #{a[:similarity]})\n" +
201
+ "[#{astr1}]\n" +
202
+ "[#{astr2}]\n\n"
203
+ end
204
+ end
205
+ show
206
+ end
207
+
208
+ private
209
+
210
+ def find_block_alignment(str1, str2, denotations, cultivation_map)
27
211
  ## to find block alignments
28
- anchor_finder = TextAlignment::AnchorFinder.new(str1, str2, _size_ngram, _size_window, _text_similiarity_threshold)
212
+ anchor_finder = TextAlignment::AnchorFinder.new(str1, str2, cultivation_map)
29
213
 
30
214
  blocks = []
31
215
  while block = anchor_finder.get_next_anchor
@@ -68,12 +252,13 @@ class TextAlignment::TextAlignment
68
252
 
69
253
  if b2 == e2
70
254
  [
71
- {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty},
255
+ {source:{begin:b1, end:e1}, alignment: :empty},
72
256
  block
73
257
  ]
74
258
  else
259
+ len_buffer = ((e1 - b1) * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
260
+
75
261
  if b1 == 0 && b2 == 0
76
- len_buffer = (e1 * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
77
262
  b2 = e2 - len_buffer if e2 > len_buffer
78
263
  end
79
264
 
@@ -85,6 +270,10 @@ class TextAlignment::TextAlignment
85
270
  {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty},
86
271
  block
87
272
  ]
273
+ elsif ((e2 - b2) - (e1 - b1)) > len_buffer
274
+ la_block1 = local_alignment_blocks(str1, b1, e1, str2, b2, b2 + len_buffer, denotations)
275
+ la_block2 = local_alignment_blocks(str1, b1, e1, str2, e2 - len_buffer, e2, denotations)
276
+ [la_block1, la_block2].max{|a, b| a.first[:similarity] <=> b.first[:similarity]} << block # compare the head window (la_block1) with the tail window (la_block2) and keep the more similar one
88
277
  else
89
278
  local_alignment_blocks(str1, b1, e1, str2, b2, e2, denotations) << block
90
279
  end
@@ -102,31 +291,53 @@ class TextAlignment::TextAlignment
102
291
  b1 = last_block[:source][:end]
103
292
  if b1 < str1.length
104
293
  e1 = str1.length
105
-
106
294
  b2 = last_block[:target][:end]
107
- if b2 < str2.length
108
- len_buffer = ((e1 - b1) * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
109
- e2 = (str2.length - b2) > len_buffer ? b2 + len_buffer : str2.length
110
- local_alignment_blocks(str1, b1, e1, str2, b2, e2, denotations)
295
+
296
+ _str1 = str1[b1 ... e1]
297
+ if _str1.strip.empty?
298
+ [{source:{begin:b1, end:e1}, alignment: :empty}]
111
299
  else
112
- [{source:{begin:last_block[:source][:end], end:str1.length}, alignment: :empty}]
300
+ if b2 < str2.length
301
+ len_buffer = ((e1 - b1) * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
302
+ e2 = (str2.length - b2) > len_buffer ? b2 + len_buffer : str2.length
303
+
304
+ local_alignment_blocks(str1, b1, e1, str2, b2, e2, denotations)
305
+ else
306
+ [{source:{begin:b1, end:e1}, alignment: :empty}]
307
+ end
113
308
  end
114
309
  else
115
310
  []
116
311
  end
117
312
  end
118
-
119
- @block_alignment[:blocks] = blocks2
120
313
  end
121
314
 
122
- def whole_block_alignment(str1, str2)
315
+ def whole_block_alignment(str1, str2, cultivation_map)
123
316
  ## Block exact match
124
- block_begin = str2.index(str1)
317
+ search_position = 0
318
+
319
+ block_begin = begin
320
+ _block_begin = str2.index(str1, search_position)
321
+ break if _block_begin.nil?
322
+ search_position = cultivation_map.search_again_position(_block_begin)
323
+ _block_begin
324
+ end until search_position.nil?
325
+
125
326
  unless block_begin.nil?
126
327
  return [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
127
328
  end
128
329
 
129
- block_begin = str2.downcase.index(str1.downcase)
330
+ search_position = 0
331
+
332
+ dstr1 = str1.downcase
333
+ dstr2 = str2.downcase
334
+ block_begin = begin
335
+ _block_begin = dstr2.index(dstr1, search_position)
336
+ break if _block_begin.nil?
337
+ search_position = cultivation_map.search_again_position(_block_begin)
338
+ _block_begin
339
+ end until search_position.nil?
340
+
130
341
  unless block_begin.nil?
131
342
  return [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
132
343
  end
@@ -144,7 +355,7 @@ class TextAlignment::TextAlignment
144
355
  map{|d| d.merge(lex:str1[d[:span][:begin] ... d[:span][:end]])}
145
356
 
146
357
  position = 0
147
- tblocks = ds_in_scope.map do |term|
358
+ _tblocks = ds_in_scope.map do |term|
148
359
  lex = term[:lex]
149
360
  r = block2.index(lex, position)
150
361
  if r.nil?
@@ -152,11 +363,11 @@ class TextAlignment::TextAlignment
152
363
  break
153
364
  end
154
365
  position = r + lex.length
155
- {source:term[:span], target:{begin:r + b2, end:r + b2 + lex.length}, alignment: :term, delta: r + b2 - term[:span][:begin]}
366
+ {source:term[:span], target:{begin:r + b2, end:r + b2 + lex.length}, alignment: :term, similarity: 0.9, delta: r + b2 - term[:span][:begin]}
156
367
  end
157
368
 
158
369
  # missing term found
159
- tblocks = [] if position.nil?
370
+ _tblocks = [] if position.nil?
160
371
 
161
372
  # redundant matching found
162
373
  unless position.nil?
@@ -164,13 +375,13 @@ class TextAlignment::TextAlignment
164
375
  lex = term[:lex]
165
376
  look_forward = block2.index(lex, position)
166
377
  unless look_forward.nil?
167
- tblocks = []
378
+ _tblocks = []
168
379
  break
169
380
  end
170
381
  end
171
382
  end
172
383
 
173
- tblocks
384
+ _tblocks
174
385
  else
175
386
  []
176
387
  end
@@ -184,7 +395,7 @@ class TextAlignment::TextAlignment
184
395
  block2 = str2[b2 ... e2]
185
396
 
186
397
  ## character-based alignment
187
- alignment = TextAlignment::MixedAlignment.new(block1.downcase, block2.downcase, @mappings)
398
+ alignment = TextAlignment::MixedAlignment.new(block1.downcase, block2.downcase)
188
399
  if alignment.sdiff.nil?
189
400
  [{source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}]
190
401
  else
@@ -196,7 +407,7 @@ class TextAlignment::TextAlignment
196
407
  block2 = str2[b2 ... e2]
197
408
 
198
409
  ## character-based alignment
199
- alignment = TextAlignment::MixedAlignment.new(block1.downcase, block2.downcase, @mappings)
410
+ alignment = TextAlignment::MixedAlignment.new(block1.downcase, block2.downcase)
200
411
  if alignment.sdiff.nil?
201
412
  [{source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}]
202
413
  else
@@ -244,157 +455,4 @@ class TextAlignment::TextAlignment
244
455
  end
245
456
  end
246
457
 
247
-
248
- def indices(str, target)
249
- position = 0
250
- len = target.len
251
- Enumerator.new do |yielder|
252
- while idx = str.index(target, position)
253
- yielder << idx
254
- position = idx + len
255
- end
256
- end
257
- end
258
-
259
- def transform_begin_position(begin_position)
260
- i = @block_alignment[:blocks].index{|b| b[:source][:end] > begin_position}
261
- block = @block_alignment[:blocks][i]
262
-
263
- b = if block[:alignment] == :block || block[:alignment] == :term
264
- begin_position + block[:delta]
265
- elsif block[:alignment] == :empty
266
- if begin_position == block[:source][:begin]
267
- block[:target][:begin]
268
- else
269
- nil
270
- end
271
- else
272
- r = block[:alignment].transform_begin_position(begin_position - block[:source][:begin])
273
- r.nil? ? nil : r + block[:target][:begin]
274
- end
275
- end
276
-
277
- def transform_end_position(end_position)
278
- i = @block_alignment[:blocks].index{|b| b[:source][:end] >= end_position}
279
- block = @block_alignment[:blocks][i]
280
-
281
- e = if block[:alignment] == :block || block[:alignment] == :term
282
- end_position + block[:delta]
283
- elsif block[:alignment] == :empty
284
- if end_position == block[:source][:end]
285
- block[:target][:end]
286
- else
287
- nil
288
- end
289
- else
290
- r = block[:alignment].transform_end_position(end_position - block[:source][:begin])
291
- r.nil? ? nil : r + block[:target][:begin]
292
- end
293
- end
294
-
295
- def transform_a_span(span)
296
- {begin: transform_begin_position(span[:begin]), end: transform_end_position(span[:end])}
297
- end
298
-
299
- def transform_spans(spans)
300
- spans.map{|span| transform_a_span(span)}
301
- end
302
-
303
- def transform_denotations!(denotations)
304
- return nil if denotations.nil?
305
- @lost_annotations = []
306
-
307
- denotations.each do |d|
308
- source = {begin:d.begin, end:d.end}
309
- d.begin = transform_begin_position(d.begin);
310
- d.end = transform_end_position(d.end);
311
- raise "invalid transform" unless !d.begin.nil? && !d.end.nil? && d.begin >= 0 && d.end > d.begin && d.end <= @original_str2.length
312
- rescue
313
- @lost_annotations << {source: source, target:{begin:d.begin, end:d.end}}
314
- d.begin = nil
315
- d.end = nil
316
- end
317
-
318
- @lost_annotations
319
- end
320
-
321
- def transform_hdenotations(hdenotations)
322
- return nil if hdenotations.nil?
323
- @lost_annotations = []
324
-
325
- r = hdenotations.collect do |d|
326
- t = transform_a_span(d[:span])
327
- raise "invalid transform" unless !t[:begin].nil? && !t[:end].nil? && t[:begin] >= 0 && t[:end] > t[:begin] && t[:end] <= @original_str2.length
328
- new_d = d.dup.merge({span:t})
329
- rescue
330
- @lost_annotations << {source: d[:span], target:t}
331
- nil
332
- end.compact
333
-
334
- r
335
- end
336
-
337
- def alignment_show
338
- stext = @block_alignment[:source_text]
339
- ttext = @block_alignment[:target_text]
340
-
341
- show = ''
342
- @block_alignment[:blocks].each do |a|
343
- show += case a[:alignment]
344
- when :block
345
- "===== common (block) ===== [#{a[:source][:begin]} - #{a[:source][:end]}] [#{a[:target][:begin]} - #{a[:target][:end]}]\n" +
346
- stext[a[:source][:begin] ... a[:source][:end]] + "\n\n"
347
- when :term
348
- "===== common (term) ===== [#{a[:source][:begin]} - #{a[:source][:end]}] [#{a[:target][:begin]} - #{a[:target][:end]}]\n" +
349
- stext[a[:source][:begin] ... a[:source][:end]] + "\n\n"
350
- when :empty
351
- "xxxxx disparate texts (similarity: #{a[:similarity]})\n" +
352
- "<<<<< string 1 [#{a[:source][:begin]} - #{a[:source][:end]}]\n" +
353
- stext[a[:source][:begin] ... a[:source][:end]] + "\n\n" +
354
- ">>>>> string 2 " +
355
- if a[:target]
356
- "[#{a[:target][:begin]} - #{a[:target][:end]}]\n" +
357
- ttext[a[:target][:begin] ... a[:target][:end]] + "\n\n"
358
- else
359
- "[-]\n\n"
360
- end
361
- else
362
- astr1 = ''
363
- astr2 = ''
364
-
365
- base = a[:source][:begin]
366
- astr1 = a[:alignment].sdiff.map do |c|
367
- case c.action
368
- when '='
369
- stext[c.old_position + base]
370
- when '+'
371
- '_'
372
- when '-'
373
- stext[c.old_position + base]
374
- when '!'
375
- stext[c.old_position + base] + '_'
376
- end
377
- end.join('')
378
-
379
- base = a[:target][:begin]
380
- astr2 = a[:alignment].sdiff.map do |c|
381
- case c.action
382
- when '='
383
- ttext[c.new_position + base]
384
- when '+'
385
- ttext[c.new_position + base]
386
- when '-'
387
- '_'
388
- when '!'
389
- '_' + ttext[c.new_position + base]
390
- end
391
- end.join('')
392
-
393
- "***** local mismatch [#{a[:source][:begin]} - #{a[:source][:end]}] [#{a[:target][:begin]} - #{a[:target][:end]}] (similarity: #{a[:similarity]})\n" +
394
- "[#{astr1}]\n" +
395
- "[#{astr2}]\n\n"
396
- end
397
- end
398
- show
399
- end
400
458
  end
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.9.1'
2
+ VERSION = '0.10.1'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.1
4
+ version: 0.10.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-20 00:00:00.000000000 Z
11
+ date: 2021-03-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary
@@ -77,7 +77,9 @@ files:
77
77
  - lib/text_alignment.rb
78
78
  - lib/text_alignment/anchor_finder.rb
79
79
  - lib/text_alignment/approximate_fit.rb
80
+ - lib/text_alignment/char_mapping.rb
80
81
  - lib/text_alignment/constants.rb
82
+ - lib/text_alignment/cultivation_map.rb
81
83
  - lib/text_alignment/find_divisions.rb
82
84
  - lib/text_alignment/glcs_alignment.rb
83
85
  - lib/text_alignment/glcs_alignment_fast.rb
@@ -86,7 +88,6 @@ files:
86
88
  - lib/text_alignment/lcs_cdiff.rb
87
89
  - lib/text_alignment/lcs_comparison.rb
88
90
  - lib/text_alignment/lcs_min.rb
89
- - lib/text_alignment/mappings.rb
90
91
  - lib/text_alignment/mixed_alignment.rb
91
92
  - lib/text_alignment/text_alignment.rb
92
93
  - lib/text_alignment/version.rb