text_alignment 0.11.3 → 0.11.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0f019e7fbd144890e96eda8f2cf9b27cf091930c96b81236452172a5142e2cf3
4
- data.tar.gz: 47d32ec727511d53730bf56557992f972f7747832f9437f5e2f5798cd0764f41
3
+ metadata.gz: 5f95c7fb8bfdeb768fbd1400f7e785a2ed18016322f2cdcba3ea7196aa4e86ac
4
+ data.tar.gz: cad0296a218108884703af07bcbf1b303a6e12c51ab14ad872586c50cfc8e82c
5
5
  SHA512:
6
- metadata.gz: bf2720ce7af3612a8c0b1823bf6265e90f0d5e92f315d7eb697c4b13e1c9752e795adb5b1dbc840629379f3e96cf94115dd5d9400663f1d43a4caf428274f69a
7
- data.tar.gz: d7bca56968c54fa68d83b5e9d89aa89107774cde761117e28d561cf9c63226b08a613e84b3484f2df58edaadfac7e1b286155bc3486559d6fd7c63d6ec082907
6
+ metadata.gz: 5d18a8d142974967fcfe358b561c83d76d1c5d8d2a277092ed4a8b42de14cd791b004830ff9f145735e6bc0a1265295f4fd6190c674ba62b57190d7e035a863b
7
+ data.tar.gz: c155ff9780d5f82893825787ee39ba319ddb1f16396d6a933334a83e485ff4d2eb2840d5fc1399f9873cd69a2458f029ced0d128d9099c4f08137d360e3e2007
@@ -2,6 +2,7 @@
2
2
  require 'text_alignment'
3
3
  require 'json'
4
4
  require 'pp'
5
+ require 'optparse'
5
6
 
6
7
  def read_annotations(filename)
7
8
  case File.extname(filename)
@@ -108,24 +109,54 @@ def align_mannotations(source_annotations, reference_text, alignment, debug = fa
108
109
  end
109
110
 
110
111
 
112
+ ## Options
113
+ options = {}
114
+ verbose = false
115
+
116
+ ## command line option processing
117
+ require 'optparse'
118
+ optparse = OptionParser.new do |opts|
119
+ opts.banner = "Usage: align_annotations [options] target_annotations(.json|.txt) reference_text(.json|.txt)"
120
+
121
+ opts.on('-d', '--duplicate', 'tells it to assume there may be duplicate texts.') do
122
+ options[:duplicate_texts] = true
123
+ end
124
+
125
+ opts.on('-w', '--no-whitespaces', 'tells it to ignore whitespaces.') do
126
+ options[:to_ignore_whitespaces] = true
127
+ end
128
+
129
+ opts.on('-o', '--no-order', 'tells it to ignore the order of the texts.') do
130
+ options[:to_ignore_text_order] = true
131
+ end
132
+
133
+ opts.on('-v', '--verbose', 'tells it to show the state verbosely for debugging.') do
134
+ verbose = true
135
+ end
136
+
137
+ opts.on('-h', '--help', 'displays this screen.') do
138
+ puts opts
139
+ exit
140
+ end
141
+ end
142
+
143
+ optparse.parse!
144
+
111
145
  unless ARGV.length == 2
112
- warn "align_annotations target_annotations(.json|.txt) reference_annotations(.json|.txt)"
113
- exit
146
+ puts optparse.help
147
+ exit 1
114
148
  end
115
149
 
116
150
  source_annotations = read_annotations(ARGV[0])
117
151
  reference_text = read_text(ARGV[1])
118
152
 
119
- alignment = TextAlignment::TextAlignment.new(reference_text, true)
153
+ alignment = TextAlignment::TextAlignment.new(reference_text, options)
120
154
 
121
155
  target_annotations = if source_annotations.class == Array
122
- # align_mannotations(source_annotations, reference_text, alignment, true)
123
- align_mannotations(source_annotations, reference_text, alignment, false)
156
+ align_mannotations(source_annotations, reference_text, alignment, verbose)
124
157
  else
125
- # denotations = align_denotations(source_annotations[:denotations], source_annotations[:text], alignment)
126
- denotations = align_denotations(source_annotations[:denotations], source_annotations[:text], alignment, false)
158
+ denotations = align_denotations(source_annotations[:denotations], source_annotations[:text], alignment, verbose)
127
159
  source_annotations.merge({text:reference_text, denotations:denotations})
128
160
  end
129
161
 
130
- # pp alignment.block_alignment
131
162
  # puts target_annotations.to_json
@@ -6,11 +6,18 @@ module TextAlignment; end unless defined? TextAlignment
6
6
 
7
7
  class TextAlignment::AnchorFinder
8
8
 
9
- def initialize(source_str, target_str, cultivation_map)
9
+ def initialize(source_str, target_str, cultivation_map, to_ignore_whitespaces = false, to_ignore_text_order = false)
10
+ @method_get_left_windows, @method_get_right_windows = if to_ignore_whitespaces
11
+ [method(:get_left_windows_no_squeeze_ws), method(:get_right_windows_no_squeeze_ws)]
12
+ else
13
+ [method(:get_left_windows), method(:get_right_windows)]
14
+ end
15
+
10
16
  @s1 = source_str.downcase
11
17
  @s2 = target_str.downcase
12
18
 
13
19
  @cultivation_map = cultivation_map
20
+ @to_ignore_text_order = to_ignore_text_order
14
21
 
15
22
  @size_ngram = TextAlignment::SIZE_NGRAM
16
23
  @size_window = TextAlignment::SIZE_WINDOW
@@ -65,10 +72,7 @@ class TextAlignment::AnchorFinder
65
72
  # to get the anchor to search for in s2
66
73
  anchor = @s1[beg_s1, @size_ngram]
67
74
 
68
- # comment out below with the assumption that texts are in the same order
69
- # search_position = 0
70
- search_position = @pos_s2_last_match
71
-
75
+ search_position = @to_ignore_text_order ? 0 : @pos_s2_last_match
72
76
  beg_s2_candidates = find_beg_s2_candidates(anchor, search_position)
73
77
  return nil if beg_s2_candidates.empty?
74
78
 
@@ -108,14 +112,14 @@ class TextAlignment::AnchorFinder
108
112
  next
109
113
  end
110
114
 
111
- left_window_s1, left_window_s2 = get_left_windows(beg_s1, beg_s2, size_window)
115
+ left_window_s1, left_window_s2 = @method_get_left_windows.call(beg_s1, beg_s2, size_window)
112
116
  if left_window_s1 && (text_similarity(left_window_s1, left_window_s2) > @sim_threshold)
113
117
  break unless valid_beg_s2.nil?
114
118
  valid_beg_s2 = beg_s2
115
119
  next
116
120
  end
117
121
 
118
- right_window_s1, right_window_s2 = get_right_windows(beg_s1, beg_s2, size_window)
122
+ right_window_s1, right_window_s2 = @method_get_right_windows.call(beg_s1, beg_s2, size_window)
119
123
  if right_window_s2 && (text_similarity(right_window_s1, right_window_s2) > @sim_threshold)
120
124
  break unless valid_beg_s2.nil?
121
125
  valid_beg_s2 = beg_s2
@@ -139,7 +143,7 @@ class TextAlignment::AnchorFinder
139
143
  size_window ||= @size_window
140
144
 
141
145
  # comment out below with the assumption that the beginning of a document gives a significant locational information
142
- # return if @beg_s1 < size_window || @beg_s2 < size_window
146
+ # return if beg_s1 < size_window || beg_s2 < size_window
143
147
 
144
148
  window_s1 = ''
145
149
  loc = beg_s1 - 1
@@ -170,7 +174,7 @@ class TextAlignment::AnchorFinder
170
174
  size_window ||= @size_window
171
175
 
172
176
  # comment out below with the assumption that the end of a document gives significant locational information
173
- # return if (@beg_s1 + @size_ngram > (@s1.length - size_window)) || (@beg_s2 + @size_ngram > (@s2.length - size_window))
177
+ # return if (beg_s1 + @size_ngram > (@s1.length - size_window)) || (beg_s2 + @size_ngram > (@s2.length - size_window))
174
178
 
175
179
  window_s1 = ''
176
180
  loc = beg_s1 + @size_ngram
@@ -199,6 +203,44 @@ class TextAlignment::AnchorFinder
199
203
  [window_s1, window_s2]
200
204
  end
201
205
 
206
+ def get_left_windows_no_squeeze_ws(beg_s1, beg_s2, size_window = nil)
207
+ size_window ||= @size_window
208
+
209
+ # comment out below with the assumption that the beginning of a document gives a significant locational information
210
+ # return if beg_s1 < size_window || beg_s2 < size_window
211
+
212
+ wbeg = beg_s1 - size_window
213
+ wbeg = 0 if wbeg < 0
214
+ window_s1 = @s1[wbeg ... beg_s1]
215
+
216
+ wbeg = beg_s2 - size_window
217
+ wbeg = 0 if wbeg < 0
218
+ window_s2 = @s2[wbeg ... beg_s2]
219
+
220
+ [window_s1, window_s2]
221
+ end
222
+
223
+ def get_right_windows_no_squeeze_ws(beg_s1, beg_s2, size_window = nil)
224
+ size_window ||= @size_window
225
+
226
+ # comment out below with the assumption that the end of a document gives significant locational information
227
+ # return if (beg_s1 + @size_ngram > (@s1.length - size_window)) || (beg_s2 + @size_ngram > (@s2.length - size_window))
228
+
229
+ slen = @s1.length
230
+ wbeg = beg_s1 + @size_ngram
231
+ wend = wbeg + size_window
232
+ wend = slen if wend > slen
233
+ window_s1 = @s1[wbeg ... wend]
234
+
235
+ slen = @s2.length
236
+ wbeg = beg_s2 + @size_ngram
237
+ wend = wbeg + size_window
238
+ wend = slen if wend > slen
239
+ window_s2 = @s2[wbeg ... wend]
240
+
241
+ [window_s1, window_s2]
242
+ end
243
+
202
244
  def text_similarity(str1, str2, ngram_order = 2)
203
245
  return 0 if str1.nil? || str2.nil?
204
246
  String::Similarity.cosine(str1, str2, ngram:ngram_order)
@@ -1,3 +1,5 @@
1
+ require 'strscan'
2
+
1
3
  module TextAlignment; end unless defined? TextAlignment
2
4
 
3
5
  TextAlignment::CHAR_MAPPING = [
@@ -78,10 +80,18 @@ TextAlignment::CHAR_MAPPING = [
78
80
 
79
81
 
80
82
  class TextAlignment::CharMapping
81
- attr_reader :mapped_text
83
+ attr_reader :mapped_text, :index_enmap
84
+
85
+ def initialize(_text, char_mapping = nil, to_ignore_whitespaces = false)
86
+ if to_ignore_whitespaces
87
+ @method_get_positions_squeeze_ws = method(:get_positions_squeeze_ws_0)
88
+ @method_squeeze_ws = method(:squeeze_ws_0!)
89
+ else
90
+ @method_get_positions_squeeze_ws = method(:get_positions_squeeze_ws_1)
91
+ @method_squeeze_ws = method(:squeeze_ws_1!)
92
+ end
82
93
 
83
- def initialize(_text, char_mapping = nil)
84
- char_mapping ||= TextAlignment::CHAR_MAPPING
94
+ char_mapping ||= TextAlignment::CHAR_MAPPING.sort{|a, b| b[1].length <=> a[1].length}
85
95
  @mapped_text, offset_mapping = enmap_text(_text, char_mapping)
86
96
  @index_enmap = offset_mapping.to_h
87
97
  @index_demap = offset_mapping.map{|m| m.reverse}.to_h
@@ -105,22 +115,22 @@ class TextAlignment::CharMapping
105
115
 
106
116
  private
107
117
 
108
- def enmap_text(_text, char_mapping)
118
+ def enmap_text(_text, char_mapping, no_ws = false)
109
119
  text = _text.dup
110
120
 
111
- # To execute the single letter mapping
121
+ # To execute the single letter mapping replacement
112
122
  char_mapping.each do |one, long|
113
123
  text.gsub!(one, long) if long.length == 1
114
124
  end
115
125
 
116
- # To get the (location, length) index for replacements
117
- loc_len = []
126
+ # To get the replacement positions, (position, old_length, new_length), for char mappings
127
+ rpositions = []
118
128
  char_mapping.each do |one, long|
119
129
  next if long.length == 1
120
130
 
121
131
  init_next = 0
122
132
  while loc = text.index(long, init_next)
123
- loc_len << [loc, long.length]
133
+ rpositions << [loc, long.length, 1]
124
134
  init_next = loc + long.length
125
135
  end
126
136
 
@@ -128,32 +138,27 @@ class TextAlignment::CharMapping
128
138
  text.gsub!(long, one * long.length)
129
139
  end
130
140
 
131
- # To get the (location, length) index for consecutive whitespace sequences
132
- init_next = 0
133
- while loc = text.index(/\s{2,}/, init_next)
134
- len = $~[0].length
135
- loc_len << [loc, len]
136
- init_next = loc + len
137
- end
138
-
139
- loc_len.sort!{|a, b| a[0] <=> b[0]}
141
+ # To get the replacement positions, (position, old_length, new_length), for consecutive whitespaces
142
+ rpositions += @method_get_positions_squeeze_ws.call(text)
143
+ rpositions.sort!{|a, b| a[0] <=> b[0]}
140
144
 
141
145
  # To get the offset_mapping before and after replacement
142
- offset_mapping = []
143
- init_next = 0
144
- j = 0
145
-
146
- loc_len.each do |loc, len|
147
- offset_mapping += (init_next .. loc).map do |i|
148
- j += 1
149
- [i, j - 1]
146
+ offset_mapping = begin
147
+ i, j = 0, 0
148
+
149
+ offset_mappings = rpositions.map do |loc, old_len, new_len|
150
+ pre_len = loc - i
151
+ m = (0 .. pre_len).map{|c| [i + c, j + c]}
152
+ i = loc + old_len
153
+ j += pre_len + new_len
154
+
155
+ m
150
156
  end
151
- init_next = loc + len
152
- end
153
157
 
154
- offset_mapping += (init_next .. text.length).map do |i|
155
- j += 1
156
- [i, j - 1]
158
+ pre_len = text.length - i
159
+ offset_mappings << (0 .. pre_len).map{|c| [i + c, j + c]}
160
+
161
+ offset_mappings.reduce(:+)
157
162
  end
158
163
 
159
164
  # To execute the long letter mapping
@@ -162,14 +167,40 @@ class TextAlignment::CharMapping
162
167
  end
163
168
 
164
169
  # To squeeze whitespace sequences (to a single space, or to nothing when whitespaces are ignored)
165
- text.gsub!(/\s{2,}/, ' ')
170
+ @method_squeeze_ws.call(text)
166
171
 
167
172
  [text, offset_mapping]
168
173
  end
174
+
175
+ # To get the positions of consecutive whitespaces (two or more) to be squeezed into one
176
+ def get_positions_squeeze_ws_1(text)
177
+ rpositions = []
178
+ text.scan(/\s{2,}/) do |s|
179
+ loc = $~.begin(0)
180
+ len = $~.end(0) - loc
181
+ rpositions << [loc, len, 1]
182
+ end
183
+ rpositions
184
+ end
185
+
186
+ # To get the positions of whitespace sequences (one or more) to be squeezed into nothing
187
+ def get_positions_squeeze_ws_0(text)
188
+ text.enum_for(:scan, /\s+/).map{[b = $~.begin(0), $~.end(0) - b, 0]}
189
+ end
190
+
191
+ def squeeze_ws_1!(text)
192
+ text.gsub!(/\s{2,}/, ' ')
193
+ end
194
+
195
+ def squeeze_ws_0!(text)
196
+ text.gsub!(/\s+/, '')
197
+ end
198
+
169
199
  end
170
200
 
171
201
  if __FILE__ == $0
172
202
  require 'json'
203
+ # require 'profile'
173
204
 
174
205
  unless ARGV.length == 1
175
206
  warn "#{$0} an_annotation_json_file.json"
@@ -181,10 +212,11 @@ if __FILE__ == $0
181
212
  denotations = annotations[:tracks].first[:denotations]
182
213
  end
183
214
 
184
- text_mapping = TextAlignment::CharMapping.new(annotations[:text])
215
+ text_mapping = TextAlignment::CharMapping.new(annotations[:text], nil, false)
216
+ # text_mapping = TextAlignment::CharMapping.new(annotations[:text], nil, true)
185
217
  text_mapped = text_mapping.mapped_text
186
218
  denotations_mapped = text_mapping.enmap_denotations(denotations)
187
219
  new_annotations = {text:text_mapped, denotations:denotations_mapped}
188
220
 
189
- puts new_annotations.to_json
221
+ # puts new_annotations.to_json
190
222
  end
@@ -147,13 +147,24 @@ class TextAlignment::MixedAlignment
147
147
  # recoverability
148
148
  count_nws = sdiff.count{|d| d.old_element =~ /\S/}
149
149
  count_nws_match = sdiff.count{|d| d.action == '=' && d.old_element =~ /\S/}
150
-
151
150
  coverage = count_nws_match.to_f / count_nws
152
151
 
153
152
  # fragmentation rate
154
- count_ofrag = sdiff.count{|d| d.old_element =~ /\s/} + 1
155
- count_frag = sdiff.collect{|d| (d.action == '=') && (d.old_element =~/\s/) ? ' ' : d.action}.join.scan(/=+/).count
156
- rate_frag = count_ofrag.to_f / count_frag
153
+ frag_str = sdiff.collect do |d|
154
+ case d.action
155
+ when '='
156
+ '='
157
+ when '-'
158
+ ''
159
+ when '+'
160
+ (d.new_element =~ /\S/) ? '+' : ''
161
+ else
162
+ ''
163
+ end
164
+ end.join.sub(/^[^=]++/, '').sub(/[^=]+$/, '')
165
+
166
+ count_frag = frag_str.scan(/=+/).count
167
+ rate_frag = 1.0 / count_frag
157
168
 
158
169
  similarity = coverage * rate_frag
159
170
  end
@@ -11,14 +11,18 @@ class TextAlignment::TextAlignment
11
11
  attr_reader :similarity
12
12
  attr_reader :lost_annotations
13
13
 
14
- # Initialize with a reference text, again which texts will be aligned
15
- def initialize(reference_text, to_prevent_overlap = false)
14
+ # Initialize with a reference text, against which texts will be aligned
15
+ def initialize(reference_text, options = {})
16
16
  raise ArgumentError, "nil text" if reference_text.nil?
17
17
 
18
+ options ||= {}
19
+ @duplicate_texts = options[:duplicate_texts] || false
20
+ @to_ignore_whitespaces = options[:to_ignore_whitespaces] || false
21
+ @to_ignore_text_order = options[:to_ignore_text_order] || false
22
+
18
23
  @original_reference_text = reference_text
19
- @rtext_mapping = TextAlignment::CharMapping.new(reference_text)
24
+ @rtext_mapping = TextAlignment::CharMapping.new(reference_text, nil, @to_ignore_whitespaces)
20
25
  @mapped_reference_text = @rtext_mapping.mapped_text
21
- @to_prevent_overlap = to_prevent_overlap
22
26
 
23
27
  @original_text = nil
24
28
  @blocks = nil
@@ -27,12 +31,12 @@ class TextAlignment::TextAlignment
27
31
 
28
32
  def align(text, denotations = nil)
29
33
  # To maintain the cultivation map
30
- update_cultivation_map if @to_prevent_overlap
34
+ update_cultivation_map unless @duplicate_texts
31
35
 
32
36
  # In case the input text is the same as the previous one, reuse the previous text mapping
33
37
  unless @original_text && @original_text == text
34
38
  @original_text = text
35
- @text_mapping = TextAlignment::CharMapping.new(text)
39
+ @text_mapping = TextAlignment::CharMapping.new(text, nil, @to_ignore_whitespaces)
36
40
  end
37
41
 
38
42
  @mapped_text = @text_mapping.mapped_text
@@ -202,7 +206,7 @@ class TextAlignment::TextAlignment
202
206
 
203
207
  def find_block_alignment(str1, str2, denotations, cultivation_map)
204
208
  ## to find block alignments
205
- anchor_finder = TextAlignment::AnchorFinder.new(str1, str2, cultivation_map)
209
+ anchor_finder = TextAlignment::AnchorFinder.new(str1, str2, cultivation_map, @to_ignore_whitespaces, @to_ignore_text_order)
206
210
 
207
211
  blocks = []
208
212
  while block = anchor_finder.get_next_anchor
@@ -238,68 +242,75 @@ class TextAlignment::TextAlignment
238
242
  b1 = lblock.nil? ? 0 : lblock[:source][:end]
239
243
  e1 = cblock.nil? ? str1.length : cblock[:source][:begin]
240
244
 
241
- if b1 < e1
245
+ if b1 <= e1
246
+ _str1 = str1[b1 ... e1]
247
+
242
248
  b2 = lblock.nil? ? 0 : lblock[:target][:end]
243
249
  e2 = cblock.nil? ? str2.length : cblock[:target][:begin]
244
- _str1 = str1[b1 ... e1]
245
- _str2 = str2[b2 ... e2]
246
250
 
247
- sum += if _str1.strip.empty? || _str2.strip.empty?
248
- [{source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}]
249
- else
250
- len_buffer = ((e1 - b1) * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
251
- region_state, state_region = cultivation_map.region_state([b2, e2])
252
- case region_state
253
- when :closed
254
- [{source:{begin:b1, end:e1}, alignment: :empty}]
255
- when :front_open
256
- if sum.empty? # when there is no preceding matched block
257
- [{source:{begin:b1, end:e1}, alignment: :empty}]
258
- else
259
- oe2 = state_region[1]
260
- me2 = (oe2 - b2) > len_buffer ? b2 + len_buffer : oe2
261
- local_alignment(str1, b1, e1, str2, b2, me2, denotations, cultivation_map)
262
- end
263
- when :rear_open
264
- if cblock.nil? # when there is no following matched block
265
- [{source:{begin:b1, end:e1}, alignment: :empty}]
266
- else
267
- ob2 = state_region[0]
268
- mb2 = (e2 - ob2) > len_buffer ? e2 - len_buffer : ob2
269
- local_alignment(str1, b1, e1, str2, mb2, e2, denotations, cultivation_map)
270
- end
271
- when :middle_closed
272
- attempt1 = if sum.empty?
251
+ if b2 < e2
252
+ _str2 = str2[b2 ... e2]
253
+
254
+ sum += if _str1.strip.empty? || _str2.strip.empty?
255
+ [{source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}]
256
+ else
257
+ len_buffer = ((e1 - b1) * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
258
+ region_state, state_region = cultivation_map.region_state([b2, e2])
259
+ case region_state
260
+ when :closed
273
261
  [{source:{begin:b1, end:e1}, alignment: :empty}]
274
- else
275
- oe2 = state_region[0]
276
- me2 = (oe2 - b2) > len_buffer ? b2 + len_buffer : oe2
277
- local_alignment(str1, b1, e1, str2, b2, me2, denotations, cultivation_map)
278
- end
279
- if (attempt1.empty? || attempt1.first[:alignment] == :empty) && !cblock.nil?
280
- ob2 = state_region[1]
281
- mb2 = (e2 - ob2) > len_buffer ? e2 - len_buffer : ob2
282
- local_alignment(str1, b1, e1, str2, mb2, e2, denotations, cultivation_map)
283
- else
284
- attempt1
285
- end
286
- else # :open
287
- if (e2 - b2) > len_buffer
262
+ when :front_open
263
+ if sum.empty? # when there is no preceding matched block
264
+ [{source:{begin:b1, end:e1}, alignment: :empty}]
265
+ else
266
+ oe2 = state_region[1]
267
+ me2 = (oe2 - b2) > len_buffer ? b2 + len_buffer : oe2
268
+ local_alignment(str1, b1, e1, str2, b2, me2, denotations, cultivation_map)
269
+ end
270
+ when :rear_open
271
+ if cblock.nil? # when there is no following matched block
272
+ [{source:{begin:b1, end:e1}, alignment: :empty}]
273
+ else
274
+ ob2 = state_region[0]
275
+ mb2 = (e2 - ob2) > len_buffer ? e2 - len_buffer : ob2
276
+ local_alignment(str1, b1, e1, str2, mb2, e2, denotations, cultivation_map)
277
+ end
278
+ when :middle_closed
288
279
  attempt1 = if sum.empty?
289
280
  [{source:{begin:b1, end:e1}, alignment: :empty}]
290
281
  else
291
- local_alignment(str1, b1, e1, str2, b2, b2 + len_buffer, denotations, cultivation_map)
282
+ oe2 = state_region[0]
283
+ me2 = (oe2 - b2) > len_buffer ? b2 + len_buffer : oe2
284
+ local_alignment(str1, b1, e1, str2, b2, me2, denotations, cultivation_map)
292
285
  end
293
286
  if (attempt1.empty? || attempt1.first[:alignment] == :empty) && !cblock.nil?
294
- local_alignment(str1, b1, e1, str2, e2 - len_buffer, e2, denotations, cultivation_map)
287
+ ob2 = state_region[1]
288
+ mb2 = (e2 - ob2) > len_buffer ? e2 - len_buffer : ob2
289
+ local_alignment(str1, b1, e1, str2, mb2, e2, denotations, cultivation_map)
295
290
  else
296
291
  attempt1
297
292
  end
298
- else
299
- local_alignment(str1, b1, e1, str2, b2, e2, denotations, cultivation_map)
293
+ else # :open
294
+ if (e2 - b2) > len_buffer
295
+ attempt1 = if sum.empty?
296
+ [{source:{begin:b1, end:e1}, alignment: :empty}]
297
+ else
298
+ local_alignment(str1, b1, e1, str2, b2, b2 + len_buffer, denotations, cultivation_map)
299
+ end
300
+ if (attempt1.empty? || attempt1.first[:alignment] == :empty) && !cblock.nil?
301
+ local_alignment(str1, b1, e1, str2, e2 - len_buffer, e2, denotations, cultivation_map)
302
+ else
303
+ attempt1
304
+ end
305
+ else
306
+ local_alignment(str1, b1, e1, str2, b2, e2, denotations, cultivation_map)
307
+ end
300
308
  end
301
309
  end
310
+ elsif b2 > e2 # when out of order
311
+ # ToDo
302
312
  end
313
+
303
314
  end
304
315
 
305
316
  lblock = cblock
@@ -320,7 +331,7 @@ class TextAlignment::TextAlignment
320
331
 
321
332
  def local_alignment(str1, b1, e1, str2, b2, e2, denotations = nil, cultivation_map)
322
333
  tblocks = term_based_alignment(str1, b1, e1, str2, b2, e2, denotations, cultivation_map)
323
- if tblocks.empty?
334
+ if tblocks.empty? || tblocks.first[:alignment] == :empty
324
335
  lcs_alignment(str1, b1, e1, str2, b2, e2, cultivation_map)
325
336
  else
326
337
  tblocks
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.11.3'
2
+ VERSION = '0.11.10'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.3
4
+ version: 0.11.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-19 00:00:00.000000000 Z
11
+ date: 2021-04-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary
@@ -111,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
111
111
  - !ruby/object:Gem::Version
112
112
  version: '0'
113
113
  requirements: []
114
- rubygems_version: 3.0.8
114
+ rubygems_version: 3.0.9
115
115
  signing_key:
116
116
  specification_version: 4
117
117
  summary: Ruby class for aligning two character strings