text_alignment 0.11.3 → 0.11.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0f019e7fbd144890e96eda8f2cf9b27cf091930c96b81236452172a5142e2cf3
4
- data.tar.gz: 47d32ec727511d53730bf56557992f972f7747832f9437f5e2f5798cd0764f41
3
+ metadata.gz: 5f95c7fb8bfdeb768fbd1400f7e785a2ed18016322f2cdcba3ea7196aa4e86ac
4
+ data.tar.gz: cad0296a218108884703af07bcbf1b303a6e12c51ab14ad872586c50cfc8e82c
5
5
  SHA512:
6
- metadata.gz: bf2720ce7af3612a8c0b1823bf6265e90f0d5e92f315d7eb697c4b13e1c9752e795adb5b1dbc840629379f3e96cf94115dd5d9400663f1d43a4caf428274f69a
7
- data.tar.gz: d7bca56968c54fa68d83b5e9d89aa89107774cde761117e28d561cf9c63226b08a613e84b3484f2df58edaadfac7e1b286155bc3486559d6fd7c63d6ec082907
6
+ metadata.gz: 5d18a8d142974967fcfe358b561c83d76d1c5d8d2a277092ed4a8b42de14cd791b004830ff9f145735e6bc0a1265295f4fd6190c674ba62b57190d7e035a863b
7
+ data.tar.gz: c155ff9780d5f82893825787ee39ba319ddb1f16396d6a933334a83e485ff4d2eb2840d5fc1399f9873cd69a2458f029ced0d128d9099c4f08137d360e3e2007
@@ -2,6 +2,7 @@
2
2
  require 'text_alignment'
3
3
  require 'json'
4
4
  require 'pp'
5
+ require 'optparse'
5
6
 
6
7
  def read_annotations(filename)
7
8
  case File.extname(filename)
@@ -108,24 +109,54 @@ def align_mannotations(source_annotations, reference_text, alignment, debug = fa
108
109
  end
109
110
 
110
111
 
112
+ ## Options
113
+ options = {}
114
+ verbose = false
115
+
116
+ ## command line option processing
117
+ require 'optparse'
118
+ optparse = OptionParser.new do |opts|
119
+ opts.banner = "Usage: align_annotations [options] target_annotations(.json|.txt) reference_text(.json|.txt)"
120
+
121
+ opts.on('-d', '--duplicate', 'tells it to assume there may be duplicate texts.') do
122
+ options[:duplicate_texts] = true
123
+ end
124
+
125
+ opts.on('-w', '--no-whitespaces', 'tells it to ignore whitespaces.') do
126
+ options[:to_ignore_whitespaces] = true
127
+ end
128
+
129
+ opts.on('-o', '--no-order', 'tells it to ignore the order of the texts.') do
130
+ options[:to_ignore_text_order] = true
131
+ end
132
+
133
+ opts.on('-v', '--verbose', 'tells it to show the state verbosely for debugging.') do
134
+ verbose = true
135
+ end
136
+
137
+ opts.on('-h', '--help', 'displays this screen.') do
138
+ puts opts
139
+ exit
140
+ end
141
+ end
142
+
143
+ optparse.parse!
144
+
111
145
  unless ARGV.length == 2
112
- warn "align_annotations target_annotations(.json|.txt) reference_annotations(.json|.txt)"
113
- exit
146
+ puts optparse.help
147
+ exit 1
114
148
  end
115
149
 
116
150
  source_annotations = read_annotations(ARGV[0])
117
151
  reference_text = read_text(ARGV[1])
118
152
 
119
- alignment = TextAlignment::TextAlignment.new(reference_text, true)
153
+ alignment = TextAlignment::TextAlignment.new(reference_text, options)
120
154
 
121
155
  target_annotations = if source_annotations.class == Array
122
- # align_mannotations(source_annotations, reference_text, alignment, true)
123
- align_mannotations(source_annotations, reference_text, alignment, false)
156
+ align_mannotations(source_annotations, reference_text, alignment, verbose)
124
157
  else
125
- # denotations = align_denotations(source_annotations[:denotations], source_annotations[:text], alignment)
126
- denotations = align_denotations(source_annotations[:denotations], source_annotations[:text], alignment, false)
158
+ denotations = align_denotations(source_annotations[:denotations], source_annotations[:text], alignment, verbose)
127
159
  source_annotations.merge({text:reference_text, denotations:denotations})
128
160
  end
129
161
 
130
- # pp alignment.block_alignment
131
162
  # puts target_annotations.to_json
@@ -6,11 +6,18 @@ module TextAlignment; end unless defined? TextAlignment
6
6
 
7
7
  class TextAlignment::AnchorFinder
8
8
 
9
- def initialize(source_str, target_str, cultivation_map)
9
+ def initialize(source_str, target_str, cultivation_map, to_ignore_whitespaces = false, to_ignore_text_order = false)
10
+ @method_get_left_windows, @method_get_right_windows = if to_ignore_whitespaces
11
+ [method(:get_left_windows_no_squeeze_ws), method(:get_right_windows_no_squeeze_ws)]
12
+ else
13
+ [method(:get_left_windows), method(:get_right_windows)]
14
+ end
15
+
10
16
  @s1 = source_str.downcase
11
17
  @s2 = target_str.downcase
12
18
 
13
19
  @cultivation_map = cultivation_map
20
+ @to_ignore_text_order = to_ignore_text_order
14
21
 
15
22
  @size_ngram = TextAlignment::SIZE_NGRAM
16
23
  @size_window = TextAlignment::SIZE_WINDOW
@@ -65,10 +72,7 @@ class TextAlignment::AnchorFinder
65
72
  # to get the anchor to search for in s2
66
73
  anchor = @s1[beg_s1, @size_ngram]
67
74
 
68
- # comment out below with the assumption that texts are in the same order
69
- # search_position = 0
70
- search_position = @pos_s2_last_match
71
-
75
+ search_position = @to_ignore_text_order ? 0 : @pos_s2_last_match
72
76
  beg_s2_candidates = find_beg_s2_candidates(anchor, search_position)
73
77
  return nil if beg_s2_candidates.empty?
74
78
 
@@ -108,14 +112,14 @@ class TextAlignment::AnchorFinder
108
112
  next
109
113
  end
110
114
 
111
- left_window_s1, left_window_s2 = get_left_windows(beg_s1, beg_s2, size_window)
115
+ left_window_s1, left_window_s2 = @method_get_left_windows.call(beg_s1, beg_s2, size_window)
112
116
  if left_window_s1 && (text_similarity(left_window_s1, left_window_s2) > @sim_threshold)
113
117
  break unless valid_beg_s2.nil?
114
118
  valid_beg_s2 = beg_s2
115
119
  next
116
120
  end
117
121
 
118
- right_window_s1, right_window_s2 = get_right_windows(beg_s1, beg_s2, size_window)
122
+ right_window_s1, right_window_s2 = @method_get_right_windows.call(beg_s1, beg_s2, size_window)
119
123
  if right_window_s2 && (text_similarity(right_window_s1, right_window_s2) > @sim_threshold)
120
124
  break unless valid_beg_s2.nil?
121
125
  valid_beg_s2 = beg_s2
@@ -139,7 +143,7 @@ class TextAlignment::AnchorFinder
139
143
  size_window ||= @size_window
140
144
 
141
145
  # comment out below with the assumption that the beginning of a document gives a significant locational information
142
- # return if @beg_s1 < size_window || @beg_s2 < size_window
146
+ # return if beg_s1 < size_window || beg_s2 < size_window
143
147
 
144
148
  window_s1 = ''
145
149
  loc = beg_s1 - 1
@@ -170,7 +174,7 @@ class TextAlignment::AnchorFinder
170
174
  size_window ||= @size_window
171
175
 
172
176
  # comment out below with the assumption that the end of a document gives significant locational information
173
- # return if (@beg_s1 + @size_ngram > (@s1.length - size_window)) || (@beg_s2 + @size_ngram > (@s2.length - size_window))
177
+ # return if (beg_s1 + @size_ngram > (@s1.length - size_window)) || (beg_s2 + @size_ngram > (@s2.length - size_window))
174
178
 
175
179
  window_s1 = ''
176
180
  loc = beg_s1 + @size_ngram
@@ -199,6 +203,44 @@ class TextAlignment::AnchorFinder
199
203
  [window_s1, window_s2]
200
204
  end
201
205
 
206
+ def get_left_windows_no_squeeze_ws(beg_s1, beg_s2, size_window = nil)
207
+ size_window ||= @size_window
208
+
209
+ # comment out below with the assumption that the beginning of a document gives a significant locational information
210
+ # return if beg_s1 < size_window || beg_s2 < size_window
211
+
212
+ wbeg = beg_s1 - size_window
213
+ wbeg = 0 if wbeg < 0
214
+ window_s1 = @s1[wbeg ... beg_s1]
215
+
216
+ wbeg = beg_s2 - size_window
217
+ wbeg = 0 if wbeg < 0
218
+ window_s2 = @s2[wbeg ... beg_s2]
219
+
220
+ [window_s1, window_s2]
221
+ end
222
+
223
+ def get_right_windows_no_squeeze_ws(beg_s1, beg_s2, size_window = nil)
224
+ size_window ||= @size_window
225
+
226
+ # comment out below with the assumption that the end of a document gives significant locational information
227
+ # return if (beg_s1 + @size_ngram > (@s1.length - size_window)) || (beg_s2 + @size_ngram > (@s2.length - size_window))
228
+
229
+ slen = @s1.length
230
+ wbeg = beg_s1 + @size_ngram
231
+ wend = wbeg + size_window
232
+ wend = slen if wend > slen
233
+ window_s1 = @s1[wbeg ... wend]
234
+
235
+ slen = @s2.length
236
+ wbeg = beg_s2 + @size_ngram
237
+ wend = wbeg + size_window
238
+ wend = slen if wend > slen
239
+ window_s2 = @s2[wbeg ... wend]
240
+
241
+ [window_s1, window_s2]
242
+ end
243
+
202
244
  def text_similarity(str1, str2, ngram_order = 2)
203
245
  return 0 if str1.nil? || str2.nil?
204
246
  String::Similarity.cosine(str1, str2, ngram:ngram_order)
@@ -1,3 +1,5 @@
1
+ require 'strscan'
2
+
1
3
  module TextAlignment; end unless defined? TextAlignment
2
4
 
3
5
  TextAlignment::CHAR_MAPPING = [
@@ -78,10 +80,18 @@ TextAlignment::CHAR_MAPPING = [
78
80
 
79
81
 
80
82
  class TextAlignment::CharMapping
81
- attr_reader :mapped_text
83
+ attr_reader :mapped_text, :index_enmap
84
+
85
+ def initialize(_text, char_mapping = nil, to_ignore_whitespaces = false)
86
+ if to_ignore_whitespaces
87
+ @method_get_positions_squeeze_ws = method(:get_positions_squeeze_ws_0)
88
+ @method_squeeze_ws = method(:squeeze_ws_0!)
89
+ else
90
+ @method_get_positions_squeeze_ws = method(:get_positions_squeeze_ws_1)
91
+ @method_squeeze_ws = method(:squeeze_ws_1!)
92
+ end
82
93
 
83
- def initialize(_text, char_mapping = nil)
84
- char_mapping ||= TextAlignment::CHAR_MAPPING
94
+ char_mapping ||= TextAlignment::CHAR_MAPPING.sort{|a, b| b[1].length <=> a[1].length}
85
95
  @mapped_text, offset_mapping = enmap_text(_text, char_mapping)
86
96
  @index_enmap = offset_mapping.to_h
87
97
  @index_demap = offset_mapping.map{|m| m.reverse}.to_h
@@ -105,22 +115,22 @@ class TextAlignment::CharMapping
105
115
 
106
116
  private
107
117
 
108
- def enmap_text(_text, char_mapping)
118
+ def enmap_text(_text, char_mapping, no_ws = false)
109
119
  text = _text.dup
110
120
 
111
- # To execute the single letter mapping
121
+ # To execute the single letter mapping replacement
112
122
  char_mapping.each do |one, long|
113
123
  text.gsub!(one, long) if long.length == 1
114
124
  end
115
125
 
116
- # To get the (location, length) index for replacements
117
- loc_len = []
126
+ # To get the replacement positions, (position, old_length, new_length), for char mappings
127
+ rpositions = []
118
128
  char_mapping.each do |one, long|
119
129
  next if long.length == 1
120
130
 
121
131
  init_next = 0
122
132
  while loc = text.index(long, init_next)
123
- loc_len << [loc, long.length]
133
+ rpositions << [loc, long.length, 1]
124
134
  init_next = loc + long.length
125
135
  end
126
136
 
@@ -128,32 +138,27 @@ class TextAlignment::CharMapping
128
138
  text.gsub!(long, one * long.length)
129
139
  end
130
140
 
131
- # To get the (location, length) index for consecutive whitespace sequences
132
- init_next = 0
133
- while loc = text.index(/\s{2,}/, init_next)
134
- len = $~[0].length
135
- loc_len << [loc, len]
136
- init_next = loc + len
137
- end
138
-
139
- loc_len.sort!{|a, b| a[0] <=> b[0]}
141
+ # To get the replacement positions, (position, old_length, new_length), for consecutive whitespaces
142
+ rpositions += @method_get_positions_squeeze_ws.call(text)
143
+ rpositions.sort!{|a, b| a[0] <=> b[0]}
140
144
 
141
145
  # To get the offset_mapping before and after replacement
142
- offset_mapping = []
143
- init_next = 0
144
- j = 0
145
-
146
- loc_len.each do |loc, len|
147
- offset_mapping += (init_next .. loc).map do |i|
148
- j += 1
149
- [i, j - 1]
146
+ offset_mapping = begin
147
+ i, j = 0, 0
148
+
149
+ offset_mappings = rpositions.map do |loc, old_len, new_len|
150
+ pre_len = loc - i
151
+ m = (0 .. pre_len).map{|c| [i + c, j + c]}
152
+ i = loc + old_len
153
+ j += pre_len + new_len
154
+
155
+ m
150
156
  end
151
- init_next = loc + len
152
- end
153
157
 
154
- offset_mapping += (init_next .. text.length).map do |i|
155
- j += 1
156
- [i, j - 1]
158
+ pre_len = text.length - i
159
+ offset_mappings << (0 .. pre_len).map{|c| [i + c, j + c]}
160
+
161
+ offset_mappings.reduce(:+)
157
162
  end
158
163
 
159
164
  # To execute the long letter mapping
@@ -162,14 +167,40 @@ class TextAlignment::CharMapping
162
167
  end
163
168
 
164
169
  # To replace multi whitespace sequences to a space
165
- text.gsub!(/\s{2,}/, ' ')
170
+ @method_squeeze_ws.call(text)
166
171
 
167
172
  [text, offset_mapping]
168
173
  end
174
+
175
+ # To get squeeze positions of whitespaces to one
176
+ def get_positions_squeeze_ws_1(text)
177
+ rpositions = []
178
+ text.scan(/\s{2,}/) do |s|
179
+ loc = $~.begin(0)
180
+ len = $~.end(0) - loc
181
+ rpositions << [loc, len, 1]
182
+ end
183
+ rpositions
184
+ end
185
+
186
+ # To get squeeze positions of whitespaces to zero
187
+ def get_positions_squeeze_ws_0(text)
188
+ text.enum_for(:scan, /\s+/).map{[b = $~.begin(0), $~.end(0) - b, 0]}
189
+ end
190
+
191
+ def squeeze_ws_1!(text)
192
+ text.gsub!(/\s{2,}/, ' ')
193
+ end
194
+
195
+ def squeeze_ws_0!(text)
196
+ text.gsub!(/\s+/, '')
197
+ end
198
+
169
199
  end
170
200
 
171
201
  if __FILE__ == $0
172
202
  require 'json'
203
+ # require 'profile'
173
204
 
174
205
  unless ARGV.length == 1
175
206
  warn "#{$0} an_annotation_json_file.json"
@@ -181,10 +212,11 @@ if __FILE__ == $0
181
212
  denotations = annotations[:tracks].first[:denotations]
182
213
  end
183
214
 
184
- text_mapping = TextAlignment::CharMapping.new(annotations[:text])
215
+ text_mapping = TextAlignment::CharMapping.new(annotations[:text], nil, false)
216
+ # text_mapping = TextAlignment::CharMapping.new(annotations[:text], nil, true)
185
217
  text_mapped = text_mapping.mapped_text
186
218
  denotations_mapped = text_mapping.enmap_denotations(denotations)
187
219
  new_annotations = {text:text_mapped, denotations:denotations_mapped}
188
220
 
189
- puts new_annotations.to_json
221
+ # puts new_annotations.to_json
190
222
  end
@@ -147,13 +147,24 @@ class TextAlignment::MixedAlignment
147
147
  # recoverability
148
148
  count_nws = sdiff.count{|d| d.old_element =~ /\S/}
149
149
  count_nws_match = sdiff.count{|d| d.action == '=' && d.old_element =~ /\S/}
150
-
151
150
  coverage = count_nws_match.to_f / count_nws
152
151
 
153
152
  # fragmentation rate
154
- count_ofrag = sdiff.count{|d| d.old_element =~ /\s/} + 1
155
- count_frag = sdiff.collect{|d| (d.action == '=') && (d.old_element =~/\s/) ? ' ' : d.action}.join.scan(/=+/).count
156
- rate_frag = count_ofrag.to_f / count_frag
153
+ frag_str = sdiff.collect do |d|
154
+ case d.action
155
+ when '='
156
+ '='
157
+ when '-'
158
+ ''
159
+ when '+'
160
+ (d.new_element =~ /\S/) ? '+' : ''
161
+ else
162
+ ''
163
+ end
164
+ end.join.sub(/^[^=]++/, '').sub(/[^=]+$/, '')
165
+
166
+ count_frag = frag_str.scan(/=+/).count
167
+ rate_frag = 1.0 / count_frag
157
168
 
158
169
  similarity = coverage * rate_frag
159
170
  end
@@ -11,14 +11,18 @@ class TextAlignment::TextAlignment
11
11
  attr_reader :similarity
12
12
  attr_reader :lost_annotations
13
13
 
14
- # Initialize with a reference text, again which texts will be aligned
15
- def initialize(reference_text, to_prevent_overlap = false)
14
+ # Initialize with a reference text, against which texts will be aligned
15
+ def initialize(reference_text, options = {})
16
16
  raise ArgumentError, "nil text" if reference_text.nil?
17
17
 
18
+ options ||= {}
19
+ @duplicate_texts = options[:duplicate_texts] || false
20
+ @to_ignore_whitespaces = options[:to_ignore_whitespaces] || false
21
+ @to_ignore_text_order = options[:to_ignore_text_order] || false
22
+
18
23
  @original_reference_text = reference_text
19
- @rtext_mapping = TextAlignment::CharMapping.new(reference_text)
24
+ @rtext_mapping = TextAlignment::CharMapping.new(reference_text, nil, @to_ignore_whitespaces)
20
25
  @mapped_reference_text = @rtext_mapping.mapped_text
21
- @to_prevent_overlap = to_prevent_overlap
22
26
 
23
27
  @original_text = nil
24
28
  @blocks = nil
@@ -27,12 +31,12 @@ class TextAlignment::TextAlignment
27
31
 
28
32
  def align(text, denotations = nil)
29
33
  # To maintain the cultivation map
30
- update_cultivation_map if @to_prevent_overlap
34
+ update_cultivation_map unless @duplicate_texts
31
35
 
32
36
  # In case the input text is the same as the previous one, reuse the previous text mapping
33
37
  unless @original_text && @original_text == text
34
38
  @original_text = text
35
- @text_mapping = TextAlignment::CharMapping.new(text)
39
+ @text_mapping = TextAlignment::CharMapping.new(text, nil, @to_ignore_whitespaces)
36
40
  end
37
41
 
38
42
  @mapped_text = @text_mapping.mapped_text
@@ -202,7 +206,7 @@ class TextAlignment::TextAlignment
202
206
 
203
207
  def find_block_alignment(str1, str2, denotations, cultivation_map)
204
208
  ## to find block alignments
205
- anchor_finder = TextAlignment::AnchorFinder.new(str1, str2, cultivation_map)
209
+ anchor_finder = TextAlignment::AnchorFinder.new(str1, str2, cultivation_map, @to_ignore_whitespaces, @to_ignore_text_order)
206
210
 
207
211
  blocks = []
208
212
  while block = anchor_finder.get_next_anchor
@@ -238,68 +242,75 @@ class TextAlignment::TextAlignment
238
242
  b1 = lblock.nil? ? 0 : lblock[:source][:end]
239
243
  e1 = cblock.nil? ? str1.length : cblock[:source][:begin]
240
244
 
241
- if b1 < e1
245
+ if b1 <= e1
246
+ _str1 = str1[b1 ... e1]
247
+
242
248
  b2 = lblock.nil? ? 0 : lblock[:target][:end]
243
249
  e2 = cblock.nil? ? str2.length : cblock[:target][:begin]
244
- _str1 = str1[b1 ... e1]
245
- _str2 = str2[b2 ... e2]
246
250
 
247
- sum += if _str1.strip.empty? || _str2.strip.empty?
248
- [{source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}]
249
- else
250
- len_buffer = ((e1 - b1) * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
251
- region_state, state_region = cultivation_map.region_state([b2, e2])
252
- case region_state
253
- when :closed
254
- [{source:{begin:b1, end:e1}, alignment: :empty}]
255
- when :front_open
256
- if sum.empty? # when there is no preceding matched block
257
- [{source:{begin:b1, end:e1}, alignment: :empty}]
258
- else
259
- oe2 = state_region[1]
260
- me2 = (oe2 - b2) > len_buffer ? b2 + len_buffer : oe2
261
- local_alignment(str1, b1, e1, str2, b2, me2, denotations, cultivation_map)
262
- end
263
- when :rear_open
264
- if cblock.nil? # when there is no following matched block
265
- [{source:{begin:b1, end:e1}, alignment: :empty}]
266
- else
267
- ob2 = state_region[0]
268
- mb2 = (e2 - ob2) > len_buffer ? e2 - len_buffer : ob2
269
- local_alignment(str1, b1, e1, str2, mb2, e2, denotations, cultivation_map)
270
- end
271
- when :middle_closed
272
- attempt1 = if sum.empty?
251
+ if b2 < e2
252
+ _str2 = str2[b2 ... e2]
253
+
254
+ sum += if _str1.strip.empty? || _str2.strip.empty?
255
+ [{source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}]
256
+ else
257
+ len_buffer = ((e1 - b1) * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
258
+ region_state, state_region = cultivation_map.region_state([b2, e2])
259
+ case region_state
260
+ when :closed
273
261
  [{source:{begin:b1, end:e1}, alignment: :empty}]
274
- else
275
- oe2 = state_region[0]
276
- me2 = (oe2 - b2) > len_buffer ? b2 + len_buffer : oe2
277
- local_alignment(str1, b1, e1, str2, b2, me2, denotations, cultivation_map)
278
- end
279
- if (attempt1.empty? || attempt1.first[:alignment] == :empty) && !cblock.nil?
280
- ob2 = state_region[1]
281
- mb2 = (e2 - ob2) > len_buffer ? e2 - len_buffer : ob2
282
- local_alignment(str1, b1, e1, str2, mb2, e2, denotations, cultivation_map)
283
- else
284
- attempt1
285
- end
286
- else # :open
287
- if (e2 - b2) > len_buffer
262
+ when :front_open
263
+ if sum.empty? # when there is no preceding matched block
264
+ [{source:{begin:b1, end:e1}, alignment: :empty}]
265
+ else
266
+ oe2 = state_region[1]
267
+ me2 = (oe2 - b2) > len_buffer ? b2 + len_buffer : oe2
268
+ local_alignment(str1, b1, e1, str2, b2, me2, denotations, cultivation_map)
269
+ end
270
+ when :rear_open
271
+ if cblock.nil? # when there is no following matched block
272
+ [{source:{begin:b1, end:e1}, alignment: :empty}]
273
+ else
274
+ ob2 = state_region[0]
275
+ mb2 = (e2 - ob2) > len_buffer ? e2 - len_buffer : ob2
276
+ local_alignment(str1, b1, e1, str2, mb2, e2, denotations, cultivation_map)
277
+ end
278
+ when :middle_closed
288
279
  attempt1 = if sum.empty?
289
280
  [{source:{begin:b1, end:e1}, alignment: :empty}]
290
281
  else
291
- local_alignment(str1, b1, e1, str2, b2, b2 + len_buffer, denotations, cultivation_map)
282
+ oe2 = state_region[0]
283
+ me2 = (oe2 - b2) > len_buffer ? b2 + len_buffer : oe2
284
+ local_alignment(str1, b1, e1, str2, b2, me2, denotations, cultivation_map)
292
285
  end
293
286
  if (attempt1.empty? || attempt1.first[:alignment] == :empty) && !cblock.nil?
294
- local_alignment(str1, b1, e1, str2, e2 - len_buffer, e2, denotations, cultivation_map)
287
+ ob2 = state_region[1]
288
+ mb2 = (e2 - ob2) > len_buffer ? e2 - len_buffer : ob2
289
+ local_alignment(str1, b1, e1, str2, mb2, e2, denotations, cultivation_map)
295
290
  else
296
291
  attempt1
297
292
  end
298
- else
299
- local_alignment(str1, b1, e1, str2, b2, e2, denotations, cultivation_map)
293
+ else # :open
294
+ if (e2 - b2) > len_buffer
295
+ attempt1 = if sum.empty?
296
+ [{source:{begin:b1, end:e1}, alignment: :empty}]
297
+ else
298
+ local_alignment(str1, b1, e1, str2, b2, b2 + len_buffer, denotations, cultivation_map)
299
+ end
300
+ if (attempt1.empty? || attempt1.first[:alignment] == :empty) && !cblock.nil?
301
+ local_alignment(str1, b1, e1, str2, e2 - len_buffer, e2, denotations, cultivation_map)
302
+ else
303
+ attempt1
304
+ end
305
+ else
306
+ local_alignment(str1, b1, e1, str2, b2, e2, denotations, cultivation_map)
307
+ end
300
308
  end
301
309
  end
310
+ elsif b2 > e2 # when out of order
311
+ # ToDo
302
312
  end
313
+
303
314
  end
304
315
 
305
316
  lblock = cblock
@@ -320,7 +331,7 @@ class TextAlignment::TextAlignment
320
331
 
321
332
  def local_alignment(str1, b1, e1, str2, b2, e2, denotations = nil, cultivation_map)
322
333
  tblocks = term_based_alignment(str1, b1, e1, str2, b2, e2, denotations, cultivation_map)
323
- if tblocks.empty?
334
+ if tblocks.empty? || tblocks.first[:alignment] == :empty
324
335
  lcs_alignment(str1, b1, e1, str2, b2, e2, cultivation_map)
325
336
  else
326
337
  tblocks
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.11.3'
2
+ VERSION = '0.11.10'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.3
4
+ version: 0.11.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-19 00:00:00.000000000 Z
11
+ date: 2021-04-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary
@@ -111,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
111
111
  - !ruby/object:Gem::Version
112
112
  version: '0'
113
113
  requirements: []
114
- rubygems_version: 3.0.8
114
+ rubygems_version: 3.0.9
115
115
  signing_key:
116
116
  specification_version: 4
117
117
  summary: Ruby class for aligning two character strings