text_alignment 0.11.1 → 0.11.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ef59c0cd578ed453a67edeb3b29059f4b76c4c541f777fa35a06e76b299e2564
4
- data.tar.gz: 5594c0f6eb1d52cc331c210fdf936e7cb09e30277f21933027e55b0c8cfa0e24
3
+ metadata.gz: 81994ec9a8c7c08d2aad32b351b5942fef1748e4035ba762af546d4f3fe7cee8
4
+ data.tar.gz: bfa75451d33b9d21c2baa1a52280f03486bf10a41b5ac2a97469f0ca3a4f7379
5
5
  SHA512:
6
- metadata.gz: ef5bae492d7e7b840c00943ac24e571392c4c992a085c6a63c8cb6db126ed9137ba94f8cd7af42a91e17aa327cb9a5ce24b909944c157f21beb8a88c8ce7528b
7
- data.tar.gz: b6d84dcdc35399f91a0f6a5a24d84b11169de9d861a249acca52f0fec86e6f1fbf1bb4dbb47a5d43654fa43735b181aa64034447fb71ca675090818dcf67133a
6
+ metadata.gz: 73d0ff212a89d6ad33751f87f14a9b292b45ce177c61efd0ede5f852eb3834d1bd3940d202c4d87cfb0422c4dc566dea30c560f8500a220dcd3e1dd492e29eac
7
+ data.tar.gz: 6c763a564e339267624bec5c809b334b0adf9951d25409eac21ae4b4582beae4a08739119dfa369022c44c19d4faacfd0e06766c6223bd219a43c3961cfab08c
@@ -37,7 +37,7 @@ def align_denotations(denotations, source_text, alignment, debug = false)
37
37
  end
38
38
 
39
39
  lost_annotations = alignment.lost_annotations
40
- unless lost_annotations.empty?
40
+ unless lost_annotations.nil? || lost_annotations.empty?
41
41
  warn "\n[lost annotations] #{lost_annotations.length}"
42
42
  lost_annotations.each do |a|
43
43
  warn "#{a}"
@@ -128,4 +128,4 @@ else
128
128
  end
129
129
 
130
130
  # pp alignment.block_alignment
131
- puts target_annotations.to_json
131
+ # puts target_annotations.to_json
@@ -61,6 +61,7 @@ TextAlignment::CHAR_MAPPING = [
61
61
  ["•", "*"], #U+2022 (bullet)
62
62
  [" ", " "], #U+2009 (thin space)
63
63
  [" ", " "], #U+200A (hair space)
64
+ [" ", " "], #U+202F (narrow no-break space)
64
65
  [" ", " "], #U+00A0 (Non-Breaking space)
65
66
  [" ", " "], #U+3000 (ideographic space)
66
67
  ["‐", "-"], #U+2010 (Hyphen)
@@ -94,10 +95,10 @@ class TextAlignment::CharMapping
94
95
  @index_demap[position]
95
96
  end
96
97
 
97
- def enmap_denotations(_denotations)
98
- return nil if _denotations.nil?
98
+ def enmap_denotations(denotations)
99
+ return nil if denotations.nil?
99
100
 
100
- denotations = _denotations.map do |d|
101
+ denotations.map do |d|
101
102
  d.dup.merge(span:{begin:enmap_position(d[:span][:begin]), end:enmap_position(d[:span][:end])})
102
103
  end
103
104
  end
@@ -175,7 +176,7 @@ if __FILE__ == $0
175
176
  exit
176
177
  end
177
178
  annotations = JSON.parse File.read(ARGV[0]).strip, symbolize_names: true
178
- denotations = annotations[:denotations]
179
+ denotations = annotations[:denotations] || []
179
180
  if denotations.nil? && annotations[:tracks]
180
181
  denotations = annotations[:tracks].first[:denotations]
181
182
  end
@@ -56,7 +56,7 @@ class TextAlignment::CultivationMap
56
56
  else
57
57
  if front_open?(region, closed_parts)
58
58
  if rear_open?(region, closed_parts)
59
- [:middle_closed, [closed_parts.first[1], closed_parts.last[0]]]
59
+ [:middle_closed, [closed_parts.first[0], closed_parts.last[1]]]
60
60
  else
61
61
  [:front_open, [region[0], closed_parts.first[0]]]
62
62
  end
@@ -70,7 +70,7 @@ class TextAlignment::CultivationMap
70
70
  end
71
71
  end
72
72
 
73
- def index(target, string, position)
73
+ def index(target, string, position = 0)
74
74
  length = target.length
75
75
  loop do
76
76
  _begin = string.index(target, position)
@@ -21,7 +21,7 @@ class TextAlignment::TextAlignment
21
21
  @to_prevent_overlap = to_prevent_overlap
22
22
 
23
23
  @original_text = nil
24
- @block_alignment = nil
24
+ @blocks = nil
25
25
  @cultivation_map = TextAlignment::CultivationMap.new
26
26
  end
27
27
 
@@ -39,45 +39,20 @@ class TextAlignment::TextAlignment
39
39
  denotations_mapped = @text_mapping.enmap_denotations(denotations)
40
40
 
41
41
  ## To generate the block_alignment of the input text against the reference text
42
- # Initialization
43
- # @block_alignment = {text: @original_text, reference_text: @original_reference_text, denotations: denotations}
44
- @block_alignment = {text: @mapped_text, reference_text: @mapped_reference_text, denotations: denotations}
45
-
46
- # Generation
47
- @block_alignment[:blocks] = if r = whole_block_alignment(@mapped_text, @mapped_reference_text, @cultivation_map)
42
+ @blocks = if r = whole_block_alignment(@mapped_text, @mapped_reference_text, @cultivation_map)
48
43
  r
49
44
  else
50
45
  find_block_alignment(@mapped_text, @mapped_reference_text, denotations_mapped, @cultivation_map)
51
46
  end
52
- end
53
-
54
- def update_cultivation_map
55
- return if @block_alignment.nil? || @block_alignment[:blocks].nil?
56
-
57
- ## To update the cultivation map
58
- newly_cultivated_regions = @block_alignment[:blocks].collect do |b|
59
- if b[:alignment] == :block || b[:alignment] == :term
60
- [b[:target][:begin], b[:target][:end]]
61
- else
62
- nil
63
- end
64
- end.compact.inject([]) do |condensed, region|
65
- if condensed.empty? || (condensed.last.last + 1 < region.first)
66
- condensed.push region
67
- else
68
- condensed.last[1] = region.last
69
- end
70
- condensed
71
- end
72
47
 
73
- @cultivation_map.cultivate(newly_cultivated_regions)
48
+ @block_alignment = {text: @original_text, reference_text: @original_reference_text, denotations: denotations, blocks: demap_blocks(@blocks)}
74
49
  end
75
50
 
76
51
  def transform_begin_position(_begin_position)
77
52
  begin_position = @text_mapping.enmap_position(_begin_position)
78
53
 
79
- i = @block_alignment[:blocks].index{|b| b[:source][:end] > begin_position}
80
- block = @block_alignment[:blocks][i]
54
+ i = @blocks.index{|b| b[:source][:end] > begin_position}
55
+ block = @blocks[i]
81
56
 
82
57
  b = if block[:alignment] == :block || block[:alignment] == :term
83
58
  begin_position + block[:delta]
@@ -98,8 +73,8 @@ class TextAlignment::TextAlignment
98
73
  def transform_end_position(_end_position)
99
74
  end_position = @text_mapping.enmap_position(_end_position)
100
75
 
101
- i = @block_alignment[:blocks].index{|b| b[:source][:end] >= end_position}
102
- block = @block_alignment[:blocks][i]
76
+ i = @blocks.index{|b| b[:source][:end] >= end_position}
77
+ block = @blocks[i]
103
78
 
104
79
  e = if block[:alignment] == :block || block[:alignment] == :term
105
80
  end_position + block[:delta]
@@ -160,8 +135,8 @@ class TextAlignment::TextAlignment
160
135
  end
161
136
 
162
137
  def alignment_show
163
- stext = @mapped_text
164
- ttext = @mapped_reference_text
138
+ stext = @block_alignment[:text]
139
+ ttext = @block_alignment[:reference_text]
165
140
 
166
141
  show = ''
167
142
  @block_alignment[:blocks].each do |a|
@@ -276,20 +251,32 @@ class TextAlignment::TextAlignment
276
251
  region_state, state_region = cultivation_map.region_state([b2, e2])
277
252
  case region_state
278
253
  when :closed
279
- []
254
+ [{source:{begin:b1, end:e1}, alignment: :empty}]
280
255
  when :front_open
281
- oe2 = state_region[1]
282
- me2 = (oe2 - b2) > len_buffer ? b2 + len_buffer : oe2
283
- local_alignment(str1, b1, e1, str2, b2, me2, denotations, cultivation_map)
256
+ if sum.empty? # when there is no preceding matched block
257
+ [{source:{begin:b1, end:e1}, alignment: :empty}]
258
+ else
259
+ oe2 = state_region[1]
260
+ me2 = (oe2 - b2) > len_buffer ? b2 + len_buffer : oe2
261
+ local_alignment(str1, b1, e1, str2, b2, me2, denotations, cultivation_map)
262
+ end
284
263
  when :rear_open
285
- ob2 = state_region[0]
286
- mb2 = (e2 - ob2) > len_buffer ? e2 - len_buffer : ob2
287
- local_alignment(str1, b1, e1, str2, mb2, e2, denotations, cultivation_map)
264
+ if cblock.nil? # when there is no following matched block
265
+ [{source:{begin:b1, end:e1}, alignment: :empty}]
266
+ else
267
+ ob2 = state_region[0]
268
+ mb2 = (e2 - ob2) > len_buffer ? e2 - len_buffer : ob2
269
+ local_alignment(str1, b1, e1, str2, mb2, e2, denotations, cultivation_map)
270
+ end
288
271
  when :middle_closed
289
- oe2 = state_region[0]
290
- me2 = (oe2 - b2) > len_buffer ? b2 + len_buffer : oe2
291
- attempt1 = local_alignment(str1, b1, e1, str2, b2, me2, denotations, cultivation_map)
292
- if attempt1.empty?
272
+ attempt1 = if sum.empty?
273
+ [{source:{begin:b1, end:e1}, alignment: :empty}]
274
+ else
275
+ oe2 = state_region[0]
276
+ me2 = (oe2 - b2) > len_buffer ? b2 + len_buffer : oe2
277
+ local_alignment(str1, b1, e1, str2, b2, me2, denotations, cultivation_map)
278
+ end
279
+ if (attempt1.empty? || attempt1.first[:alignment] == :empty) && !cblock.nil?
293
280
  ob2 = state_region[1]
294
281
  mb2 = (e2 - ob2) > len_buffer ? e2 - len_buffer : ob2
295
282
  local_alignment(str1, b1, e1, str2, mb2, e2, denotations, cultivation_map)
@@ -298,8 +285,12 @@ class TextAlignment::TextAlignment
298
285
  end
299
286
  else # :open
300
287
  if (e2 - b2) > len_buffer
301
- attempt1 = local_alignment(str1, b1, e1, str2, b2, b2 + len_buffer, denotations, cultivation_map)
302
- if attempt1.empty?
288
+ attempt1 = if sum.empty?
289
+ [{source:{begin:b1, end:e1}, alignment: :empty}]
290
+ else
291
+ local_alignment(str1, b1, e1, str2, b2, b2 + len_buffer, denotations, cultivation_map)
292
+ end
293
+ if (attempt1.empty? || attempt1.first[:alignment] == :empty) && !cblock.nil?
303
294
  local_alignment(str1, b1, e1, str2, e2 - len_buffer, e2, denotations, cultivation_map)
304
295
  else
305
296
  attempt1
@@ -318,10 +309,10 @@ class TextAlignment::TextAlignment
318
309
  end
319
310
 
320
311
  def whole_block_alignment(str1, str2, cultivation_map)
321
- block_begin = cultivation_map.index(str1, str2, 0)
312
+ block_begin = cultivation_map.index(str1, str2)
322
313
  return [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}] unless block_begin.nil?
323
314
 
324
- block_begin = cultivation_map.index(str1.downcase, str2.downcase, 0)
315
+ block_begin = cultivation_map.index(str1.downcase, str2.downcase)
325
316
  return [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}] unless block_begin.nil?
326
317
 
327
318
  nil
@@ -405,4 +396,38 @@ class TextAlignment::TextAlignment
405
396
  end
406
397
  end
407
398
 
399
+ def update_cultivation_map
400
+ return if @blocks.nil?
401
+
402
+ ## To update the cultivation map
403
+ newly_cultivated_regions = @blocks.collect do |b|
404
+ if b[:alignment] == :block || b[:alignment] == :term
405
+ [b[:target][:begin], b[:target][:end]]
406
+ else
407
+ nil
408
+ end
409
+ end.compact.inject([]) do |condensed, region|
410
+ if condensed.empty? || (condensed.last.last + 1 < region.first)
411
+ condensed.push region
412
+ else
413
+ condensed.last[1] = region.last
414
+ end
415
+ condensed
416
+ end
417
+
418
+ @cultivation_map.cultivate(newly_cultivated_regions)
419
+ end
420
+
421
+ def demap_blocks(_blocks)
422
+ return nil if _blocks.nil?
423
+
424
+ blocks = _blocks.map{|b| b.dup}
425
+ blocks.each do |b|
426
+ b[:source] = {begin:@text_mapping.demap_position(b[:source][:begin]), end:@text_mapping.demap_position(b[:source][:end])} if b[:source]
427
+ b[:target] = {begin:@rtext_mapping.demap_position(b[:target][:begin]), end:@rtext_mapping.demap_position(b[:target][:end])} if b[:target]
428
+ end
429
+
430
+ blocks
431
+ end
432
+
408
433
  end
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.11.1'
2
+ VERSION = '0.11.2'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.1
4
+ version: 0.11.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-08 00:00:00.000000000 Z
11
+ date: 2021-03-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary