text_alignment 0.11.1 → 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ef59c0cd578ed453a67edeb3b29059f4b76c4c541f777fa35a06e76b299e2564
4
- data.tar.gz: 5594c0f6eb1d52cc331c210fdf936e7cb09e30277f21933027e55b0c8cfa0e24
3
+ metadata.gz: 81994ec9a8c7c08d2aad32b351b5942fef1748e4035ba762af546d4f3fe7cee8
4
+ data.tar.gz: bfa75451d33b9d21c2baa1a52280f03486bf10a41b5ac2a97469f0ca3a4f7379
5
5
  SHA512:
6
- metadata.gz: ef5bae492d7e7b840c00943ac24e571392c4c992a085c6a63c8cb6db126ed9137ba94f8cd7af42a91e17aa327cb9a5ce24b909944c157f21beb8a88c8ce7528b
7
- data.tar.gz: b6d84dcdc35399f91a0f6a5a24d84b11169de9d861a249acca52f0fec86e6f1fbf1bb4dbb47a5d43654fa43735b181aa64034447fb71ca675090818dcf67133a
6
+ metadata.gz: 73d0ff212a89d6ad33751f87f14a9b292b45ce177c61efd0ede5f852eb3834d1bd3940d202c4d87cfb0422c4dc566dea30c560f8500a220dcd3e1dd492e29eac
7
+ data.tar.gz: 6c763a564e339267624bec5c809b334b0adf9951d25409eac21ae4b4582beae4a08739119dfa369022c44c19d4faacfd0e06766c6223bd219a43c3961cfab08c
@@ -37,7 +37,7 @@ def align_denotations(denotations, source_text, alignment, debug = false)
37
37
  end
38
38
 
39
39
  lost_annotations = alignment.lost_annotations
40
- unless lost_annotations.empty?
40
+ unless lost_annotations.nil? || lost_annotations.empty?
41
41
  warn "\n[lost annotations] #{lost_annotations.length}"
42
42
  lost_annotations.each do |a|
43
43
  warn "#{a}"
@@ -128,4 +128,4 @@ else
128
128
  end
129
129
 
130
130
  # pp alignment.block_alignment
131
- puts target_annotations.to_json
131
+ # puts target_annotations.to_json
@@ -61,6 +61,7 @@ TextAlignment::CHAR_MAPPING = [
61
61
  ["•", "*"], #U+2022 (bullet)
62
62
  [" ", " "], #U+2009 (thin space)
63
63
  [" ", " "], #U+200A (hair space)
64
+ [" ", " "], #U+202F (narrow no-break space)
64
65
  [" ", " "], #U+00A0 (Non-Breaking space)
65
66
  [" ", " "], #U+3000 (ideographic space)
66
67
  ["‐", "-"], #U+2010 (Hyphen)
@@ -94,10 +95,10 @@ class TextAlignment::CharMapping
94
95
  @index_demap[position]
95
96
  end
96
97
 
97
- def enmap_denotations(_denotations)
98
- return nil if _denotations.nil?
98
+ def enmap_denotations(denotations)
99
+ return nil if denotations.nil?
99
100
 
100
- denotations = _denotations.map do |d|
101
+ denotations.map do |d|
101
102
  d.dup.merge(span:{begin:enmap_position(d[:span][:begin]), end:enmap_position(d[:span][:end])})
102
103
  end
103
104
  end
@@ -175,7 +176,7 @@ if __FILE__ == $0
175
176
  exit
176
177
  end
177
178
  annotations = JSON.parse File.read(ARGV[0]).strip, symbolize_names: true
178
- denotations = annotations[:denotations]
179
+ denotations = annotations[:denotations] || []
179
180
  if denotations.nil? && annotations[:tracks]
180
181
  denotations = annotations[:tracks].first[:denotations]
181
182
  end
@@ -56,7 +56,7 @@ class TextAlignment::CultivationMap
56
56
  else
57
57
  if front_open?(region, closed_parts)
58
58
  if rear_open?(region, closed_parts)
59
- [:middle_closed, [closed_parts.first[1], closed_parts.last[0]]]
59
+ [:middle_closed, [closed_parts.first[0], closed_parts.last[1]]]
60
60
  else
61
61
  [:front_open, [region[0], closed_parts.first[0]]]
62
62
  end
@@ -70,7 +70,7 @@ class TextAlignment::CultivationMap
70
70
  end
71
71
  end
72
72
 
73
- def index(target, string, position)
73
+ def index(target, string, position = 0)
74
74
  length = target.length
75
75
  loop do
76
76
  _begin = string.index(target, position)
@@ -21,7 +21,7 @@ class TextAlignment::TextAlignment
21
21
  @to_prevent_overlap = to_prevent_overlap
22
22
 
23
23
  @original_text = nil
24
- @block_alignment = nil
24
+ @blocks = nil
25
25
  @cultivation_map = TextAlignment::CultivationMap.new
26
26
  end
27
27
 
@@ -39,45 +39,20 @@ class TextAlignment::TextAlignment
39
39
  denotations_mapped = @text_mapping.enmap_denotations(denotations)
40
40
 
41
41
  ## To generate the block_alignment of the input text against the reference text
42
- # Initialization
43
- # @block_alignment = {text: @original_text, reference_text: @original_reference_text, denotations: denotations}
44
- @block_alignment = {text: @mapped_text, reference_text: @mapped_reference_text, denotations: denotations}
45
-
46
- # Generation
47
- @block_alignment[:blocks] = if r = whole_block_alignment(@mapped_text, @mapped_reference_text, @cultivation_map)
42
+ @blocks = if r = whole_block_alignment(@mapped_text, @mapped_reference_text, @cultivation_map)
48
43
  r
49
44
  else
50
45
  find_block_alignment(@mapped_text, @mapped_reference_text, denotations_mapped, @cultivation_map)
51
46
  end
52
- end
53
-
54
- def update_cultivation_map
55
- return if @block_alignment.nil? || @block_alignment[:blocks].nil?
56
-
57
- ## To update the cultivation map
58
- newly_cultivated_regions = @block_alignment[:blocks].collect do |b|
59
- if b[:alignment] == :block || b[:alignment] == :term
60
- [b[:target][:begin], b[:target][:end]]
61
- else
62
- nil
63
- end
64
- end.compact.inject([]) do |condensed, region|
65
- if condensed.empty? || (condensed.last.last + 1 < region.first)
66
- condensed.push region
67
- else
68
- condensed.last[1] = region.last
69
- end
70
- condensed
71
- end
72
47
 
73
- @cultivation_map.cultivate(newly_cultivated_regions)
48
+ @block_alignment = {text: @original_text, reference_text: @original_reference_text, denotations: denotations, blocks: demap_blocks(@blocks)}
74
49
  end
75
50
 
76
51
  def transform_begin_position(_begin_position)
77
52
  begin_position = @text_mapping.enmap_position(_begin_position)
78
53
 
79
- i = @block_alignment[:blocks].index{|b| b[:source][:end] > begin_position}
80
- block = @block_alignment[:blocks][i]
54
+ i = @blocks.index{|b| b[:source][:end] > begin_position}
55
+ block = @blocks[i]
81
56
 
82
57
  b = if block[:alignment] == :block || block[:alignment] == :term
83
58
  begin_position + block[:delta]
@@ -98,8 +73,8 @@ class TextAlignment::TextAlignment
98
73
  def transform_end_position(_end_position)
99
74
  end_position = @text_mapping.enmap_position(_end_position)
100
75
 
101
- i = @block_alignment[:blocks].index{|b| b[:source][:end] >= end_position}
102
- block = @block_alignment[:blocks][i]
76
+ i = @blocks.index{|b| b[:source][:end] >= end_position}
77
+ block = @blocks[i]
103
78
 
104
79
  e = if block[:alignment] == :block || block[:alignment] == :term
105
80
  end_position + block[:delta]
@@ -160,8 +135,8 @@ class TextAlignment::TextAlignment
160
135
  end
161
136
 
162
137
  def alignment_show
163
- stext = @mapped_text
164
- ttext = @mapped_reference_text
138
+ stext = @block_alignment[:text]
139
+ ttext = @block_alignment[:reference_text]
165
140
 
166
141
  show = ''
167
142
  @block_alignment[:blocks].each do |a|
@@ -276,20 +251,32 @@ class TextAlignment::TextAlignment
276
251
  region_state, state_region = cultivation_map.region_state([b2, e2])
277
252
  case region_state
278
253
  when :closed
279
- []
254
+ [{source:{begin:b1, end:e1}, alignment: :empty}]
280
255
  when :front_open
281
- oe2 = state_region[1]
282
- me2 = (oe2 - b2) > len_buffer ? b2 + len_buffer : oe2
283
- local_alignment(str1, b1, e1, str2, b2, me2, denotations, cultivation_map)
256
+ if sum.empty? # when there is no preceding matched block
257
+ [{source:{begin:b1, end:e1}, alignment: :empty}]
258
+ else
259
+ oe2 = state_region[1]
260
+ me2 = (oe2 - b2) > len_buffer ? b2 + len_buffer : oe2
261
+ local_alignment(str1, b1, e1, str2, b2, me2, denotations, cultivation_map)
262
+ end
284
263
  when :rear_open
285
- ob2 = state_region[0]
286
- mb2 = (e2 - ob2) > len_buffer ? e2 - len_buffer : ob2
287
- local_alignment(str1, b1, e1, str2, mb2, e2, denotations, cultivation_map)
264
+ if cblock.nil? # when there is no following matched block
265
+ [{source:{begin:b1, end:e1}, alignment: :empty}]
266
+ else
267
+ ob2 = state_region[0]
268
+ mb2 = (e2 - ob2) > len_buffer ? e2 - len_buffer : ob2
269
+ local_alignment(str1, b1, e1, str2, mb2, e2, denotations, cultivation_map)
270
+ end
288
271
  when :middle_closed
289
- oe2 = state_region[0]
290
- me2 = (oe2 - b2) > len_buffer ? b2 + len_buffer : oe2
291
- attempt1 = local_alignment(str1, b1, e1, str2, b2, me2, denotations, cultivation_map)
292
- if attempt1.empty?
272
+ attempt1 = if sum.empty?
273
+ [{source:{begin:b1, end:e1}, alignment: :empty}]
274
+ else
275
+ oe2 = state_region[0]
276
+ me2 = (oe2 - b2) > len_buffer ? b2 + len_buffer : oe2
277
+ local_alignment(str1, b1, e1, str2, b2, me2, denotations, cultivation_map)
278
+ end
279
+ if (attempt1.empty? || attempt1.first[:alignment] == :empty) && !cblock.nil?
293
280
  ob2 = state_region[1]
294
281
  mb2 = (e2 - ob2) > len_buffer ? e2 - len_buffer : ob2
295
282
  local_alignment(str1, b1, e1, str2, mb2, e2, denotations, cultivation_map)
@@ -298,8 +285,12 @@ class TextAlignment::TextAlignment
298
285
  end
299
286
  else # :open
300
287
  if (e2 - b2) > len_buffer
301
- attempt1 = local_alignment(str1, b1, e1, str2, b2, b2 + len_buffer, denotations, cultivation_map)
302
- if attempt1.empty?
288
+ attempt1 = if sum.empty?
289
+ [{source:{begin:b1, end:e1}, alignment: :empty}]
290
+ else
291
+ local_alignment(str1, b1, e1, str2, b2, b2 + len_buffer, denotations, cultivation_map)
292
+ end
293
+ if (attempt1.empty? || attempt1.first[:alignment] == :empty) && !cblock.nil?
303
294
  local_alignment(str1, b1, e1, str2, e2 - len_buffer, e2, denotations, cultivation_map)
304
295
  else
305
296
  attempt1
@@ -318,10 +309,10 @@ class TextAlignment::TextAlignment
318
309
  end
319
310
 
320
311
  def whole_block_alignment(str1, str2, cultivation_map)
321
- block_begin = cultivation_map.index(str1, str2, 0)
312
+ block_begin = cultivation_map.index(str1, str2)
322
313
  return [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}] unless block_begin.nil?
323
314
 
324
- block_begin = cultivation_map.index(str1.downcase, str2.downcase, 0)
315
+ block_begin = cultivation_map.index(str1.downcase, str2.downcase)
325
316
  return [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}] unless block_begin.nil?
326
317
 
327
318
  nil
@@ -405,4 +396,38 @@ class TextAlignment::TextAlignment
405
396
  end
406
397
  end
407
398
 
399
+ def update_cultivation_map
400
+ return if @blocks.nil?
401
+
402
+ ## To update the cultivation map
403
+ newly_cultivated_regions = @blocks.collect do |b|
404
+ if b[:alignment] == :block || b[:alignment] == :term
405
+ [b[:target][:begin], b[:target][:end]]
406
+ else
407
+ nil
408
+ end
409
+ end.compact.inject([]) do |condensed, region|
410
+ if condensed.empty? || (condensed.last.last + 1 < region.first)
411
+ condensed.push region
412
+ else
413
+ condensed.last[1] = region.last
414
+ end
415
+ condensed
416
+ end
417
+
418
+ @cultivation_map.cultivate(newly_cultivated_regions)
419
+ end
420
+
421
+ def demap_blocks(_blocks)
422
+ return nil if _blocks.nil?
423
+
424
+ blocks = _blocks.map{|b| b.dup}
425
+ blocks.each do |b|
426
+ b[:source] = {begin:@text_mapping.demap_position(b[:source][:begin]), end:@text_mapping.demap_position(b[:source][:end])} if b[:source]
427
+ b[:target] = {begin:@rtext_mapping.demap_position(b[:target][:begin]), end:@rtext_mapping.demap_position(b[:target][:end])} if b[:target]
428
+ end
429
+
430
+ blocks
431
+ end
432
+
408
433
  end
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.11.1'
2
+ VERSION = '0.11.2'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.1
4
+ version: 0.11.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-08 00:00:00.000000000 Z
11
+ date: 2021-03-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary