text_alignment 0.11.1 → 0.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 81994ec9a8c7c08d2aad32b351b5942fef1748e4035ba762af546d4f3fe7cee8
|
4
|
+
data.tar.gz: bfa75451d33b9d21c2baa1a52280f03486bf10a41b5ac2a97469f0ca3a4f7379
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 73d0ff212a89d6ad33751f87f14a9b292b45ce177c61efd0ede5f852eb3834d1bd3940d202c4d87cfb0422c4dc566dea30c560f8500a220dcd3e1dd492e29eac
|
7
|
+
data.tar.gz: 6c763a564e339267624bec5c809b334b0adf9951d25409eac21ae4b4582beae4a08739119dfa369022c44c19d4faacfd0e06766c6223bd219a43c3961cfab08c
|
data/bin/align_annotations
CHANGED
@@ -37,7 +37,7 @@ def align_denotations(denotations, source_text, alignment, debug = false)
|
|
37
37
|
end
|
38
38
|
|
39
39
|
lost_annotations = alignment.lost_annotations
|
40
|
-
unless lost_annotations.empty?
|
40
|
+
unless lost_annotations.nil? || lost_annotations.empty?
|
41
41
|
warn "\n[lost annotations] #{lost_annotations.length}"
|
42
42
|
lost_annotations.each do |a|
|
43
43
|
warn "#{a}"
|
@@ -128,4 +128,4 @@ else
|
|
128
128
|
end
|
129
129
|
|
130
130
|
# pp alignment.block_alignment
|
131
|
-
puts target_annotations.to_json
|
131
|
+
# puts target_annotations.to_json
|
@@ -61,6 +61,7 @@ TextAlignment::CHAR_MAPPING = [
|
|
61
61
|
["•", "*"], #U+2022 (bullet)
|
62
62
|
[" ", " "], #U+2009 (thin space)
|
63
63
|
[" ", " "], #U+200A (hair space)
|
64
|
+
[" ", " "], #U+202F (narrow no-break space)
|
64
65
|
[" ", " "], #U+00A0 (Non-Breaking space)
|
65
66
|
[" ", " "], #U+3000 (ideographic space)
|
66
67
|
["‐", "-"], #U+2010 (Hyphen)
|
@@ -94,10 +95,10 @@ class TextAlignment::CharMapping
|
|
94
95
|
@index_demap[position]
|
95
96
|
end
|
96
97
|
|
97
|
-
def enmap_denotations(
|
98
|
-
return nil if
|
98
|
+
def enmap_denotations(denotations)
|
99
|
+
return nil if denotations.nil?
|
99
100
|
|
100
|
-
denotations
|
101
|
+
denotations.map do |d|
|
101
102
|
d.dup.merge(span:{begin:enmap_position(d[:span][:begin]), end:enmap_position(d[:span][:end])})
|
102
103
|
end
|
103
104
|
end
|
@@ -175,7 +176,7 @@ if __FILE__ == $0
|
|
175
176
|
exit
|
176
177
|
end
|
177
178
|
annotations = JSON.parse File.read(ARGV[0]).strip, symbolize_names: true
|
178
|
-
denotations = annotations[:denotations]
|
179
|
+
denotations = annotations[:denotations] || []
|
179
180
|
if denotations.nil? && annotations[:tracks]
|
180
181
|
denotations = annotations[:tracks].first[:denotations]
|
181
182
|
end
|
@@ -56,7 +56,7 @@ class TextAlignment::CultivationMap
|
|
56
56
|
else
|
57
57
|
if front_open?(region, closed_parts)
|
58
58
|
if rear_open?(region, closed_parts)
|
59
|
-
[:middle_closed, [closed_parts.first[
|
59
|
+
[:middle_closed, [closed_parts.first[0], closed_parts.last[1]]]
|
60
60
|
else
|
61
61
|
[:front_open, [region[0], closed_parts.first[0]]]
|
62
62
|
end
|
@@ -70,7 +70,7 @@ class TextAlignment::CultivationMap
|
|
70
70
|
end
|
71
71
|
end
|
72
72
|
|
73
|
-
def index(target, string, position)
|
73
|
+
def index(target, string, position = 0)
|
74
74
|
length = target.length
|
75
75
|
loop do
|
76
76
|
_begin = string.index(target, position)
|
@@ -21,7 +21,7 @@ class TextAlignment::TextAlignment
|
|
21
21
|
@to_prevent_overlap = to_prevent_overlap
|
22
22
|
|
23
23
|
@original_text = nil
|
24
|
-
@
|
24
|
+
@blocks = nil
|
25
25
|
@cultivation_map = TextAlignment::CultivationMap.new
|
26
26
|
end
|
27
27
|
|
@@ -39,45 +39,20 @@ class TextAlignment::TextAlignment
|
|
39
39
|
denotations_mapped = @text_mapping.enmap_denotations(denotations)
|
40
40
|
|
41
41
|
## To generate the block_alignment of the input text against the reference text
|
42
|
-
|
43
|
-
# @block_alignment = {text: @original_text, reference_text: @original_reference_text, denotations: denotations}
|
44
|
-
@block_alignment = {text: @mapped_text, reference_text: @mapped_reference_text, denotations: denotations}
|
45
|
-
|
46
|
-
# Generation
|
47
|
-
@block_alignment[:blocks] = if r = whole_block_alignment(@mapped_text, @mapped_reference_text, @cultivation_map)
|
42
|
+
@blocks = if r = whole_block_alignment(@mapped_text, @mapped_reference_text, @cultivation_map)
|
48
43
|
r
|
49
44
|
else
|
50
45
|
find_block_alignment(@mapped_text, @mapped_reference_text, denotations_mapped, @cultivation_map)
|
51
46
|
end
|
52
|
-
end
|
53
|
-
|
54
|
-
def update_cultivation_map
|
55
|
-
return if @block_alignment.nil? || @block_alignment[:blocks].nil?
|
56
|
-
|
57
|
-
## To update the cultivation map
|
58
|
-
newly_cultivated_regions = @block_alignment[:blocks].collect do |b|
|
59
|
-
if b[:alignment] == :block || b[:alignment] == :term
|
60
|
-
[b[:target][:begin], b[:target][:end]]
|
61
|
-
else
|
62
|
-
nil
|
63
|
-
end
|
64
|
-
end.compact.inject([]) do |condensed, region|
|
65
|
-
if condensed.empty? || (condensed.last.last + 1 < region.first)
|
66
|
-
condensed.push region
|
67
|
-
else
|
68
|
-
condensed.last[1] = region.last
|
69
|
-
end
|
70
|
-
condensed
|
71
|
-
end
|
72
47
|
|
73
|
-
@
|
48
|
+
@block_alignment = {text: @original_text, reference_text: @original_reference_text, denotations: denotations, blocks: demap_blocks(@blocks)}
|
74
49
|
end
|
75
50
|
|
76
51
|
def transform_begin_position(_begin_position)
|
77
52
|
begin_position = @text_mapping.enmap_position(_begin_position)
|
78
53
|
|
79
|
-
i = @
|
80
|
-
block = @
|
54
|
+
i = @blocks.index{|b| b[:source][:end] > begin_position}
|
55
|
+
block = @blocks[i]
|
81
56
|
|
82
57
|
b = if block[:alignment] == :block || block[:alignment] == :term
|
83
58
|
begin_position + block[:delta]
|
@@ -98,8 +73,8 @@ class TextAlignment::TextAlignment
|
|
98
73
|
def transform_end_position(_end_position)
|
99
74
|
end_position = @text_mapping.enmap_position(_end_position)
|
100
75
|
|
101
|
-
i = @
|
102
|
-
block = @
|
76
|
+
i = @blocks.index{|b| b[:source][:end] >= end_position}
|
77
|
+
block = @blocks[i]
|
103
78
|
|
104
79
|
e = if block[:alignment] == :block || block[:alignment] == :term
|
105
80
|
end_position + block[:delta]
|
@@ -160,8 +135,8 @@ class TextAlignment::TextAlignment
|
|
160
135
|
end
|
161
136
|
|
162
137
|
def alignment_show
|
163
|
-
stext = @
|
164
|
-
ttext = @
|
138
|
+
stext = @block_alignment[:text]
|
139
|
+
ttext = @block_alignment[:reference_text]
|
165
140
|
|
166
141
|
show = ''
|
167
142
|
@block_alignment[:blocks].each do |a|
|
@@ -276,20 +251,32 @@ class TextAlignment::TextAlignment
|
|
276
251
|
region_state, state_region = cultivation_map.region_state([b2, e2])
|
277
252
|
case region_state
|
278
253
|
when :closed
|
279
|
-
[]
|
254
|
+
[{source:{begin:b1, end:e1}, alignment: :empty}]
|
280
255
|
when :front_open
|
281
|
-
|
282
|
-
|
283
|
-
|
256
|
+
if sum.empty? # when there is no preceding matched block
|
257
|
+
[{source:{begin:b1, end:e1}, alignment: :empty}]
|
258
|
+
else
|
259
|
+
oe2 = state_region[1]
|
260
|
+
me2 = (oe2 - b2) > len_buffer ? b2 + len_buffer : oe2
|
261
|
+
local_alignment(str1, b1, e1, str2, b2, me2, denotations, cultivation_map)
|
262
|
+
end
|
284
263
|
when :rear_open
|
285
|
-
|
286
|
-
|
287
|
-
|
264
|
+
if cblock.nil? # when there is no following matched block
|
265
|
+
[{source:{begin:b1, end:e1}, alignment: :empty}]
|
266
|
+
else
|
267
|
+
ob2 = state_region[0]
|
268
|
+
mb2 = (e2 - ob2) > len_buffer ? e2 - len_buffer : ob2
|
269
|
+
local_alignment(str1, b1, e1, str2, mb2, e2, denotations, cultivation_map)
|
270
|
+
end
|
288
271
|
when :middle_closed
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
272
|
+
attempt1 = if sum.empty?
|
273
|
+
[{source:{begin:b1, end:e1}, alignment: :empty}]
|
274
|
+
else
|
275
|
+
oe2 = state_region[0]
|
276
|
+
me2 = (oe2 - b2) > len_buffer ? b2 + len_buffer : oe2
|
277
|
+
local_alignment(str1, b1, e1, str2, b2, me2, denotations, cultivation_map)
|
278
|
+
end
|
279
|
+
if (attempt1.empty? || attempt1.first[:alignment] == :empty) && !cblock.nil?
|
293
280
|
ob2 = state_region[1]
|
294
281
|
mb2 = (e2 - ob2) > len_buffer ? e2 - len_buffer : ob2
|
295
282
|
local_alignment(str1, b1, e1, str2, mb2, e2, denotations, cultivation_map)
|
@@ -298,8 +285,12 @@ class TextAlignment::TextAlignment
|
|
298
285
|
end
|
299
286
|
else # :open
|
300
287
|
if (e2 - b2) > len_buffer
|
301
|
-
attempt1 =
|
302
|
-
|
288
|
+
attempt1 = if sum.empty?
|
289
|
+
[{source:{begin:b1, end:e1}, alignment: :empty}]
|
290
|
+
else
|
291
|
+
local_alignment(str1, b1, e1, str2, b2, b2 + len_buffer, denotations, cultivation_map)
|
292
|
+
end
|
293
|
+
if (attempt1.empty? || attempt1.first[:alignment] == :empty) && !cblock.nil?
|
303
294
|
local_alignment(str1, b1, e1, str2, e2 - len_buffer, e2, denotations, cultivation_map)
|
304
295
|
else
|
305
296
|
attempt1
|
@@ -318,10 +309,10 @@ class TextAlignment::TextAlignment
|
|
318
309
|
end
|
319
310
|
|
320
311
|
def whole_block_alignment(str1, str2, cultivation_map)
|
321
|
-
block_begin = cultivation_map.index(str1, str2
|
312
|
+
block_begin = cultivation_map.index(str1, str2)
|
322
313
|
return [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}] unless block_begin.nil?
|
323
314
|
|
324
|
-
block_begin = cultivation_map.index(str1.downcase, str2.downcase
|
315
|
+
block_begin = cultivation_map.index(str1.downcase, str2.downcase)
|
325
316
|
return [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}] unless block_begin.nil?
|
326
317
|
|
327
318
|
nil
|
@@ -405,4 +396,38 @@ class TextAlignment::TextAlignment
|
|
405
396
|
end
|
406
397
|
end
|
407
398
|
|
399
|
+
def update_cultivation_map
|
400
|
+
return if @blocks.nil?
|
401
|
+
|
402
|
+
## To update the cultivation map
|
403
|
+
newly_cultivated_regions = @blocks.collect do |b|
|
404
|
+
if b[:alignment] == :block || b[:alignment] == :term
|
405
|
+
[b[:target][:begin], b[:target][:end]]
|
406
|
+
else
|
407
|
+
nil
|
408
|
+
end
|
409
|
+
end.compact.inject([]) do |condensed, region|
|
410
|
+
if condensed.empty? || (condensed.last.last + 1 < region.first)
|
411
|
+
condensed.push region
|
412
|
+
else
|
413
|
+
condensed.last[1] = region.last
|
414
|
+
end
|
415
|
+
condensed
|
416
|
+
end
|
417
|
+
|
418
|
+
@cultivation_map.cultivate(newly_cultivated_regions)
|
419
|
+
end
|
420
|
+
|
421
|
+
def demap_blocks(_blocks)
|
422
|
+
return nil if _blocks.nil?
|
423
|
+
|
424
|
+
blocks = _blocks.map{|b| b.dup}
|
425
|
+
blocks.each do |b|
|
426
|
+
b[:source] = {begin:@text_mapping.demap_position(b[:source][:begin]), end:@text_mapping.demap_position(b[:source][:end])} if b[:source]
|
427
|
+
b[:target] = {begin:@rtext_mapping.demap_position(b[:target][:begin]), end:@rtext_mapping.demap_position(b[:target][:end])} if b[:target]
|
428
|
+
end
|
429
|
+
|
430
|
+
blocks
|
431
|
+
end
|
432
|
+
|
408
433
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_alignment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.11.
|
4
|
+
version: 0.11.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-03-
|
11
|
+
date: 2021-03-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-dictionary
|