text_alignment 0.11.1 → 0.11.2
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 81994ec9a8c7c08d2aad32b351b5942fef1748e4035ba762af546d4f3fe7cee8
|
4
|
+
data.tar.gz: bfa75451d33b9d21c2baa1a52280f03486bf10a41b5ac2a97469f0ca3a4f7379
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 73d0ff212a89d6ad33751f87f14a9b292b45ce177c61efd0ede5f852eb3834d1bd3940d202c4d87cfb0422c4dc566dea30c560f8500a220dcd3e1dd492e29eac
|
7
|
+
data.tar.gz: 6c763a564e339267624bec5c809b334b0adf9951d25409eac21ae4b4582beae4a08739119dfa369022c44c19d4faacfd0e06766c6223bd219a43c3961cfab08c
|
data/bin/align_annotations
CHANGED
@@ -37,7 +37,7 @@ def align_denotations(denotations, source_text, alignment, debug = false)
|
|
37
37
|
end
|
38
38
|
|
39
39
|
lost_annotations = alignment.lost_annotations
|
40
|
-
unless lost_annotations.empty?
|
40
|
+
unless lost_annotations.nil? || lost_annotations.empty?
|
41
41
|
warn "\n[lost annotations] #{lost_annotations.length}"
|
42
42
|
lost_annotations.each do |a|
|
43
43
|
warn "#{a}"
|
@@ -128,4 +128,4 @@ else
|
|
128
128
|
end
|
129
129
|
|
130
130
|
# pp alignment.block_alignment
|
131
|
-
puts target_annotations.to_json
|
131
|
+
# puts target_annotations.to_json
|
@@ -61,6 +61,7 @@ TextAlignment::CHAR_MAPPING = [
|
|
61
61
|
["•", "*"], #U+2022 (bullet)
|
62
62
|
[" ", " "], #U+2009 (thin space)
|
63
63
|
[" ", " "], #U+200A (hair space)
|
64
|
+
[" ", " "], #U+202F (narrow no-break space)
|
64
65
|
[" ", " "], #U+00A0 (Non-Breaking space)
|
65
66
|
[" ", " "], #U+3000 (ideographic space)
|
66
67
|
["‐", "-"], #U+2010 (Hyphen)
|
@@ -94,10 +95,10 @@ class TextAlignment::CharMapping
|
|
94
95
|
@index_demap[position]
|
95
96
|
end
|
96
97
|
|
97
|
-
def enmap_denotations(
|
98
|
-
return nil if
|
98
|
+
def enmap_denotations(denotations)
|
99
|
+
return nil if denotations.nil?
|
99
100
|
|
100
|
-
denotations
|
101
|
+
denotations.map do |d|
|
101
102
|
d.dup.merge(span:{begin:enmap_position(d[:span][:begin]), end:enmap_position(d[:span][:end])})
|
102
103
|
end
|
103
104
|
end
|
@@ -175,7 +176,7 @@ if __FILE__ == $0
|
|
175
176
|
exit
|
176
177
|
end
|
177
178
|
annotations = JSON.parse File.read(ARGV[0]).strip, symbolize_names: true
|
178
|
-
denotations = annotations[:denotations]
|
179
|
+
denotations = annotations[:denotations] || []
|
179
180
|
if denotations.nil? && annotations[:tracks]
|
180
181
|
denotations = annotations[:tracks].first[:denotations]
|
181
182
|
end
|
@@ -56,7 +56,7 @@ class TextAlignment::CultivationMap
|
|
56
56
|
else
|
57
57
|
if front_open?(region, closed_parts)
|
58
58
|
if rear_open?(region, closed_parts)
|
59
|
-
[:middle_closed, [closed_parts.first[
|
59
|
+
[:middle_closed, [closed_parts.first[0], closed_parts.last[1]]]
|
60
60
|
else
|
61
61
|
[:front_open, [region[0], closed_parts.first[0]]]
|
62
62
|
end
|
@@ -70,7 +70,7 @@ class TextAlignment::CultivationMap
|
|
70
70
|
end
|
71
71
|
end
|
72
72
|
|
73
|
-
def index(target, string, position)
|
73
|
+
def index(target, string, position = 0)
|
74
74
|
length = target.length
|
75
75
|
loop do
|
76
76
|
_begin = string.index(target, position)
|
@@ -21,7 +21,7 @@ class TextAlignment::TextAlignment
|
|
21
21
|
@to_prevent_overlap = to_prevent_overlap
|
22
22
|
|
23
23
|
@original_text = nil
|
24
|
-
@
|
24
|
+
@blocks = nil
|
25
25
|
@cultivation_map = TextAlignment::CultivationMap.new
|
26
26
|
end
|
27
27
|
|
@@ -39,45 +39,20 @@ class TextAlignment::TextAlignment
|
|
39
39
|
denotations_mapped = @text_mapping.enmap_denotations(denotations)
|
40
40
|
|
41
41
|
## To generate the block_alignment of the input text against the reference text
|
42
|
-
|
43
|
-
# @block_alignment = {text: @original_text, reference_text: @original_reference_text, denotations: denotations}
|
44
|
-
@block_alignment = {text: @mapped_text, reference_text: @mapped_reference_text, denotations: denotations}
|
45
|
-
|
46
|
-
# Generation
|
47
|
-
@block_alignment[:blocks] = if r = whole_block_alignment(@mapped_text, @mapped_reference_text, @cultivation_map)
|
42
|
+
@blocks = if r = whole_block_alignment(@mapped_text, @mapped_reference_text, @cultivation_map)
|
48
43
|
r
|
49
44
|
else
|
50
45
|
find_block_alignment(@mapped_text, @mapped_reference_text, denotations_mapped, @cultivation_map)
|
51
46
|
end
|
52
|
-
end
|
53
|
-
|
54
|
-
def update_cultivation_map
|
55
|
-
return if @block_alignment.nil? || @block_alignment[:blocks].nil?
|
56
|
-
|
57
|
-
## To update the cultivation map
|
58
|
-
newly_cultivated_regions = @block_alignment[:blocks].collect do |b|
|
59
|
-
if b[:alignment] == :block || b[:alignment] == :term
|
60
|
-
[b[:target][:begin], b[:target][:end]]
|
61
|
-
else
|
62
|
-
nil
|
63
|
-
end
|
64
|
-
end.compact.inject([]) do |condensed, region|
|
65
|
-
if condensed.empty? || (condensed.last.last + 1 < region.first)
|
66
|
-
condensed.push region
|
67
|
-
else
|
68
|
-
condensed.last[1] = region.last
|
69
|
-
end
|
70
|
-
condensed
|
71
|
-
end
|
72
47
|
|
73
|
-
@
|
48
|
+
@block_alignment = {text: @original_text, reference_text: @original_reference_text, denotations: denotations, blocks: demap_blocks(@blocks)}
|
74
49
|
end
|
75
50
|
|
76
51
|
def transform_begin_position(_begin_position)
|
77
52
|
begin_position = @text_mapping.enmap_position(_begin_position)
|
78
53
|
|
79
|
-
i = @
|
80
|
-
block = @
|
54
|
+
i = @blocks.index{|b| b[:source][:end] > begin_position}
|
55
|
+
block = @blocks[i]
|
81
56
|
|
82
57
|
b = if block[:alignment] == :block || block[:alignment] == :term
|
83
58
|
begin_position + block[:delta]
|
@@ -98,8 +73,8 @@ class TextAlignment::TextAlignment
|
|
98
73
|
def transform_end_position(_end_position)
|
99
74
|
end_position = @text_mapping.enmap_position(_end_position)
|
100
75
|
|
101
|
-
i = @
|
102
|
-
block = @
|
76
|
+
i = @blocks.index{|b| b[:source][:end] >= end_position}
|
77
|
+
block = @blocks[i]
|
103
78
|
|
104
79
|
e = if block[:alignment] == :block || block[:alignment] == :term
|
105
80
|
end_position + block[:delta]
|
@@ -160,8 +135,8 @@ class TextAlignment::TextAlignment
|
|
160
135
|
end
|
161
136
|
|
162
137
|
def alignment_show
|
163
|
-
stext = @
|
164
|
-
ttext = @
|
138
|
+
stext = @block_alignment[:text]
|
139
|
+
ttext = @block_alignment[:reference_text]
|
165
140
|
|
166
141
|
show = ''
|
167
142
|
@block_alignment[:blocks].each do |a|
|
@@ -276,20 +251,32 @@ class TextAlignment::TextAlignment
|
|
276
251
|
region_state, state_region = cultivation_map.region_state([b2, e2])
|
277
252
|
case region_state
|
278
253
|
when :closed
|
279
|
-
[]
|
254
|
+
[{source:{begin:b1, end:e1}, alignment: :empty}]
|
280
255
|
when :front_open
|
281
|
-
|
282
|
-
|
283
|
-
|
256
|
+
if sum.empty? # when there is no preceding matched block
|
257
|
+
[{source:{begin:b1, end:e1}, alignment: :empty}]
|
258
|
+
else
|
259
|
+
oe2 = state_region[1]
|
260
|
+
me2 = (oe2 - b2) > len_buffer ? b2 + len_buffer : oe2
|
261
|
+
local_alignment(str1, b1, e1, str2, b2, me2, denotations, cultivation_map)
|
262
|
+
end
|
284
263
|
when :rear_open
|
285
|
-
|
286
|
-
|
287
|
-
|
264
|
+
if cblock.nil? # when there is no following matched block
|
265
|
+
[{source:{begin:b1, end:e1}, alignment: :empty}]
|
266
|
+
else
|
267
|
+
ob2 = state_region[0]
|
268
|
+
mb2 = (e2 - ob2) > len_buffer ? e2 - len_buffer : ob2
|
269
|
+
local_alignment(str1, b1, e1, str2, mb2, e2, denotations, cultivation_map)
|
270
|
+
end
|
288
271
|
when :middle_closed
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
272
|
+
attempt1 = if sum.empty?
|
273
|
+
[{source:{begin:b1, end:e1}, alignment: :empty}]
|
274
|
+
else
|
275
|
+
oe2 = state_region[0]
|
276
|
+
me2 = (oe2 - b2) > len_buffer ? b2 + len_buffer : oe2
|
277
|
+
local_alignment(str1, b1, e1, str2, b2, me2, denotations, cultivation_map)
|
278
|
+
end
|
279
|
+
if (attempt1.empty? || attempt1.first[:alignment] == :empty) && !cblock.nil?
|
293
280
|
ob2 = state_region[1]
|
294
281
|
mb2 = (e2 - ob2) > len_buffer ? e2 - len_buffer : ob2
|
295
282
|
local_alignment(str1, b1, e1, str2, mb2, e2, denotations, cultivation_map)
|
@@ -298,8 +285,12 @@ class TextAlignment::TextAlignment
|
|
298
285
|
end
|
299
286
|
else # :open
|
300
287
|
if (e2 - b2) > len_buffer
|
301
|
-
attempt1 =
|
302
|
-
|
288
|
+
attempt1 = if sum.empty?
|
289
|
+
[{source:{begin:b1, end:e1}, alignment: :empty}]
|
290
|
+
else
|
291
|
+
local_alignment(str1, b1, e1, str2, b2, b2 + len_buffer, denotations, cultivation_map)
|
292
|
+
end
|
293
|
+
if (attempt1.empty? || attempt1.first[:alignment] == :empty) && !cblock.nil?
|
303
294
|
local_alignment(str1, b1, e1, str2, e2 - len_buffer, e2, denotations, cultivation_map)
|
304
295
|
else
|
305
296
|
attempt1
|
@@ -318,10 +309,10 @@ class TextAlignment::TextAlignment
|
|
318
309
|
end
|
319
310
|
|
320
311
|
def whole_block_alignment(str1, str2, cultivation_map)
|
321
|
-
block_begin = cultivation_map.index(str1, str2
|
312
|
+
block_begin = cultivation_map.index(str1, str2)
|
322
313
|
return [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}] unless block_begin.nil?
|
323
314
|
|
324
|
-
block_begin = cultivation_map.index(str1.downcase, str2.downcase
|
315
|
+
block_begin = cultivation_map.index(str1.downcase, str2.downcase)
|
325
316
|
return [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}] unless block_begin.nil?
|
326
317
|
|
327
318
|
nil
|
@@ -405,4 +396,38 @@ class TextAlignment::TextAlignment
|
|
405
396
|
end
|
406
397
|
end
|
407
398
|
|
399
|
+
def update_cultivation_map
|
400
|
+
return if @blocks.nil?
|
401
|
+
|
402
|
+
## To update the cultivation map
|
403
|
+
newly_cultivated_regions = @blocks.collect do |b|
|
404
|
+
if b[:alignment] == :block || b[:alignment] == :term
|
405
|
+
[b[:target][:begin], b[:target][:end]]
|
406
|
+
else
|
407
|
+
nil
|
408
|
+
end
|
409
|
+
end.compact.inject([]) do |condensed, region|
|
410
|
+
if condensed.empty? || (condensed.last.last + 1 < region.first)
|
411
|
+
condensed.push region
|
412
|
+
else
|
413
|
+
condensed.last[1] = region.last
|
414
|
+
end
|
415
|
+
condensed
|
416
|
+
end
|
417
|
+
|
418
|
+
@cultivation_map.cultivate(newly_cultivated_regions)
|
419
|
+
end
|
420
|
+
|
421
|
+
def demap_blocks(_blocks)
|
422
|
+
return nil if _blocks.nil?
|
423
|
+
|
424
|
+
blocks = _blocks.map{|b| b.dup}
|
425
|
+
blocks.each do |b|
|
426
|
+
b[:source] = {begin:@text_mapping.demap_position(b[:source][:begin]), end:@text_mapping.demap_position(b[:source][:end])} if b[:source]
|
427
|
+
b[:target] = {begin:@rtext_mapping.demap_position(b[:target][:begin]), end:@rtext_mapping.demap_position(b[:target][:end])} if b[:target]
|
428
|
+
end
|
429
|
+
|
430
|
+
blocks
|
431
|
+
end
|
432
|
+
|
408
433
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_alignment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.11.
|
4
|
+
version: 0.11.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-03-
|
11
|
+
date: 2021-03-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-dictionary
|