text_alignment 0.4.2 → 0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/align_annotations +32 -37
- data/lib/text_alignment/constants.rb +1 -1
- data/lib/text_alignment/lcs_comparison.rb +3 -1
- data/lib/text_alignment/mixed_alignment.rb +1 -54
- data/lib/text_alignment/text_alignment.rb +163 -46
- data/lib/text_alignment/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dc820991f5f694f154b94c369158909ccba3760829e0d881c7fd2e6ef7ddd149
|
4
|
+
data.tar.gz: 40ae6f2e388405426a77682bd1a3fb7a3c853076eced9b7301b632081dfd0a57
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5802241b4a8394d3c570c1d4b8f5e1d7706c72852e2d6e6fb23bda2f6e2972fa09f7001695db026667144e2af982eeb91ed0b700bd8151af6df794c98e3c069b
|
7
|
+
data.tar.gz: 8d7c93acbef6ab12bb2a0291444a7bcc73b0236bb5b0d06d274e95aa30c9ffc829965653b58270686147a9ac30ccf570518b3ad266120b320dfb20cd1620f5f9
|
data/bin/align_annotations
CHANGED
@@ -35,6 +35,10 @@ def align_mdoc(source_annotations, target_annotations)
|
|
35
35
|
source_annotations.each do |annotations|
|
36
36
|
alignment = TextAlignment::TextAlignment.new(annotations[:text], target_annotations[:text])
|
37
37
|
|
38
|
+
puts alignment.alignment_show
|
39
|
+
puts "-----"
|
40
|
+
puts
|
41
|
+
|
38
42
|
# alignment.block_alignments.each do |a|
|
39
43
|
# p {source:a[:source], target:a[:target]}
|
40
44
|
# puts "--"
|
@@ -103,48 +107,39 @@ target_annotations = if source_annotations.class == Array
|
|
103
107
|
else
|
104
108
|
alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
|
105
109
|
|
106
|
-
pp alignment
|
110
|
+
# pp alignment
|
107
111
|
|
108
112
|
# verification
|
109
|
-
source_text = source_annotations[:text]
|
110
|
-
puts "=====BEGIN"
|
111
|
-
(0 ... source_text.rstrip.length).each do |p|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
end
|
119
|
-
puts
|
120
|
-
puts "=====END"
|
121
|
-
|
122
|
-
puts "=====BEGIN"
|
123
|
-
(0 .. source_text.rstrip.length).each do |p|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
else
|
128
|
-
print '.'
|
129
|
-
end
|
130
|
-
end
|
131
|
-
puts
|
132
|
-
puts "=====END"
|
133
|
-
|
134
|
-
# alignment.block_alignments.each do |a|
|
135
|
-
# if a[:alignment].nil? || a[:alignment] == :empty
|
136
|
-
# # p [a[:source], a[:target]]
|
137
|
-
# # p a[:alignment]
|
113
|
+
# source_text = source_annotations[:text]
|
114
|
+
# puts "=====BEGIN"
|
115
|
+
# (0 ... source_text.rstrip.length).each do |p|
|
116
|
+
# t = alignment.transform_begin_position(p)
|
117
|
+
# if t.nil?
|
118
|
+
# print source_text[p]
|
119
|
+
# else
|
120
|
+
# print '.'
|
121
|
+
# end
|
122
|
+
# end
|
123
|
+
# puts
|
124
|
+
# puts "=====END"
|
125
|
+
|
126
|
+
# puts "=====BEGIN"
|
127
|
+
# (0 .. source_text.rstrip.length).each do |p|
|
128
|
+
# t = alignment.transform_end_position(p)
|
129
|
+
# if t.nil?
|
130
|
+
# print source_text[p]
|
138
131
|
# else
|
139
|
-
#
|
140
|
-
# p a[:alignment].similarity
|
141
|
-
# puts "--"
|
142
|
-
# puts source_annotations[:text][a[:source][:begin] ... a[:source][:end]]
|
143
|
-
# puts "--"
|
144
|
-
# puts target_text[a[:target][:begin] ... a[:target][:end]]
|
145
|
-
# puts "======"
|
132
|
+
# print '.'
|
146
133
|
# end
|
147
134
|
# end
|
135
|
+
# puts
|
136
|
+
# puts "=====END"
|
137
|
+
|
138
|
+
source_text = source_annotations[:text]
|
139
|
+
|
140
|
+
puts "[block alignment]"
|
141
|
+
puts alignment.alignment_show
|
142
|
+
puts "====="
|
148
143
|
# exit
|
149
144
|
|
150
145
|
# verification of source denotations
|
@@ -4,4 +4,4 @@ TextAlignment::SIZE_NGRAM = 8 unless defined? TextAlignment::SIZE_NGRAM
|
|
4
4
|
TextAlignment::SIZE_WINDOW = 60 unless defined? TextAlignment::SIZE_WINDOW
|
5
5
|
TextAlignment::BUFFER_RATE = 0.1 unless defined? TextAlignment::BUFFER_RATE
|
6
6
|
TextAlignment::BUFFER_MIN = 20 unless defined? TextAlignment::BUFFER_MIN
|
7
|
-
TextAlignment::TEXT_SIMILARITY_THRESHOLD = 0.
|
7
|
+
TextAlignment::TEXT_SIMILARITY_THRESHOLD = 0.9 unless defined? TextAlignment::TEXT_SIMILARITY_THRESHOLD
|
@@ -33,7 +33,9 @@ class TextAlignment::LCSComparison
|
|
33
33
|
@str2_match_initial = sdiff[match_initial].new_position
|
34
34
|
@str1_match_final = sdiff[match_final].old_position
|
35
35
|
@str2_match_final = sdiff[match_final].new_position
|
36
|
-
|
36
|
+
mlcs = sdiff.count{|d| d.action == '=' && d.old_element =~ /\S/ && d.new_element =~ /\S/}
|
37
|
+
@similarity = 2 * mlcs / (str1[@str1_match_initial .. @str1_match_final].scan(/\S/).count + str2[@str2_match_initial .. @str2_match_final].scan(/\S/).count).to_f
|
38
|
+
# @similarity = 2 * lcs / (str1[@str1_match_initial .. @str1_match_final].length + str2[@str2_match_initial .. @str2_match_final].length).to_f
|
37
39
|
else
|
38
40
|
@str1_match_initial = 0
|
39
41
|
@str2_match_initial = 0
|
@@ -10,8 +10,6 @@ require 'text_alignment/mappings'
|
|
10
10
|
|
11
11
|
module TextAlignment; end unless defined? TextAlignment
|
12
12
|
|
13
|
-
TextAlignment::NOMATCH_CHARS = "@^|#$%&_" unless defined? TextAlignment::NOMATCH_CHARS
|
14
|
-
|
15
13
|
class TextAlignment::MixedAlignment
|
16
14
|
attr_reader :sdiff
|
17
15
|
attr_reader :position_map_begin, :position_map_end
|
@@ -21,58 +19,7 @@ class TextAlignment::MixedAlignment
|
|
21
19
|
|
22
20
|
def initialize(str1, str2, mappings = [])
|
23
21
|
raise ArgumentError, "nil string" if str1.nil? || str2.nil?
|
24
|
-
|
25
|
-
|
26
|
-
## preprocessing
|
27
|
-
str1 = str1.dup
|
28
|
-
str2 = str2.dup
|
29
|
-
mappings = mappings.dup
|
30
|
-
|
31
|
-
## find the first nomatch character
|
32
|
-
TextAlignment::NOMATCH_CHARS.each_char do |c|
|
33
|
-
if str2.index(c).nil?
|
34
|
-
@nomatch_char1 = c
|
35
|
-
break
|
36
|
-
end
|
37
|
-
end
|
38
|
-
raise RuntimeError, "Cannot find nomatch character" if @nomatch_char1.nil?
|
39
|
-
|
40
|
-
## find the first nomatch character
|
41
|
-
TextAlignment::NOMATCH_CHARS.each_char do |c|
|
42
|
-
if c != @nomatch_char1 && str1.index(c).nil?
|
43
|
-
@nomatch_char2 = c
|
44
|
-
break
|
45
|
-
end
|
46
|
-
end
|
47
|
-
raise RuntimeError, "Cannot find nomatch character" if @nomatch_char2.nil?
|
48
|
-
|
49
|
-
# single character mappings
|
50
|
-
character_mappings = mappings.select{|m| m[0].length == 1 && m[1].length == 1}
|
51
|
-
characters_from = character_mappings.collect{|m| m[0]}.join
|
52
|
-
characters_to = character_mappings.collect{|m| m[1]}.join
|
53
|
-
characters_to.gsub!(/-/, '\-')
|
54
|
-
|
55
|
-
str1.tr!(characters_from, characters_to)
|
56
|
-
str2.tr!(characters_from, characters_to)
|
57
|
-
|
58
|
-
mappings.delete_if{|m| m[0].length == 1 && m[1].length == 1}
|
59
|
-
|
60
|
-
# ASCII foldings
|
61
|
-
ascii_foldings = mappings.select{|m| m[0].length == 1 && m[1].length > 1}
|
62
|
-
ascii_foldings.each do |f|
|
63
|
-
from = f[1]
|
64
|
-
|
65
|
-
if str2.index(f[0])
|
66
|
-
to = f[0] + (@nomatch_char1 * (f[1].length - 1))
|
67
|
-
str1.gsub!(from, to)
|
68
|
-
end
|
69
|
-
|
70
|
-
if str1.index(f[0])
|
71
|
-
to = f[0] + (@nomatch_char2 * (f[1].length - 1))
|
72
|
-
str2.gsub!(from, to)
|
73
|
-
end
|
74
|
-
end
|
75
|
-
mappings.delete_if{|m| m[0].length == 1 && m[1].length > 1}
|
22
|
+
mappings ||= []
|
76
23
|
|
77
24
|
_compute_mixed_alignment(str1, str2, mappings)
|
78
25
|
end
|
@@ -5,28 +5,35 @@ require 'text_alignment/mixed_alignment'
|
|
5
5
|
|
6
6
|
module TextAlignment; end unless defined? TextAlignment
|
7
7
|
|
8
|
+
TextAlignment::PADDING_LETTERS = ['@', '^', '|', '#', '$', '%', '&', '_'] unless defined? TextAlignment::PADDING_LETTERS
|
9
|
+
|
8
10
|
class TextAlignment::TextAlignment
|
9
|
-
attr_reader :
|
11
|
+
attr_reader :block_alignment
|
10
12
|
attr_reader :similarity
|
11
13
|
attr_reader :lost_annotations
|
12
14
|
|
13
|
-
def initialize(
|
14
|
-
raise ArgumentError, "nil string" if
|
15
|
+
def initialize(_str1, _str2, _size_ngram = nil, _size_window = nil, _text_similiarity_threshold = nil)
|
16
|
+
raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
|
15
17
|
|
16
|
-
|
17
|
-
size_window = _size_window || TextAlignment::SIZE_WINDOW
|
18
|
-
sim_threshold = _text_similiarity_threshold || TextAlignment::TEXT_SIMILARITY_THRESHOLD
|
18
|
+
@block_alignment = {source_text:_str1, target_text:_str2}
|
19
19
|
|
20
|
-
mappings
|
20
|
+
str1, str2, mappings = string_preprocessing(_str1, _str2)
|
21
21
|
|
22
22
|
# try exact match
|
23
23
|
block_begin = str2.index(str1)
|
24
24
|
unless block_begin.nil?
|
25
|
-
@
|
26
|
-
return @
|
25
|
+
@block_alignment[:blocks] = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
|
26
|
+
return @block_alignment
|
27
|
+
end
|
28
|
+
|
29
|
+
# try exact match
|
30
|
+
block_begin = str2.downcase.index(str1.downcase)
|
31
|
+
unless block_begin.nil?
|
32
|
+
@block_alignment[:blocks] = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
|
33
|
+
return @block_alignment
|
27
34
|
end
|
28
35
|
|
29
|
-
anchor_finder = TextAlignment::AnchorFinder.new(str1, str2,
|
36
|
+
anchor_finder = TextAlignment::AnchorFinder.new(str1, str2, _size_ngram, _size_window, _text_similiarity_threshold)
|
30
37
|
|
31
38
|
# To collect matched blocks
|
32
39
|
mblocks = []
|
@@ -56,7 +63,7 @@ class TextAlignment::TextAlignment
|
|
56
63
|
# puts
|
57
64
|
|
58
65
|
## To find block alignments
|
59
|
-
@
|
66
|
+
@block_alignment[:blocks] = []
|
60
67
|
return if mblocks.empty?
|
61
68
|
|
62
69
|
# Initial step
|
@@ -65,35 +72,35 @@ class TextAlignment::TextAlignment
|
|
65
72
|
e2 = mblocks[0][:target][:begin]
|
66
73
|
|
67
74
|
if mblocks[0][:target][:begin] == 0
|
68
|
-
@
|
75
|
+
@block_alignment[:blocks] << {source:{begin:0, end:e1}, target:{begin:0, end:0}, alignment: :empty}
|
69
76
|
else
|
70
77
|
_str1 = str1[0 ... e1]
|
71
78
|
_str2 = str2[0 ... e2]
|
72
79
|
|
73
80
|
unless _str1.strip.empty?
|
74
81
|
if _str2.strip.empty?
|
75
|
-
@
|
82
|
+
@block_alignment[:blocks] << {source:{begin:0, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
|
76
83
|
else
|
77
84
|
len_min = [_str1.length, _str2.length].min
|
78
85
|
len_buffer = (len_min * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
|
79
86
|
b1 = _str1.length < len_buffer ? 0 : e1 - len_buffer
|
80
87
|
b2 = _str2.length < len_buffer ? 0 : e2 - len_buffer
|
81
88
|
|
82
|
-
@
|
89
|
+
@block_alignment[:blocks] << {source:{begin:0, end:b1}, target:{begin:0, end:b2}, alignment: :empty} if b1 > 0
|
83
90
|
|
84
91
|
_str1 = str1[b1 ... e1]
|
85
92
|
_str2 = str2[b2 ... e2]
|
86
93
|
alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
|
87
94
|
if alignment.similarity < 0.6
|
88
|
-
@
|
95
|
+
@block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
|
89
96
|
else
|
90
|
-
@
|
97
|
+
@block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment:alignment}
|
91
98
|
end
|
92
99
|
end
|
93
100
|
end
|
94
101
|
end
|
95
102
|
end
|
96
|
-
@
|
103
|
+
@block_alignment[:blocks] << mblocks[0].merge(alignment: :block)
|
97
104
|
|
98
105
|
(1 ... mblocks.length).each do |i|
|
99
106
|
b1 = mblocks[i - 1][:source][:end]
|
@@ -104,17 +111,17 @@ class TextAlignment::TextAlignment
|
|
104
111
|
_str2 = str2[b2 ... e2]
|
105
112
|
unless _str1.strip.empty?
|
106
113
|
if _str2.strip.empty?
|
107
|
-
@
|
114
|
+
@block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
|
108
115
|
else
|
109
116
|
alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
|
110
117
|
if alignment.similarity < 0.6
|
111
|
-
@
|
118
|
+
@block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
|
112
119
|
else
|
113
|
-
@
|
120
|
+
@block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment}
|
114
121
|
end
|
115
122
|
end
|
116
123
|
end
|
117
|
-
@
|
124
|
+
@block_alignment[:blocks] << mblocks[i].merge(alignment: :block)
|
118
125
|
end
|
119
126
|
|
120
127
|
# Final step
|
@@ -126,7 +133,7 @@ class TextAlignment::TextAlignment
|
|
126
133
|
|
127
134
|
unless _str1.strip.empty?
|
128
135
|
if _str2.strip.empty?
|
129
|
-
@
|
136
|
+
@block_alignment[:blocks] << {source:{begin:b1, end:str1.length}, target:{begin:b2, end:str2.length}, alignment: :empty}
|
130
137
|
else
|
131
138
|
len_min = [_str1.length, _str2.length].min
|
132
139
|
len_buffer = (len_min * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
|
@@ -137,56 +144,56 @@ class TextAlignment::TextAlignment
|
|
137
144
|
|
138
145
|
alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
|
139
146
|
if alignment.similarity < 0.6
|
140
|
-
@
|
147
|
+
@block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
|
141
148
|
else
|
142
|
-
@
|
149
|
+
@block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment}
|
143
150
|
end
|
144
151
|
|
145
|
-
@
|
152
|
+
@block_alignment[:blocks] << {source:{begin:e1, end:-1}, target:{begin:e2, end:-1}, alignment: :empty} if e1 < str1.length
|
146
153
|
end
|
147
154
|
end
|
148
155
|
end
|
149
156
|
|
150
|
-
@
|
157
|
+
@block_alignment[:blocks].each do |a|
|
151
158
|
a[:delta] = a[:target][:begin] - a[:source][:begin]
|
152
159
|
end
|
153
160
|
end
|
154
161
|
|
155
162
|
def transform_begin_position(begin_position)
|
156
|
-
i = @
|
157
|
-
|
158
|
-
|
159
|
-
b = if
|
160
|
-
begin_position +
|
161
|
-
elsif
|
162
|
-
if begin_position ==
|
163
|
-
|
163
|
+
i = @block_alignment[:blocks].index{|b| b[:source][:end] > begin_position}
|
164
|
+
block = @block_alignment[:blocks][i]
|
165
|
+
|
166
|
+
b = if block[:alignment] == :block
|
167
|
+
begin_position + block[:delta]
|
168
|
+
elsif block[:alignment] == :empty
|
169
|
+
if begin_position == block[:source][:begin]
|
170
|
+
block[:target][:begin]
|
164
171
|
else
|
165
172
|
# raise "lost annotation"
|
166
173
|
nil
|
167
174
|
end
|
168
175
|
else
|
169
|
-
r =
|
170
|
-
r.nil? ? nil : r +
|
176
|
+
r = block[:alignment].transform_begin_position(begin_position - block[:source][:begin])
|
177
|
+
r.nil? ? nil : r + block[:target][:begin]
|
171
178
|
end
|
172
179
|
end
|
173
180
|
|
174
181
|
def transform_end_position(end_position)
|
175
|
-
i = @
|
176
|
-
|
177
|
-
|
178
|
-
e = if
|
179
|
-
end_position +
|
180
|
-
elsif
|
181
|
-
if end_position ==
|
182
|
-
|
182
|
+
i = @block_alignment[:blocks].index{|b| b[:source][:end] >= end_position}
|
183
|
+
block = @block_alignment[:blocks][i]
|
184
|
+
|
185
|
+
e = if block[:alignment] == :block
|
186
|
+
end_position + block[:delta]
|
187
|
+
elsif block[:alignment] == :empty
|
188
|
+
if end_position == block[:source][:end]
|
189
|
+
block[:target][:end]
|
183
190
|
else
|
184
191
|
# raise "lost annotation"
|
185
192
|
nil
|
186
193
|
end
|
187
194
|
else
|
188
|
-
r =
|
189
|
-
r.nil? ? nil : r +
|
195
|
+
r = block[:alignment].transform_end_position(end_position - block[:source][:begin])
|
196
|
+
r.nil? ? nil : r + block[:target][:begin]
|
190
197
|
end
|
191
198
|
end
|
192
199
|
|
@@ -232,4 +239,114 @@ class TextAlignment::TextAlignment
|
|
232
239
|
r
|
233
240
|
end
|
234
241
|
|
242
|
+
def alignment_show
|
243
|
+
stext = @block_alignment[:source_text]
|
244
|
+
ttext = @block_alignment[:target_text]
|
245
|
+
|
246
|
+
show = ''
|
247
|
+
@block_alignment[:blocks].each do |a|
|
248
|
+
show += case a[:alignment]
|
249
|
+
when :block
|
250
|
+
"===== common =====\n" +
|
251
|
+
stext[a[:source][:begin] ... a[:source][:end]] + "\n\n"
|
252
|
+
when :empty
|
253
|
+
"<<<<< string 1\n" +
|
254
|
+
stext[a[:source][:begin] ... a[:source][:end]] + "\n\n" +
|
255
|
+
">>>>> string 2\n" +
|
256
|
+
ttext[a[:target][:begin] ... a[:target][:end]] + "\n\n"
|
257
|
+
else
|
258
|
+
astr1 = ''
|
259
|
+
astr2 = ''
|
260
|
+
|
261
|
+
base = a[:source][:begin]
|
262
|
+
astr1 = a[:alignment].sdiff.map do |c|
|
263
|
+
case c.action
|
264
|
+
when '='
|
265
|
+
stext[c.old_position + base]
|
266
|
+
when '+'
|
267
|
+
'_'
|
268
|
+
when '-'
|
269
|
+
stext[c.old_position + base]
|
270
|
+
when '!'
|
271
|
+
stext[c.old_position + base] + '_'
|
272
|
+
end
|
273
|
+
end.join('')
|
274
|
+
|
275
|
+
base = a[:target][:begin]
|
276
|
+
astr2 = a[:alignment].sdiff.map do |c|
|
277
|
+
case c.action
|
278
|
+
when '='
|
279
|
+
ttext[c.new_position + base]
|
280
|
+
when '+'
|
281
|
+
ttext[c.new_position + base]
|
282
|
+
when '-'
|
283
|
+
'_'
|
284
|
+
when '!'
|
285
|
+
'_' + ttext[c.new_position + base]
|
286
|
+
end
|
287
|
+
end.join('')
|
288
|
+
|
289
|
+
"***** local mismatch\n" +
|
290
|
+
"[#{astr1}]\n" +
|
291
|
+
"[#{astr2}]\n\n"
|
292
|
+
end
|
293
|
+
end
|
294
|
+
show
|
295
|
+
end
|
296
|
+
|
297
|
+
private
|
298
|
+
|
299
|
+
def string_preprocessing(_str1, _str2)
|
300
|
+
str1 = _str1.dup
|
301
|
+
str2 = _str2.dup
|
302
|
+
mappings = TextAlignment::MAPPINGS.dup
|
303
|
+
|
304
|
+
## single character mappings
|
305
|
+
character_mappings = mappings.select{|m| m[0].length == 1 && m[1].length == 1}
|
306
|
+
characters_from = character_mappings.collect{|m| m[0]}.join
|
307
|
+
characters_to = character_mappings.collect{|m| m[1]}.join
|
308
|
+
characters_to.gsub!(/-/, '\-')
|
309
|
+
|
310
|
+
str1.tr!(characters_from, characters_to)
|
311
|
+
str2.tr!(characters_from, characters_to)
|
312
|
+
|
313
|
+
mappings.delete_if{|m| m[0].length == 1 && m[1].length == 1}
|
314
|
+
|
315
|
+
## long to one character mappings
|
316
|
+
pletters = TextAlignment::PADDING_LETTERS
|
317
|
+
|
318
|
+
# find the padding letter for str1
|
319
|
+
padding_letter1 = begin
|
320
|
+
i = pletters.index{|l| str2.index(l).nil?}
|
321
|
+
raise RuntimeError, "Could not find a padding letter for str1" if i.nil?
|
322
|
+
TextAlignment::PADDING_LETTERS[i]
|
323
|
+
end
|
324
|
+
|
325
|
+
# find the padding letter for str2
|
326
|
+
padding_letter2 = begin
|
327
|
+
i = pletters.index{|l| l != padding_letter1 && str1.index(l).nil?}
|
328
|
+
raise RuntimeError, "Could not find a padding letter for str2" if i.nil?
|
329
|
+
TextAlignment::PADDING_LETTERS[i]
|
330
|
+
end
|
331
|
+
|
332
|
+
# ASCII foldings
|
333
|
+
ascii_foldings = mappings.select{|m| m[0].length == 1 && m[1].length > 1}
|
334
|
+
ascii_foldings.each do |f|
|
335
|
+
from = f[1]
|
336
|
+
|
337
|
+
if str2.index(f[0])
|
338
|
+
to = f[0] + (padding_letter1 * (f[1].length - 1))
|
339
|
+
str1.gsub!(from, to)
|
340
|
+
end
|
341
|
+
|
342
|
+
if str1.index(f[0])
|
343
|
+
to = f[0] + (padding_letter2 * (f[1].length - 1))
|
344
|
+
str2.gsub!(from, to)
|
345
|
+
end
|
346
|
+
end
|
347
|
+
mappings.delete_if{|m| m[0].length == 1 && m[1].length > 1}
|
348
|
+
|
349
|
+
[str1, str2, mappings]
|
350
|
+
end
|
351
|
+
|
235
352
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_alignment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: '0.6'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-10-
|
11
|
+
date: 2020-10-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-dictionary
|