text_alignment 0.4.2 → 0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/align_annotations +32 -37
- data/lib/text_alignment/constants.rb +1 -1
- data/lib/text_alignment/lcs_comparison.rb +3 -1
- data/lib/text_alignment/mixed_alignment.rb +1 -54
- data/lib/text_alignment/text_alignment.rb +163 -46
- data/lib/text_alignment/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dc820991f5f694f154b94c369158909ccba3760829e0d881c7fd2e6ef7ddd149
|
4
|
+
data.tar.gz: 40ae6f2e388405426a77682bd1a3fb7a3c853076eced9b7301b632081dfd0a57
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5802241b4a8394d3c570c1d4b8f5e1d7706c72852e2d6e6fb23bda2f6e2972fa09f7001695db026667144e2af982eeb91ed0b700bd8151af6df794c98e3c069b
|
7
|
+
data.tar.gz: 8d7c93acbef6ab12bb2a0291444a7bcc73b0236bb5b0d06d274e95aa30c9ffc829965653b58270686147a9ac30ccf570518b3ad266120b320dfb20cd1620f5f9
|
data/bin/align_annotations
CHANGED
@@ -35,6 +35,10 @@ def align_mdoc(source_annotations, target_annotations)
|
|
35
35
|
source_annotations.each do |annotations|
|
36
36
|
alignment = TextAlignment::TextAlignment.new(annotations[:text], target_annotations[:text])
|
37
37
|
|
38
|
+
puts alignment.alignment_show
|
39
|
+
puts "-----"
|
40
|
+
puts
|
41
|
+
|
38
42
|
# alignment.block_alignments.each do |a|
|
39
43
|
# p {source:a[:source], target:a[:target]}
|
40
44
|
# puts "--"
|
@@ -103,48 +107,39 @@ target_annotations = if source_annotations.class == Array
|
|
103
107
|
else
|
104
108
|
alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
|
105
109
|
|
106
|
-
pp alignment
|
110
|
+
# pp alignment
|
107
111
|
|
108
112
|
# verification
|
109
|
-
source_text = source_annotations[:text]
|
110
|
-
puts "=====BEGIN"
|
111
|
-
(0 ... source_text.rstrip.length).each do |p|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
end
|
119
|
-
puts
|
120
|
-
puts "=====END"
|
121
|
-
|
122
|
-
puts "=====BEGIN"
|
123
|
-
(0 .. source_text.rstrip.length).each do |p|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
else
|
128
|
-
print '.'
|
129
|
-
end
|
130
|
-
end
|
131
|
-
puts
|
132
|
-
puts "=====END"
|
133
|
-
|
134
|
-
# alignment.block_alignments.each do |a|
|
135
|
-
# if a[:alignment].nil? || a[:alignment] == :empty
|
136
|
-
# # p [a[:source], a[:target]]
|
137
|
-
# # p a[:alignment]
|
113
|
+
# source_text = source_annotations[:text]
|
114
|
+
# puts "=====BEGIN"
|
115
|
+
# (0 ... source_text.rstrip.length).each do |p|
|
116
|
+
# t = alignment.transform_begin_position(p)
|
117
|
+
# if t.nil?
|
118
|
+
# print source_text[p]
|
119
|
+
# else
|
120
|
+
# print '.'
|
121
|
+
# end
|
122
|
+
# end
|
123
|
+
# puts
|
124
|
+
# puts "=====END"
|
125
|
+
|
126
|
+
# puts "=====BEGIN"
|
127
|
+
# (0 .. source_text.rstrip.length).each do |p|
|
128
|
+
# t = alignment.transform_end_position(p)
|
129
|
+
# if t.nil?
|
130
|
+
# print source_text[p]
|
138
131
|
# else
|
139
|
-
#
|
140
|
-
# p a[:alignment].similarity
|
141
|
-
# puts "--"
|
142
|
-
# puts source_annotations[:text][a[:source][:begin] ... a[:source][:end]]
|
143
|
-
# puts "--"
|
144
|
-
# puts target_text[a[:target][:begin] ... a[:target][:end]]
|
145
|
-
# puts "======"
|
132
|
+
# print '.'
|
146
133
|
# end
|
147
134
|
# end
|
135
|
+
# puts
|
136
|
+
# puts "=====END"
|
137
|
+
|
138
|
+
source_text = source_annotations[:text]
|
139
|
+
|
140
|
+
puts "[block alignment]"
|
141
|
+
puts alignment.alignment_show
|
142
|
+
puts "====="
|
148
143
|
# exit
|
149
144
|
|
150
145
|
# verification of source denotations
|
@@ -4,4 +4,4 @@ TextAlignment::SIZE_NGRAM = 8 unless defined? TextAlignment::SIZE_NGRAM
|
|
4
4
|
TextAlignment::SIZE_WINDOW = 60 unless defined? TextAlignment::SIZE_WINDOW
|
5
5
|
TextAlignment::BUFFER_RATE = 0.1 unless defined? TextAlignment::BUFFER_RATE
|
6
6
|
TextAlignment::BUFFER_MIN = 20 unless defined? TextAlignment::BUFFER_MIN
|
7
|
-
TextAlignment::TEXT_SIMILARITY_THRESHOLD = 0.
|
7
|
+
TextAlignment::TEXT_SIMILARITY_THRESHOLD = 0.9 unless defined? TextAlignment::TEXT_SIMILARITY_THRESHOLD
|
@@ -33,7 +33,9 @@ class TextAlignment::LCSComparison
|
|
33
33
|
@str2_match_initial = sdiff[match_initial].new_position
|
34
34
|
@str1_match_final = sdiff[match_final].old_position
|
35
35
|
@str2_match_final = sdiff[match_final].new_position
|
36
|
-
|
36
|
+
mlcs = sdiff.count{|d| d.action == '=' && d.old_element =~ /\S/ && d.new_element =~ /\S/}
|
37
|
+
@similarity = 2 * mlcs / (str1[@str1_match_initial .. @str1_match_final].scan(/\S/).count + str2[@str2_match_initial .. @str2_match_final].scan(/\S/).count).to_f
|
38
|
+
# @similarity = 2 * lcs / (str1[@str1_match_initial .. @str1_match_final].length + str2[@str2_match_initial .. @str2_match_final].length).to_f
|
37
39
|
else
|
38
40
|
@str1_match_initial = 0
|
39
41
|
@str2_match_initial = 0
|
@@ -10,8 +10,6 @@ require 'text_alignment/mappings'
|
|
10
10
|
|
11
11
|
module TextAlignment; end unless defined? TextAlignment
|
12
12
|
|
13
|
-
TextAlignment::NOMATCH_CHARS = "@^|#$%&_" unless defined? TextAlignment::NOMATCH_CHARS
|
14
|
-
|
15
13
|
class TextAlignment::MixedAlignment
|
16
14
|
attr_reader :sdiff
|
17
15
|
attr_reader :position_map_begin, :position_map_end
|
@@ -21,58 +19,7 @@ class TextAlignment::MixedAlignment
|
|
21
19
|
|
22
20
|
def initialize(str1, str2, mappings = [])
|
23
21
|
raise ArgumentError, "nil string" if str1.nil? || str2.nil?
|
24
|
-
|
25
|
-
|
26
|
-
## preprocessing
|
27
|
-
str1 = str1.dup
|
28
|
-
str2 = str2.dup
|
29
|
-
mappings = mappings.dup
|
30
|
-
|
31
|
-
## find the first nomatch character
|
32
|
-
TextAlignment::NOMATCH_CHARS.each_char do |c|
|
33
|
-
if str2.index(c).nil?
|
34
|
-
@nomatch_char1 = c
|
35
|
-
break
|
36
|
-
end
|
37
|
-
end
|
38
|
-
raise RuntimeError, "Cannot find nomatch character" if @nomatch_char1.nil?
|
39
|
-
|
40
|
-
## find the first nomatch character
|
41
|
-
TextAlignment::NOMATCH_CHARS.each_char do |c|
|
42
|
-
if c != @nomatch_char1 && str1.index(c).nil?
|
43
|
-
@nomatch_char2 = c
|
44
|
-
break
|
45
|
-
end
|
46
|
-
end
|
47
|
-
raise RuntimeError, "Cannot find nomatch character" if @nomatch_char2.nil?
|
48
|
-
|
49
|
-
# single character mappings
|
50
|
-
character_mappings = mappings.select{|m| m[0].length == 1 && m[1].length == 1}
|
51
|
-
characters_from = character_mappings.collect{|m| m[0]}.join
|
52
|
-
characters_to = character_mappings.collect{|m| m[1]}.join
|
53
|
-
characters_to.gsub!(/-/, '\-')
|
54
|
-
|
55
|
-
str1.tr!(characters_from, characters_to)
|
56
|
-
str2.tr!(characters_from, characters_to)
|
57
|
-
|
58
|
-
mappings.delete_if{|m| m[0].length == 1 && m[1].length == 1}
|
59
|
-
|
60
|
-
# ASCII foldings
|
61
|
-
ascii_foldings = mappings.select{|m| m[0].length == 1 && m[1].length > 1}
|
62
|
-
ascii_foldings.each do |f|
|
63
|
-
from = f[1]
|
64
|
-
|
65
|
-
if str2.index(f[0])
|
66
|
-
to = f[0] + (@nomatch_char1 * (f[1].length - 1))
|
67
|
-
str1.gsub!(from, to)
|
68
|
-
end
|
69
|
-
|
70
|
-
if str1.index(f[0])
|
71
|
-
to = f[0] + (@nomatch_char2 * (f[1].length - 1))
|
72
|
-
str2.gsub!(from, to)
|
73
|
-
end
|
74
|
-
end
|
75
|
-
mappings.delete_if{|m| m[0].length == 1 && m[1].length > 1}
|
22
|
+
mappings ||= []
|
76
23
|
|
77
24
|
_compute_mixed_alignment(str1, str2, mappings)
|
78
25
|
end
|
@@ -5,28 +5,35 @@ require 'text_alignment/mixed_alignment'
|
|
5
5
|
|
6
6
|
module TextAlignment; end unless defined? TextAlignment
|
7
7
|
|
8
|
+
TextAlignment::PADDING_LETTERS = ['@', '^', '|', '#', '$', '%', '&', '_'] unless defined? TextAlignment::PADDING_LETTERS
|
9
|
+
|
8
10
|
class TextAlignment::TextAlignment
|
9
|
-
attr_reader :
|
11
|
+
attr_reader :block_alignment
|
10
12
|
attr_reader :similarity
|
11
13
|
attr_reader :lost_annotations
|
12
14
|
|
13
|
-
def initialize(
|
14
|
-
raise ArgumentError, "nil string" if
|
15
|
+
def initialize(_str1, _str2, _size_ngram = nil, _size_window = nil, _text_similiarity_threshold = nil)
|
16
|
+
raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
|
15
17
|
|
16
|
-
|
17
|
-
size_window = _size_window || TextAlignment::SIZE_WINDOW
|
18
|
-
sim_threshold = _text_similiarity_threshold || TextAlignment::TEXT_SIMILARITY_THRESHOLD
|
18
|
+
@block_alignment = {source_text:_str1, target_text:_str2}
|
19
19
|
|
20
|
-
mappings
|
20
|
+
str1, str2, mappings = string_preprocessing(_str1, _str2)
|
21
21
|
|
22
22
|
# try exact match
|
23
23
|
block_begin = str2.index(str1)
|
24
24
|
unless block_begin.nil?
|
25
|
-
@
|
26
|
-
return @
|
25
|
+
@block_alignment[:blocks] = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
|
26
|
+
return @block_alignment
|
27
|
+
end
|
28
|
+
|
29
|
+
# try exact match
|
30
|
+
block_begin = str2.downcase.index(str1.downcase)
|
31
|
+
unless block_begin.nil?
|
32
|
+
@block_alignment[:blocks] = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
|
33
|
+
return @block_alignment
|
27
34
|
end
|
28
35
|
|
29
|
-
anchor_finder = TextAlignment::AnchorFinder.new(str1, str2,
|
36
|
+
anchor_finder = TextAlignment::AnchorFinder.new(str1, str2, _size_ngram, _size_window, _text_similiarity_threshold)
|
30
37
|
|
31
38
|
# To collect matched blocks
|
32
39
|
mblocks = []
|
@@ -56,7 +63,7 @@ class TextAlignment::TextAlignment
|
|
56
63
|
# puts
|
57
64
|
|
58
65
|
## To find block alignments
|
59
|
-
@
|
66
|
+
@block_alignment[:blocks] = []
|
60
67
|
return if mblocks.empty?
|
61
68
|
|
62
69
|
# Initial step
|
@@ -65,35 +72,35 @@ class TextAlignment::TextAlignment
|
|
65
72
|
e2 = mblocks[0][:target][:begin]
|
66
73
|
|
67
74
|
if mblocks[0][:target][:begin] == 0
|
68
|
-
@
|
75
|
+
@block_alignment[:blocks] << {source:{begin:0, end:e1}, target:{begin:0, end:0}, alignment: :empty}
|
69
76
|
else
|
70
77
|
_str1 = str1[0 ... e1]
|
71
78
|
_str2 = str2[0 ... e2]
|
72
79
|
|
73
80
|
unless _str1.strip.empty?
|
74
81
|
if _str2.strip.empty?
|
75
|
-
@
|
82
|
+
@block_alignment[:blocks] << {source:{begin:0, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
|
76
83
|
else
|
77
84
|
len_min = [_str1.length, _str2.length].min
|
78
85
|
len_buffer = (len_min * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
|
79
86
|
b1 = _str1.length < len_buffer ? 0 : e1 - len_buffer
|
80
87
|
b2 = _str2.length < len_buffer ? 0 : e2 - len_buffer
|
81
88
|
|
82
|
-
@
|
89
|
+
@block_alignment[:blocks] << {source:{begin:0, end:b1}, target:{begin:0, end:b2}, alignment: :empty} if b1 > 0
|
83
90
|
|
84
91
|
_str1 = str1[b1 ... e1]
|
85
92
|
_str2 = str2[b2 ... e2]
|
86
93
|
alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
|
87
94
|
if alignment.similarity < 0.6
|
88
|
-
@
|
95
|
+
@block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
|
89
96
|
else
|
90
|
-
@
|
97
|
+
@block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment:alignment}
|
91
98
|
end
|
92
99
|
end
|
93
100
|
end
|
94
101
|
end
|
95
102
|
end
|
96
|
-
@
|
103
|
+
@block_alignment[:blocks] << mblocks[0].merge(alignment: :block)
|
97
104
|
|
98
105
|
(1 ... mblocks.length).each do |i|
|
99
106
|
b1 = mblocks[i - 1][:source][:end]
|
@@ -104,17 +111,17 @@ class TextAlignment::TextAlignment
|
|
104
111
|
_str2 = str2[b2 ... e2]
|
105
112
|
unless _str1.strip.empty?
|
106
113
|
if _str2.strip.empty?
|
107
|
-
@
|
114
|
+
@block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
|
108
115
|
else
|
109
116
|
alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
|
110
117
|
if alignment.similarity < 0.6
|
111
|
-
@
|
118
|
+
@block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
|
112
119
|
else
|
113
|
-
@
|
120
|
+
@block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment}
|
114
121
|
end
|
115
122
|
end
|
116
123
|
end
|
117
|
-
@
|
124
|
+
@block_alignment[:blocks] << mblocks[i].merge(alignment: :block)
|
118
125
|
end
|
119
126
|
|
120
127
|
# Final step
|
@@ -126,7 +133,7 @@ class TextAlignment::TextAlignment
|
|
126
133
|
|
127
134
|
unless _str1.strip.empty?
|
128
135
|
if _str2.strip.empty?
|
129
|
-
@
|
136
|
+
@block_alignment[:blocks] << {source:{begin:b1, end:str1.length}, target:{begin:b2, end:str2.length}, alignment: :empty}
|
130
137
|
else
|
131
138
|
len_min = [_str1.length, _str2.length].min
|
132
139
|
len_buffer = (len_min * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
|
@@ -137,56 +144,56 @@ class TextAlignment::TextAlignment
|
|
137
144
|
|
138
145
|
alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
|
139
146
|
if alignment.similarity < 0.6
|
140
|
-
@
|
147
|
+
@block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
|
141
148
|
else
|
142
|
-
@
|
149
|
+
@block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment}
|
143
150
|
end
|
144
151
|
|
145
|
-
@
|
152
|
+
@block_alignment[:blocks] << {source:{begin:e1, end:-1}, target:{begin:e2, end:-1}, alignment: :empty} if e1 < str1.length
|
146
153
|
end
|
147
154
|
end
|
148
155
|
end
|
149
156
|
|
150
|
-
@
|
157
|
+
@block_alignment[:blocks].each do |a|
|
151
158
|
a[:delta] = a[:target][:begin] - a[:source][:begin]
|
152
159
|
end
|
153
160
|
end
|
154
161
|
|
155
162
|
def transform_begin_position(begin_position)
|
156
|
-
i = @
|
157
|
-
|
158
|
-
|
159
|
-
b = if
|
160
|
-
begin_position +
|
161
|
-
elsif
|
162
|
-
if begin_position ==
|
163
|
-
|
163
|
+
i = @block_alignment[:blocks].index{|b| b[:source][:end] > begin_position}
|
164
|
+
block = @block_alignment[:blocks][i]
|
165
|
+
|
166
|
+
b = if block[:alignment] == :block
|
167
|
+
begin_position + block[:delta]
|
168
|
+
elsif block[:alignment] == :empty
|
169
|
+
if begin_position == block[:source][:begin]
|
170
|
+
block[:target][:begin]
|
164
171
|
else
|
165
172
|
# raise "lost annotation"
|
166
173
|
nil
|
167
174
|
end
|
168
175
|
else
|
169
|
-
r =
|
170
|
-
r.nil? ? nil : r +
|
176
|
+
r = block[:alignment].transform_begin_position(begin_position - block[:source][:begin])
|
177
|
+
r.nil? ? nil : r + block[:target][:begin]
|
171
178
|
end
|
172
179
|
end
|
173
180
|
|
174
181
|
def transform_end_position(end_position)
|
175
|
-
i = @
|
176
|
-
|
177
|
-
|
178
|
-
e = if
|
179
|
-
end_position +
|
180
|
-
elsif
|
181
|
-
if end_position ==
|
182
|
-
|
182
|
+
i = @block_alignment[:blocks].index{|b| b[:source][:end] >= end_position}
|
183
|
+
block = @block_alignment[:blocks][i]
|
184
|
+
|
185
|
+
e = if block[:alignment] == :block
|
186
|
+
end_position + block[:delta]
|
187
|
+
elsif block[:alignment] == :empty
|
188
|
+
if end_position == block[:source][:end]
|
189
|
+
block[:target][:end]
|
183
190
|
else
|
184
191
|
# raise "lost annotation"
|
185
192
|
nil
|
186
193
|
end
|
187
194
|
else
|
188
|
-
r =
|
189
|
-
r.nil? ? nil : r +
|
195
|
+
r = block[:alignment].transform_end_position(end_position - block[:source][:begin])
|
196
|
+
r.nil? ? nil : r + block[:target][:begin]
|
190
197
|
end
|
191
198
|
end
|
192
199
|
|
@@ -232,4 +239,114 @@ class TextAlignment::TextAlignment
|
|
232
239
|
r
|
233
240
|
end
|
234
241
|
|
242
|
+
def alignment_show
|
243
|
+
stext = @block_alignment[:source_text]
|
244
|
+
ttext = @block_alignment[:target_text]
|
245
|
+
|
246
|
+
show = ''
|
247
|
+
@block_alignment[:blocks].each do |a|
|
248
|
+
show += case a[:alignment]
|
249
|
+
when :block
|
250
|
+
"===== common =====\n" +
|
251
|
+
stext[a[:source][:begin] ... a[:source][:end]] + "\n\n"
|
252
|
+
when :empty
|
253
|
+
"<<<<< string 1\n" +
|
254
|
+
stext[a[:source][:begin] ... a[:source][:end]] + "\n\n" +
|
255
|
+
">>>>> string 2\n" +
|
256
|
+
ttext[a[:target][:begin] ... a[:target][:end]] + "\n\n"
|
257
|
+
else
|
258
|
+
astr1 = ''
|
259
|
+
astr2 = ''
|
260
|
+
|
261
|
+
base = a[:source][:begin]
|
262
|
+
astr1 = a[:alignment].sdiff.map do |c|
|
263
|
+
case c.action
|
264
|
+
when '='
|
265
|
+
stext[c.old_position + base]
|
266
|
+
when '+'
|
267
|
+
'_'
|
268
|
+
when '-'
|
269
|
+
stext[c.old_position + base]
|
270
|
+
when '!'
|
271
|
+
stext[c.old_position + base] + '_'
|
272
|
+
end
|
273
|
+
end.join('')
|
274
|
+
|
275
|
+
base = a[:target][:begin]
|
276
|
+
astr2 = a[:alignment].sdiff.map do |c|
|
277
|
+
case c.action
|
278
|
+
when '='
|
279
|
+
ttext[c.new_position + base]
|
280
|
+
when '+'
|
281
|
+
ttext[c.new_position + base]
|
282
|
+
when '-'
|
283
|
+
'_'
|
284
|
+
when '!'
|
285
|
+
'_' + ttext[c.new_position + base]
|
286
|
+
end
|
287
|
+
end.join('')
|
288
|
+
|
289
|
+
"***** local mismatch\n" +
|
290
|
+
"[#{astr1}]\n" +
|
291
|
+
"[#{astr2}]\n\n"
|
292
|
+
end
|
293
|
+
end
|
294
|
+
show
|
295
|
+
end
|
296
|
+
|
297
|
+
private
|
298
|
+
|
299
|
+
def string_preprocessing(_str1, _str2)
|
300
|
+
str1 = _str1.dup
|
301
|
+
str2 = _str2.dup
|
302
|
+
mappings = TextAlignment::MAPPINGS.dup
|
303
|
+
|
304
|
+
## single character mappings
|
305
|
+
character_mappings = mappings.select{|m| m[0].length == 1 && m[1].length == 1}
|
306
|
+
characters_from = character_mappings.collect{|m| m[0]}.join
|
307
|
+
characters_to = character_mappings.collect{|m| m[1]}.join
|
308
|
+
characters_to.gsub!(/-/, '\-')
|
309
|
+
|
310
|
+
str1.tr!(characters_from, characters_to)
|
311
|
+
str2.tr!(characters_from, characters_to)
|
312
|
+
|
313
|
+
mappings.delete_if{|m| m[0].length == 1 && m[1].length == 1}
|
314
|
+
|
315
|
+
## long to one character mappings
|
316
|
+
pletters = TextAlignment::PADDING_LETTERS
|
317
|
+
|
318
|
+
# find the padding letter for str1
|
319
|
+
padding_letter1 = begin
|
320
|
+
i = pletters.index{|l| str2.index(l).nil?}
|
321
|
+
raise RuntimeError, "Could not find a padding letter for str1" if i.nil?
|
322
|
+
TextAlignment::PADDING_LETTERS[i]
|
323
|
+
end
|
324
|
+
|
325
|
+
# find the padding letter for str2
|
326
|
+
padding_letter2 = begin
|
327
|
+
i = pletters.index{|l| l != padding_letter1 && str1.index(l).nil?}
|
328
|
+
raise RuntimeError, "Could not find a padding letter for str2" if i.nil?
|
329
|
+
TextAlignment::PADDING_LETTERS[i]
|
330
|
+
end
|
331
|
+
|
332
|
+
# ASCII foldings
|
333
|
+
ascii_foldings = mappings.select{|m| m[0].length == 1 && m[1].length > 1}
|
334
|
+
ascii_foldings.each do |f|
|
335
|
+
from = f[1]
|
336
|
+
|
337
|
+
if str2.index(f[0])
|
338
|
+
to = f[0] + (padding_letter1 * (f[1].length - 1))
|
339
|
+
str1.gsub!(from, to)
|
340
|
+
end
|
341
|
+
|
342
|
+
if str1.index(f[0])
|
343
|
+
to = f[0] + (padding_letter2 * (f[1].length - 1))
|
344
|
+
str2.gsub!(from, to)
|
345
|
+
end
|
346
|
+
end
|
347
|
+
mappings.delete_if{|m| m[0].length == 1 && m[1].length > 1}
|
348
|
+
|
349
|
+
[str1, str2, mappings]
|
350
|
+
end
|
351
|
+
|
235
352
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_alignment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: '0.6'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-10-
|
11
|
+
date: 2020-10-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-dictionary
|