text_alignment 0.12.10 → 0.12.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/Gemfile.lock +1 -1
- data/lib/text_alignment/char_mapping.rb +25 -23
- data/lib/text_alignment/mixed_alignment.rb +8 -2
- data/lib/text_alignment/version.rb +1 -1
- metadata +3 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 16b91a2e24502e17557583b56025c46422c6e93698729a54d3936d8a094dc08c
|
|
4
|
+
data.tar.gz: 112acc4316afe87bf26f7cea98839b114ced826b3edbd90c592e23dbd15274ee
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7165101e3694429324a0044670007c9d5cf07c5cc02e536094e113a107ef1fee54b651f9bc8c75047c0beac6ae430842d835436ceeb25a49a02b503a37a5eb6c
|
|
7
|
+
data.tar.gz: 1d1aa73737aa6425a368760e2cd16ee71f97c5a6c77ec30c5db5671ff574954c36c1203484b51205a210909483d245652ba310184df3365b5a3766369b3bec0a
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
|
@@ -151,29 +151,31 @@ class TextAlignment::CharMapping
|
|
|
151
151
|
|
|
152
152
|
def enmap_text(_text, char_mapping, no_ws = false)
|
|
153
153
|
text = _text.dup
|
|
154
|
-
|
|
155
|
-
# To perform the single letter mapping replacement
|
|
156
|
-
char_mapping.each do |one, long|
|
|
157
|
-
# text.gsub!(one, long) if long.length == 1
|
|
158
|
-
text.tr!(one, long) if long.length == 1
|
|
159
|
-
end
|
|
160
|
-
|
|
161
|
-
# To get the replacement positions, (position, old_length, new_length), for char mappings
|
|
162
154
|
rpositions = []
|
|
163
|
-
char_mapping.each do |one, long|
|
|
164
|
-
next if long.length == 1
|
|
165
155
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
156
|
+
# Skip character mapping if text is pure ASCII (performance optimization)
|
|
157
|
+
unless text.ascii_only?
|
|
158
|
+
# To perform the single letter mapping replacement
|
|
159
|
+
char_mapping.each do |one, long|
|
|
160
|
+
text.gsub!(one, long) if long.length == 1
|
|
161
|
+
end
|
|
172
162
|
|
|
173
|
-
|
|
174
|
-
|
|
163
|
+
# To get the replacement positions, (position, old_length, new_length), for char mappings
|
|
164
|
+
char_mapping.each do |one, long|
|
|
165
|
+
next if long.length == 1
|
|
166
|
+
|
|
167
|
+
init_next = 0
|
|
168
|
+
while loc = text.index(long, init_next)
|
|
169
|
+
# Heuristics to check if the surrounding letters are sufficiently distinguished.
|
|
170
|
+
if long.length > 3 || ((text[loc - 1, 2] !~ /[a-z][a-z]/) && (text[loc + long.length - 1, 2] !~ /[a-z][a-z]/))
|
|
171
|
+
# if true
|
|
172
|
+
rpositions << [loc, long.length, 1]
|
|
173
|
+
|
|
174
|
+
# a workaround to avoid messing-up due to embedding
|
|
175
|
+
text[loc, long.length] = one * long.length
|
|
176
|
+
end
|
|
177
|
+
init_next = loc + long.length
|
|
175
178
|
end
|
|
176
|
-
init_next = loc + long.length
|
|
177
179
|
end
|
|
178
180
|
end
|
|
179
181
|
|
|
@@ -218,8 +220,8 @@ class TextAlignment::CharMapping
|
|
|
218
220
|
scanner = StringScanner.new(text)
|
|
219
221
|
|
|
220
222
|
while scanner.scan_until(/\s{2,}/)
|
|
221
|
-
len = scanner.
|
|
222
|
-
loc = scanner.
|
|
223
|
+
len = scanner.matched.length
|
|
224
|
+
loc = scanner.charpos - len
|
|
223
225
|
rpositions << [loc, len, 1]
|
|
224
226
|
end
|
|
225
227
|
|
|
@@ -232,8 +234,8 @@ class TextAlignment::CharMapping
|
|
|
232
234
|
scanner = StringScanner.new(text)
|
|
233
235
|
|
|
234
236
|
while scanner.scan(/\s+/)
|
|
235
|
-
len = scanner.
|
|
236
|
-
start = scanner.
|
|
237
|
+
len = scanner.matched.length
|
|
238
|
+
start = scanner.charpos - len
|
|
237
239
|
rpositions << [start, len, 0]
|
|
238
240
|
end
|
|
239
241
|
|
|
@@ -93,7 +93,13 @@ class TextAlignment::MixedAlignment
|
|
|
93
93
|
galign = TextAlignment::GLCSAlignment.new(str1[deletion[0] .. deletion[-1]], str2[addition[0] .. addition[-1]], mappings)
|
|
94
94
|
galign.position_map_begin.each {|k, v| posmap_begin[k + deletion[0]] = v.nil? ? nil : v + addition[0]}
|
|
95
95
|
galign.position_map_end.each {|k, v| posmap_end[k + deletion[0]] = v.nil? ? nil : v + addition[0]}
|
|
96
|
-
|
|
96
|
+
# Correct end position at the boundary: GLCSAlignment doesn't adjust
|
|
97
|
+
# posmap_end at its p1==0 for leading additions, but in this context
|
|
98
|
+
# (substring alignment) the end position should be before the additions.
|
|
99
|
+
if galign.str2_match_begin && galign.str2_match_begin > 0
|
|
100
|
+
posmap_end[deletion[0]] = addition[0]
|
|
101
|
+
end
|
|
102
|
+
posmap_begin[p1] = p2
|
|
97
103
|
@common_elements += galign.common_elements
|
|
98
104
|
@mapped_elements += galign.mapped_elements
|
|
99
105
|
else
|
|
@@ -127,7 +133,7 @@ class TextAlignment::MixedAlignment
|
|
|
127
133
|
galign = TextAlignment::GLCSAlignment.new(str1[deletion[0] .. deletion[-1]], str2[addition[0] .. addition[-1]], mappings)
|
|
128
134
|
galign.position_map_begin.each {|k, v| posmap_begin[k + deletion[0]] = v.nil? ? nil : v + addition[0]}
|
|
129
135
|
galign.position_map_end.each {|k, v| posmap_end[k + deletion[0]] = v.nil? ? nil : v + addition[0]}
|
|
130
|
-
posmap_begin[p1]
|
|
136
|
+
posmap_begin[p1] = p2
|
|
131
137
|
@common_elements += galign.common_elements
|
|
132
138
|
@mapped_elements += galign.mapped_elements
|
|
133
139
|
else
|
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: text_alignment
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.12.10
|
|
4
|
+
version: 0.12.12
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Jin-Dong Kim
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: ruby-dictionary
|
|
@@ -92,11 +91,9 @@ files:
|
|
|
92
91
|
- lib/text_alignment/text_alignment.rb
|
|
93
92
|
- lib/text_alignment/version.rb
|
|
94
93
|
- text_alignment.gemspec
|
|
95
|
-
homepage:
|
|
96
94
|
licenses:
|
|
97
95
|
- MIT
|
|
98
96
|
metadata: {}
|
|
99
|
-
post_install_message:
|
|
100
97
|
rdoc_options: []
|
|
101
98
|
require_paths:
|
|
102
99
|
- lib
|
|
@@ -111,8 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
111
108
|
- !ruby/object:Gem::Version
|
|
112
109
|
version: '0'
|
|
113
110
|
requirements: []
|
|
114
|
-
rubygems_version: 3.
|
|
115
|
-
signing_key:
|
|
111
|
+
rubygems_version: 3.6.9
|
|
116
112
|
specification_version: 4
|
|
117
113
|
summary: Ruby class for aligning two character strings
|
|
118
114
|
test_files: []
|