text_alignment 0.12.1 → 0.12.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/text_alignment/char_mapping.rb +11 -5
- data/lib/text_alignment/mixed_alignment.rb +1 -1
- data/lib/text_alignment/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3c2a36fe4cfde7dfb76f554fd4afcae7cb5a03e455887621217f5e5e633b20b3
|
4
|
+
data.tar.gz: f63070c6f423bc15d0fc8c742a21238a104a1b2c1d3fe56ac436effa8ef8eacf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '078a41bc6ab9b16e7747be6a3bb15aff4b23a1161bcea0b653a93f04d673799afcac2109cd1ce8d1a95c99c5c07d36842e3698c9f0997500e653fb4ab939e04a'
|
7
|
+
data.tar.gz: ce44d334779d43b3057317537f615ebf39b8049639d3e50c4e14272c952b76a5df2b060cfd3a15f1d28372c11a795ef8bf43cb04de32b5a78ce2f44433edddfb
|
@@ -93,6 +93,7 @@ class TextAlignment::CharMapping
|
|
93
93
|
|
94
94
|
@text = _text
|
95
95
|
|
96
|
+
# Sorting by the length of the spell-outs is important
|
96
97
|
char_mapping ||= TextAlignment::CHAR_MAPPING.sort{|a, b| b[1].length <=> a[1].length}
|
97
98
|
@mapped_text, offset_mapping = enmap_text(_text, char_mapping)
|
98
99
|
@index_enmap = offset_mapping.to_h
|
@@ -137,7 +138,7 @@ class TextAlignment::CharMapping
|
|
137
138
|
def enmap_text(_text, char_mapping, no_ws = false)
|
138
139
|
text = _text.dup
|
139
140
|
|
140
|
-
# To
|
141
|
+
# To perform the single letter mapping replacement
|
141
142
|
char_mapping.each do |one, long|
|
142
143
|
text.gsub!(one, long) if long.length == 1
|
143
144
|
end
|
@@ -149,12 +150,16 @@ class TextAlignment::CharMapping
|
|
149
150
|
|
150
151
|
init_next = 0
|
151
152
|
while loc = text.index(long, init_next)
|
152
|
-
|
153
|
+
# Heuristics to check if the surrounding letters are sufficiently distinguished.
|
154
|
+
if long.length > 3 || ((text[loc - 1, 2] !~ /[a-z][a-z]/) && (text[loc + long.length - 1, 2] !~ /[a-z][a-z]/))
|
155
|
+
# if true
|
156
|
+
rpositions << [loc, long.length, 1]
|
157
|
+
|
158
|
+
# a workaround to avoid messing-up due to embedding
|
159
|
+
text[loc, long.length] = one * long.length
|
160
|
+
end
|
153
161
|
init_next = loc + long.length
|
154
162
|
end
|
155
|
-
|
156
|
-
# a workaround to avoid messing-up due to embedding
|
157
|
-
text.gsub!(long, one * long.length)
|
158
163
|
end
|
159
164
|
|
160
165
|
# To get the replacement positions, (position, old_length, new_length), for consecutive whitespaces
|
@@ -182,6 +187,7 @@ class TextAlignment::CharMapping
|
|
182
187
|
|
183
188
|
# To execute the long letter mapping
|
184
189
|
char_mapping.each do |one, long|
|
190
|
+
next unless text =~ /#{one}/
|
185
191
|
text.gsub!(one * long.length, one) if long.length > 1
|
186
192
|
end
|
187
193
|
|
@@ -17,7 +17,7 @@ class TextAlignment::MixedAlignment
|
|
17
17
|
attr_reader :similarity
|
18
18
|
attr_reader :str1_match_initial, :str1_match_final, :str2_match_initial, :str2_match_final
|
19
19
|
|
20
|
-
def initialize(_str1, _str2,
|
20
|
+
def initialize(_str1, _str2, mappings = nil)
|
21
21
|
raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
|
22
22
|
|
23
23
|
mappings ||= TextAlignment::CHAR_MAPPING
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_alignment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.12.1
|
4
|
+
version: 0.12.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-01-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-dictionary
|