text_alignment 0.12.1 → 0.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/text_alignment/char_mapping.rb +11 -5
- data/lib/text_alignment/mixed_alignment.rb +1 -1
- data/lib/text_alignment/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3c2a36fe4cfde7dfb76f554fd4afcae7cb5a03e455887621217f5e5e633b20b3
|
4
|
+
data.tar.gz: f63070c6f423bc15d0fc8c742a21238a104a1b2c1d3fe56ac436effa8ef8eacf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '078a41bc6ab9b16e7747be6a3bb15aff4b23a1161bcea0b653a93f04d673799afcac2109cd1ce8d1a95c99c5c07d36842e3698c9f0997500e653fb4ab939e04a'
|
7
|
+
data.tar.gz: ce44d334779d43b3057317537f615ebf39b8049639d3e50c4e14272c952b76a5df2b060cfd3a15f1d28372c11a795ef8bf43cb04de32b5a78ce2f44433edddfb
|
data/lib/text_alignment/char_mapping.rb
CHANGED
@@ -93,6 +93,7 @@ class TextAlignment::CharMapping
|
|
93
93
|
|
94
94
|
@text = _text
|
95
95
|
|
96
|
+
# sorting by the length of the spell-outs is important
|
96
97
|
char_mapping ||= TextAlignment::CHAR_MAPPING.sort{|a, b| b[1].length <=> a[1].length}
|
97
98
|
@mapped_text, offset_mapping = enmap_text(_text, char_mapping)
|
98
99
|
@index_enmap = offset_mapping.to_h
|
@@ -137,7 +138,7 @@ class TextAlignment::CharMapping
|
|
137
138
|
def enmap_text(_text, char_mapping, no_ws = false)
|
138
139
|
text = _text.dup
|
139
140
|
|
140
|
-
# To
|
141
|
+
# To perform the single letter mapping replacement
|
141
142
|
char_mapping.each do |one, long|
|
142
143
|
text.gsub!(one, long) if long.length == 1
|
143
144
|
end
|
@@ -149,12 +150,16 @@ class TextAlignment::CharMapping
|
|
149
150
|
|
150
151
|
init_next = 0
|
151
152
|
while loc = text.index(long, init_next)
|
152
|
-
|
153
|
+
# Heuristics to check if the surrounding letters are sufficiently distinguished.
|
154
|
+
if long.length > 3 || ((text[loc - 1, 2] !~ /[a-z][a-z]/) && (text[loc + long.length - 1, 2] !~ /[a-z][a-z]/))
|
155
|
+
# if true
|
156
|
+
rpositions << [loc, long.length, 1]
|
157
|
+
|
158
|
+
# a workaround to avoid messing-up due to embedding
|
159
|
+
text[loc, long.length] = one * long.length
|
160
|
+
end
|
153
161
|
init_next = loc + long.length
|
154
162
|
end
|
155
|
-
|
156
|
-
# a workaround to avoid messing-up due to embedding
|
157
|
-
text.gsub!(long, one * long.length)
|
158
163
|
end
|
159
164
|
|
160
165
|
# To get the replacement positions, (position, old_length, new_length), for consecutive whitespaces
|
@@ -182,6 +187,7 @@ class TextAlignment::CharMapping
|
|
182
187
|
|
183
188
|
# To execute the long letter mapping
|
184
189
|
char_mapping.each do |one, long|
|
190
|
+
next unless text =~ /#{one}/
|
185
191
|
text.gsub!(one * long.length, one) if long.length > 1
|
186
192
|
end
|
187
193
|
|
data/lib/text_alignment/mixed_alignment.rb
CHANGED
@@ -17,7 +17,7 @@ class TextAlignment::MixedAlignment
|
|
17
17
|
attr_reader :similarity
|
18
18
|
attr_reader :str1_match_initial, :str1_match_final, :str2_match_initial, :str2_match_final
|
19
19
|
|
20
|
-
def initialize(_str1, _str2,
|
20
|
+
def initialize(_str1, _str2, mappings = nil)
|
21
21
|
raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
|
22
22
|
|
23
23
|
mappings ||= TextAlignment::CHAR_MAPPING
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_alignment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.12.1
|
4
|
+
version: 0.12.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-01-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-dictionary
|