text_alignment 0.12.4 → 0.12.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 223c09ef2225dcd6f9e8af5f0ee4c8653c725a19db30aeba1ac9cba7747bd79f
|
4
|
+
data.tar.gz: fef76edf37b17523ac9eaff68767cf0c9bb37fc595b9282c3a1f341071aaab7e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1fb1960ff183d185186cc3e16822fffa7046e438f36cf894cade2988cf2ecd99d8ce700facb9b7e02e982ac06006ba5d8dfdac5cf14b5d0b9619280d7444166e
|
7
|
+
data.tar.gz: f89a4db2eb2bd9b594f3202d91727d4be7918f167342ec18156d4050047ba395fd30969dddf8886d1a22a45508887374744d6f1f26f27cd40b58ead67522c4a6
|
@@ -79,7 +79,7 @@ TextAlignment::CHAR_MAPPING = [
|
|
79
79
|
]
|
80
80
|
|
81
81
|
# build a string of every Unicode whitespace codepoint \s covers:
|
82
|
-
|
82
|
+
_WS_LIST = [
|
83
83
|
"\u0009", # CHARACTER TABULATION
|
84
84
|
"\u000A", # LINE FEED
|
85
85
|
"\u000B", # LINE TABULATION
|
@@ -105,7 +105,10 @@ ALL_WS = [
|
|
105
105
|
"\u202F", # NARROW NO-BREAK SPACE
|
106
106
|
"\u205F", # MEDIUM MATHEMATICAL SPACE
|
107
107
|
"\u3000", # IDEOGRAPHIC SPACE
|
108
|
-
].
|
108
|
+
].freeze
|
109
|
+
|
110
|
+
ALL_WS = _WS_LIST.join.freeze
|
111
|
+
Ractor.make_shareable(ALL_WS)
|
109
112
|
|
110
113
|
class TextAlignment::CharMapping
|
111
114
|
attr_reader :mapped_text, :index_enmap
|
@@ -151,7 +154,8 @@ class TextAlignment::CharMapping
|
|
151
154
|
|
152
155
|
# To perform the single letter mapping replacement
|
153
156
|
char_mapping.each do |one, long|
|
154
|
-
text.gsub!(one, long) if long.length == 1
|
157
|
+
# text.gsub!(one, long) if long.length == 1
|
158
|
+
text.tr!(one, long) if long.length == 1
|
155
159
|
end
|
156
160
|
|
157
161
|
# To get the replacement positions, (position, old_length, new_length), for char mappings
|
@@ -21,7 +21,7 @@ class << TextAlignment
|
|
21
21
|
mappings.delete_if{|m| m[0].length == 1 && m[1].length == 1}
|
22
22
|
characters_from = character_mappings.collect{|m| m[0]}.join
|
23
23
|
characters_to = character_mappings.collect{|m| m[1]}.join
|
24
|
-
characters_to.gsub!(
|
24
|
+
characters_to.gsub!('-', '\-')
|
25
25
|
|
26
26
|
source.tr!(characters_from, characters_to)
|
27
27
|
targets.each{|target| target[:text].tr!(characters_from, characters_to)}
|
@@ -10,7 +10,7 @@ class << TextAlignment
|
|
10
10
|
character_mappings = mappings.select{|m| m[0].length == 1 && m[1].length == 1}
|
11
11
|
characters_from = character_mappings.collect{|m| m[0]}.join
|
12
12
|
characters_to = character_mappings.collect{|m| m[1]}.join
|
13
|
-
characters_to.gsub!(
|
13
|
+
characters_to.gsub!('-', '\-')
|
14
14
|
|
15
15
|
str1.tr!(characters_from, characters_to)
|
16
16
|
|
@@ -13,6 +13,7 @@ end
|
|
13
13
|
class TextAlignment::LCSMin
|
14
14
|
attr_reader :sdiff, :lcs, :m1_initial, :m1_final, :m2_initial, :m2_final
|
15
15
|
|
16
|
+
WHITESPACE = ["\t", "\n", "\v", "\f", "\r", " "].join
|
16
17
|
PLACEHOLDER_CHAR = '_'
|
17
18
|
|
18
19
|
def initialize (str1, str2)
|
@@ -23,7 +24,7 @@ class TextAlignment::LCSMin
|
|
23
24
|
# str2 is copied with w/s characters replaced with the placeholder characters,
|
24
25
|
# to avoid overfitting to w/s characters during LCS computation.
|
25
26
|
@str1 = str1
|
26
|
-
@str2 = str2.
|
27
|
+
@str2 = str2.tr(WHITESPACE, PLACEHOLDER_CHAR)
|
27
28
|
|
28
29
|
# find the corresponding minimal range of the two strings
|
29
30
|
r = _find_min_range(0, @str1.length - 1, 0, @str2.length - 1)
|