text_alignment 0.12.1 → 0.12.3
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2345340266a0e66e9d26daaa51db1c9239bb837f52a5112a5c525a6d87b120d5
|
4
|
+
data.tar.gz: d3d1d118786e89a4bd7f9a6a9315643967c3ae099f8072913862ded9c895bfa5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9fc6c3235373f0e0174a922f006dd2cdf687361dfd567056137d20707b674ae75a40c13862d5a02946a225c19dfdae239ebf3f274a47bb4df1c8b2256bd968e2
|
7
|
+
data.tar.gz: 50dede22ed93d9e93a21dbbec5de39f3e342147c512d620a341331c4ddee6d0cd2cd08535e484732fd33e24ec1d8789d4daa7ef79191b13bcb9908634fcafe13
|
@@ -93,6 +93,7 @@ class TextAlignment::CharMapping
|
|
93
93
|
|
94
94
|
@text = _text
|
95
95
|
|
96
|
+
# sorting by the length of the spell-outs is important
|
96
97
|
char_mapping ||= TextAlignment::CHAR_MAPPING.sort{|a, b| b[1].length <=> a[1].length}
|
97
98
|
@mapped_text, offset_mapping = enmap_text(_text, char_mapping)
|
98
99
|
@index_enmap = offset_mapping.to_h
|
@@ -100,11 +101,11 @@ class TextAlignment::CharMapping
|
|
100
101
|
end
|
101
102
|
|
102
103
|
def enmap_position(position)
|
103
|
-
@index_enmap[position]
|
104
|
+
@index_enmap[position]
|
104
105
|
end
|
105
106
|
|
106
107
|
def demap_position(position)
|
107
|
-
@index_demap[position]
|
108
|
+
@index_demap[position]
|
108
109
|
end
|
109
110
|
|
110
111
|
def enmap_denotations(denotations)
|
@@ -112,23 +113,6 @@ class TextAlignment::CharMapping
|
|
112
113
|
|
113
114
|
denotations.map do |d|
|
114
115
|
d.dup.merge(span:{begin:enmap_position(d[:span][:begin]), end:enmap_position(d[:span][:end])})
|
115
|
-
rescue ArgumentError => e
|
116
|
-
snippet_begin = d[:span][:begin] - 5
|
117
|
-
if snippet_begin < 0
|
118
|
-
snippet_begin = 0
|
119
|
-
end
|
120
|
-
snippet_end = d[:span][:end] + 5
|
121
|
-
if snippet_end > @text.length
|
122
|
-
snippet_end = @text.length
|
123
|
-
end
|
124
|
-
snippet = @text[snippet_begin ... d[:span][:begin]] + '[' + @text[d[:span][:begin] ... d[:span][:end]] + ']' + @text[d[:span][:end] ... snippet_end]
|
125
|
-
if snippet_begin > 0
|
126
|
-
snippet = '...' + snippet
|
127
|
-
end
|
128
|
-
if snippet_end < @text.length
|
129
|
-
snippet = snippet + '...'
|
130
|
-
end
|
131
|
-
raise ArgumentError, e.message + " (#{snippet})"
|
132
116
|
end
|
133
117
|
end
|
134
118
|
|
@@ -137,7 +121,7 @@ class TextAlignment::CharMapping
|
|
137
121
|
def enmap_text(_text, char_mapping, no_ws = false)
|
138
122
|
text = _text.dup
|
139
123
|
|
140
|
-
# To
|
124
|
+
# To perform the single letter mapping replacement
|
141
125
|
char_mapping.each do |one, long|
|
142
126
|
text.gsub!(one, long) if long.length == 1
|
143
127
|
end
|
@@ -149,12 +133,16 @@ class TextAlignment::CharMapping
|
|
149
133
|
|
150
134
|
init_next = 0
|
151
135
|
while loc = text.index(long, init_next)
|
152
|
-
|
136
|
+
# Heuristics to check if the surrounding letters are sufficiently distinguished.
|
137
|
+
if long.length > 3 || ((text[loc - 1, 2] !~ /[a-z][a-z]/) && (text[loc + long.length - 1, 2] !~ /[a-z][a-z]/))
|
138
|
+
# if true
|
139
|
+
rpositions << [loc, long.length, 1]
|
140
|
+
|
141
|
+
# a workaround to avoid messing-up due to embedding
|
142
|
+
text[loc, long.length] = one * long.length
|
143
|
+
end
|
153
144
|
init_next = loc + long.length
|
154
145
|
end
|
155
|
-
|
156
|
-
# a workaround to avoid messing-up due to embedding
|
157
|
-
text.gsub!(long, one * long.length)
|
158
146
|
end
|
159
147
|
|
160
148
|
# To get the replacement positions, (position, old_length, new_length), for consecutive whitespaces
|
@@ -182,6 +170,7 @@ class TextAlignment::CharMapping
|
|
182
170
|
|
183
171
|
# To execute the long letter mapping
|
184
172
|
char_mapping.each do |one, long|
|
173
|
+
next unless text =~ /#{one}/
|
185
174
|
text.gsub!(one * long.length, one) if long.length > 1
|
186
175
|
end
|
187
176
|
|
@@ -17,7 +17,7 @@ class TextAlignment::MixedAlignment
|
|
17
17
|
attr_reader :similarity
|
18
18
|
attr_reader :str1_match_initial, :str1_match_final, :str2_match_initial, :str2_match_final
|
19
19
|
|
20
|
-
def initialize(_str1, _str2,
|
20
|
+
def initialize(_str1, _str2, mappings = nil)
|
21
21
|
raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
|
22
22
|
|
23
23
|
mappings ||= TextAlignment::CHAR_MAPPING
|
@@ -40,12 +40,12 @@ class TextAlignment::TextAlignment
|
|
40
40
|
end
|
41
41
|
|
42
42
|
@mapped_text = @text_mapping.mapped_text
|
43
|
-
denotations_mapped = @text_mapping.enmap_denotations(denotations)
|
44
43
|
|
45
44
|
## To generate the block_alignment of the input text against the reference text
|
46
45
|
@blocks = if r = whole_block_alignment(@mapped_text, @mapped_reference_text, @cultivation_map)
|
47
46
|
r
|
48
47
|
else
|
48
|
+
denotations_mapped = @text_mapping.enmap_denotations(denotations)
|
49
49
|
find_block_alignment(@mapped_text, @mapped_reference_text, denotations_mapped, @cultivation_map)
|
50
50
|
end
|
51
51
|
|
@@ -343,7 +343,7 @@ class TextAlignment::TextAlignment
|
|
343
343
|
|
344
344
|
## term-based alignment
|
345
345
|
tblocks = if denotations
|
346
|
-
denotations_in_scope = denotations.select{|d| d[:span][:begin] >= b1 && d[:span][:end] <= e1}.
|
346
|
+
denotations_in_scope = denotations.select{|d| d[:span][:begin] && d[:span][:end] && d[:span][:begin] >= b1 && d[:span][:end] <= e1}.
|
347
347
|
sort{|d1, d2| d1[:span][:begin] <=> d2[:span][:begin] || d2[:span][:end] <=> d1[:span][:end] }.
|
348
348
|
map{|d| d.merge(lex:str1[d[:span][:begin] ... d[:span][:end]])}
|
349
349
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_alignment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.12.1
|
4
|
+
version: 0.12.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-dictionary
|