text_alignment 0.12.0 → 0.12.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fc90a3ea0092a4e2672fd40a3bf3e6bb25fba612fc663fc7c249741e79b7a990
4
- data.tar.gz: 8676468d98902404d400b78711668930c9aaed268ddc1f555283b67fb731e2dd
3
+ metadata.gz: 3c2a36fe4cfde7dfb76f554fd4afcae7cb5a03e455887621217f5e5e633b20b3
4
+ data.tar.gz: f63070c6f423bc15d0fc8c742a21238a104a1b2c1d3fe56ac436effa8ef8eacf
5
5
  SHA512:
6
- metadata.gz: 8194112635d7ffebe3983ae4393c05183ea86e3beab2efe645d512cb56c59f38187ab62d5d7bb7fb589ca20989a6763d42af0d881fdff4bb94be69954d9cb3c2
7
- data.tar.gz: 2907241a5a589b2ab3bfee5e08ebf3c498af79f18ba3bd4d2ae3ef543681d251db2213f3d3dffe441aa39263136b7ea755fa4376fa46acc02af0beca379d2a5e
6
+ metadata.gz: '078a41bc6ab9b16e7747be6a3bb15aff4b23a1161bcea0b653a93f04d673799afcac2109cd1ce8d1a95c99c5c07d36842e3698c9f0997500e653fb4ab939e04a'
7
+ data.tar.gz: ce44d334779d43b3057317537f615ebf39b8049639d3e50c4e14272c952b76a5df2b060cfd3a15f1d28372c11a795ef8bf43cb04de32b5a78ce2f44433edddfb
@@ -91,6 +91,9 @@ class TextAlignment::CharMapping
91
91
  @method_squeeze_ws = method(:squeeze_ws_1!)
92
92
  end
93
93
 
94
+ @text = _text
95
+
96
+ # sorting by the length of the spell-outs is important
94
97
  char_mapping ||= TextAlignment::CHAR_MAPPING.sort{|a, b| b[1].length <=> a[1].length}
95
98
  @mapped_text, offset_mapping = enmap_text(_text, char_mapping)
96
99
  @index_enmap = offset_mapping.to_h
@@ -98,11 +101,11 @@ class TextAlignment::CharMapping
98
101
  end
99
102
 
100
103
  def enmap_position(position)
101
- @index_enmap[position] || raise(ArgumentError, "Unusual position for annotation: #{position}")
104
+ @index_enmap[position] || raise(ArgumentError, "Unusual position of annotation: #{position}")
102
105
  end
103
106
 
104
107
  def demap_position(position)
105
- @index_demap[position] || raise(ArgumentError, "Unusual position for annotation: #{position}")
108
+ @index_demap[position] || raise(ArgumentError, "Unusual position of annotation: #{position}")
106
109
  end
107
110
 
108
111
  def enmap_denotations(denotations)
@@ -110,6 +113,23 @@ class TextAlignment::CharMapping
110
113
 
111
114
  denotations.map do |d|
112
115
  d.dup.merge(span:{begin:enmap_position(d[:span][:begin]), end:enmap_position(d[:span][:end])})
116
+ rescue ArgumentError => e
117
+ snippet_begin = d[:span][:begin] - 5
118
+ if snippet_begin < 0
119
+ snippet_begin = 0
120
+ end
121
+ snippet_end = d[:span][:end] + 5
122
+ if snippet_end > @text.length
123
+ snippet_end = @text.length
124
+ end
125
+ snippet = @text[snippet_begin ... d[:span][:begin]] + '[' + @text[d[:span][:begin] ... d[:span][:end]] + ']' + @text[d[:span][:end] ... snippet_end]
126
+ if snippet_begin > 0
127
+ snippet = '...' + snippet
128
+ end
129
+ if snippet_end < @text.length
130
+ snippet = snippet + '...'
131
+ end
132
+ raise ArgumentError, e.message + " (#{snippet})"
113
133
  end
114
134
  end
115
135
 
@@ -118,7 +138,7 @@ class TextAlignment::CharMapping
118
138
  def enmap_text(_text, char_mapping, no_ws = false)
119
139
  text = _text.dup
120
140
 
121
- # To execute the single letter mapping replacement
141
+ # To perform the single letter mapping replacement
122
142
  char_mapping.each do |one, long|
123
143
  text.gsub!(one, long) if long.length == 1
124
144
  end
@@ -130,12 +150,16 @@ class TextAlignment::CharMapping
130
150
 
131
151
  init_next = 0
132
152
  while loc = text.index(long, init_next)
133
- rpositions << [loc, long.length, 1]
153
+ # Heuristics to check if the surrounding letters are sufficiently distinguished.
154
+ if long.length > 3 || ((text[loc - 1, 2] !~ /[a-z][a-z]/) && (text[loc + long.length - 1, 2] !~ /[a-z][a-z]/))
155
+ # if true
156
+ rpositions << [loc, long.length, 1]
157
+
158
+ # a workaround to avoid messing-up due to embedding
159
+ text[loc, long.length] = one * long.length
160
+ end
134
161
  init_next = loc + long.length
135
162
  end
136
-
137
- # a workaround to avoid messing-up due to embedding
138
- text.gsub!(long, one * long.length)
139
163
  end
140
164
 
141
165
  # To get the replacement positions, (position, old_length, new_length), for consecutive whitespaces
@@ -163,6 +187,7 @@ class TextAlignment::CharMapping
163
187
 
164
188
  # To execute the long letter mapping
165
189
  char_mapping.each do |one, long|
190
+ next unless text =~ /#{one}/
166
191
  text.gsub!(one * long.length, one) if long.length > 1
167
192
  end
168
193
 
@@ -17,7 +17,7 @@ class TextAlignment::MixedAlignment
17
17
  attr_reader :similarity
18
18
  attr_reader :str1_match_initial, :str1_match_final, :str2_match_initial, :str2_match_final
19
19
 
20
- def initialize(_str1, _str2, _mappings = nil)
20
+ def initialize(_str1, _str2, mappings = nil)
21
21
  raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
22
22
 
23
23
  mappings ||= TextAlignment::CHAR_MAPPING
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.12.0'
2
+ VERSION = '0.12.2'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.0
4
+ version: 0.12.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-12-21 00:00:00.000000000 Z
11
+ date: 2024-01-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary