text_alignment 0.12.0 → 0.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fc90a3ea0092a4e2672fd40a3bf3e6bb25fba612fc663fc7c249741e79b7a990
4
- data.tar.gz: 8676468d98902404d400b78711668930c9aaed268ddc1f555283b67fb731e2dd
3
+ metadata.gz: 3c2a36fe4cfde7dfb76f554fd4afcae7cb5a03e455887621217f5e5e633b20b3
4
+ data.tar.gz: f63070c6f423bc15d0fc8c742a21238a104a1b2c1d3fe56ac436effa8ef8eacf
5
5
  SHA512:
6
- metadata.gz: 8194112635d7ffebe3983ae4393c05183ea86e3beab2efe645d512cb56c59f38187ab62d5d7bb7fb589ca20989a6763d42af0d881fdff4bb94be69954d9cb3c2
7
- data.tar.gz: 2907241a5a589b2ab3bfee5e08ebf3c498af79f18ba3bd4d2ae3ef543681d251db2213f3d3dffe441aa39263136b7ea755fa4376fa46acc02af0beca379d2a5e
6
+ metadata.gz: '078a41bc6ab9b16e7747be6a3bb15aff4b23a1161bcea0b653a93f04d673799afcac2109cd1ce8d1a95c99c5c07d36842e3698c9f0997500e653fb4ab939e04a'
7
+ data.tar.gz: ce44d334779d43b3057317537f615ebf39b8049639d3e50c4e14272c952b76a5df2b060cfd3a15f1d28372c11a795ef8bf43cb04de32b5a78ce2f44433edddfb
@@ -91,6 +91,9 @@ class TextAlignment::CharMapping
91
91
  @method_squeeze_ws = method(:squeeze_ws_1!)
92
92
  end
93
93
 
94
+ @text = _text
95
+
96
+ # Sorting by the length of the spell-outs (longest first) is important
94
97
  char_mapping ||= TextAlignment::CHAR_MAPPING.sort{|a, b| b[1].length <=> a[1].length}
95
98
  @mapped_text, offset_mapping = enmap_text(_text, char_mapping)
96
99
  @index_enmap = offset_mapping.to_h
@@ -98,11 +101,11 @@ class TextAlignment::CharMapping
98
101
  end
99
102
 
100
103
  def enmap_position(position)
101
- @index_enmap[position] || raise(ArgumentError, "Unusual position for annotation: #{position}")
104
+ @index_enmap[position] || raise(ArgumentError, "Unusual position of annotation: #{position}")
102
105
  end
103
106
 
104
107
  def demap_position(position)
105
- @index_demap[position] || raise(ArgumentError, "Unusual position for annotation: #{position}")
108
+ @index_demap[position] || raise(ArgumentError, "Unusual position of annotation: #{position}")
106
109
  end
107
110
 
108
111
  def enmap_denotations(denotations)
@@ -110,6 +113,23 @@ class TextAlignment::CharMapping
110
113
 
111
114
  denotations.map do |d|
112
115
  d.dup.merge(span:{begin:enmap_position(d[:span][:begin]), end:enmap_position(d[:span][:end])})
116
+ rescue ArgumentError => e
117
+ snippet_begin = d[:span][:begin] - 5
118
+ if snippet_begin < 0
119
+ snippet_begin = 0
120
+ end
121
+ snippet_end = d[:span][:end] + 5
122
+ if snippet_end > @text.length
123
+ snippet_end = @text.length
124
+ end
125
+ snippet = @text[snippet_begin ... d[:span][:begin]] + '[' + @text[d[:span][:begin] ... d[:span][:end]] + ']' + @text[d[:span][:end] ... snippet_end]
126
+ if snippet_begin > 0
127
+ snippet = '...' + snippet
128
+ end
129
+ if snippet_end < @text.length
130
+ snippet = snippet + '...'
131
+ end
132
+ raise ArgumentError, e.message + " (#{snippet})"
113
133
  end
114
134
  end
115
135
 
@@ -118,7 +138,7 @@ class TextAlignment::CharMapping
118
138
  def enmap_text(_text, char_mapping, no_ws = false)
119
139
  text = _text.dup
120
140
 
121
- # To execute the single letter mapping replacement
141
+ # To perform the single letter mapping replacement
122
142
  char_mapping.each do |one, long|
123
143
  text.gsub!(one, long) if long.length == 1
124
144
  end
@@ -130,12 +150,16 @@ class TextAlignment::CharMapping
130
150
 
131
151
  init_next = 0
132
152
  while loc = text.index(long, init_next)
133
- rpositions << [loc, long.length, 1]
153
+ # Heuristics to check if the surrounding letters are sufficiently distinguished.
154
+ if long.length > 3 || ((text[loc - 1, 2] !~ /[a-z][a-z]/) && (text[loc + long.length - 1, 2] !~ /[a-z][a-z]/))
155
+ # if true
156
+ rpositions << [loc, long.length, 1]
157
+
158
+ # a workaround to avoid messing-up due to embedding
159
+ text[loc, long.length] = one * long.length
160
+ end
134
161
  init_next = loc + long.length
135
162
  end
136
-
137
- # a workaround to avoid messing-up due to embedding
138
- text.gsub!(long, one * long.length)
139
163
  end
140
164
 
141
165
  # To get the replacement positions, (position, old_length, new_length), for consecutive whitespaces
@@ -163,6 +187,7 @@ class TextAlignment::CharMapping
163
187
 
164
188
  # To execute the long letter mapping
165
189
  char_mapping.each do |one, long|
190
+ next unless text =~ /#{one}/
166
191
  text.gsub!(one * long.length, one) if long.length > 1
167
192
  end
168
193
 
@@ -17,7 +17,7 @@ class TextAlignment::MixedAlignment
17
17
  attr_reader :similarity
18
18
  attr_reader :str1_match_initial, :str1_match_final, :str2_match_initial, :str2_match_final
19
19
 
20
- def initialize(_str1, _str2, _mappings = nil)
20
+ def initialize(_str1, _str2, mappings = nil)
21
21
  raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
22
22
 
23
23
  mappings ||= TextAlignment::CHAR_MAPPING
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.12.0'
2
+ VERSION = '0.12.2'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.0
4
+ version: 0.12.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-12-21 00:00:00.000000000 Z
11
+ date: 2024-01-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary