text_alignment 0.12.1 → 0.12.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '097a3da5324e29b872a7927e2725ce82120f328ce1961cb24aaa5e058cf1b265'
4
- data.tar.gz: 727bcfbffe4c30535a3b0bd01f4d14cdd1f4ccf64c5f337549da116e484e5e35
3
+ metadata.gz: 2345340266a0e66e9d26daaa51db1c9239bb837f52a5112a5c525a6d87b120d5
4
+ data.tar.gz: d3d1d118786e89a4bd7f9a6a9315643967c3ae099f8072913862ded9c895bfa5
5
5
  SHA512:
6
- metadata.gz: 0bb88bc2f353bca72b1cc1488063c3ac3d9f3164c3d877bbef8b17876311a99f08143fced8d8ccaa7283cce1b4e6a1ba265435ef71d6c30979da0a5045462290
7
- data.tar.gz: ef1a61192ade8cabe0b8960619f3200e1312b3cb6766d4b4dfc4a4685ab8af8397c9647cbc6e50cc6047b5ca4a2daaf7b47e669655616082fa99c06f6d3f1e26
6
+ metadata.gz: 9fc6c3235373f0e0174a922f006dd2cdf687361dfd567056137d20707b674ae75a40c13862d5a02946a225c19dfdae239ebf3f274a47bb4df1c8b2256bd968e2
7
+ data.tar.gz: 50dede22ed93d9e93a21dbbec5de39f3e342147c512d620a341331c4ddee6d0cd2cd08535e484732fd33e24ec1d8789d4daa7ef79191b13bcb9908634fcafe13
@@ -93,6 +93,7 @@ class TextAlignment::CharMapping
93
93
 
94
94
  @text = _text
95
95
 
96
+ # sorting by the length of the spell-outs is important
96
97
  char_mapping ||= TextAlignment::CHAR_MAPPING.sort{|a, b| b[1].length <=> a[1].length}
97
98
  @mapped_text, offset_mapping = enmap_text(_text, char_mapping)
98
99
  @index_enmap = offset_mapping.to_h
@@ -100,11 +101,11 @@ class TextAlignment::CharMapping
100
101
  end
101
102
 
102
103
  def enmap_position(position)
103
- @index_enmap[position] || raise(ArgumentError, "Unusual position of annotation: #{position}")
104
+ @index_enmap[position]
104
105
  end
105
106
 
106
107
  def demap_position(position)
107
- @index_demap[position] || raise(ArgumentError, "Unusual position of annotation: #{position}")
108
+ @index_demap[position]
108
109
  end
109
110
 
110
111
  def enmap_denotations(denotations)
@@ -112,23 +113,6 @@ class TextAlignment::CharMapping
112
113
 
113
114
  denotations.map do |d|
114
115
  d.dup.merge(span:{begin:enmap_position(d[:span][:begin]), end:enmap_position(d[:span][:end])})
115
- rescue ArgumentError => e
116
- snippet_begin = d[:span][:begin] - 5
117
- if snippet_begin < 0
118
- snippet_begin = 0
119
- end
120
- snippet_end = d[:span][:end] + 5
121
- if snippet_end > @text.length
122
- snippet_end = @text.length
123
- end
124
- snippet = @text[snippet_begin ... d[:span][:begin]] + '[' + @text[d[:span][:begin] ... d[:span][:end]] + ']' + @text[d[:span][:end] ... snippet_end]
125
- if snippet_begin > 0
126
- snippet = '...' + snippet
127
- end
128
- if snippet_end < @text.length
129
- snippet = snippet + '...'
130
- end
131
- raise ArgumentError, e.message + " (#{snippet})"
132
116
  end
133
117
  end
134
118
 
@@ -137,7 +121,7 @@ class TextAlignment::CharMapping
137
121
  def enmap_text(_text, char_mapping, no_ws = false)
138
122
  text = _text.dup
139
123
 
140
- # To execute the single letter mapping replacement
124
+ # To perform the single letter mapping replacement
141
125
  char_mapping.each do |one, long|
142
126
  text.gsub!(one, long) if long.length == 1
143
127
  end
@@ -149,12 +133,16 @@ class TextAlignment::CharMapping
149
133
 
150
134
  init_next = 0
151
135
  while loc = text.index(long, init_next)
152
- rpositions << [loc, long.length, 1]
136
+ # Heuristics to check if the surrounding letters are sufficiently distinguished.
137
+ if long.length > 3 || ((text[loc - 1, 2] !~ /[a-z][a-z]/) && (text[loc + long.length - 1, 2] !~ /[a-z][a-z]/))
138
+ # if true
139
+ rpositions << [loc, long.length, 1]
140
+
141
+ # a workaround to avoid messing-up due to embedding
142
+ text[loc, long.length] = one * long.length
143
+ end
153
144
  init_next = loc + long.length
154
145
  end
155
-
156
- # a workaround to avoid messing-up due to embedding
157
- text.gsub!(long, one * long.length)
158
146
  end
159
147
 
160
148
  # To get the replacement positions, (position, old_length, new_length), for consecutive whitespaces
@@ -182,6 +170,7 @@ class TextAlignment::CharMapping
182
170
 
183
171
  # To execute the long letter mapping
184
172
  char_mapping.each do |one, long|
173
+ next unless text =~ /#{one}/
185
174
  text.gsub!(one * long.length, one) if long.length > 1
186
175
  end
187
176
 
@@ -17,7 +17,7 @@ class TextAlignment::MixedAlignment
17
17
  attr_reader :similarity
18
18
  attr_reader :str1_match_initial, :str1_match_final, :str2_match_initial, :str2_match_final
19
19
 
20
- def initialize(_str1, _str2, _mappings = nil)
20
+ def initialize(_str1, _str2, mappings = nil)
21
21
  raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
22
22
 
23
23
  mappings ||= TextAlignment::CHAR_MAPPING
@@ -40,12 +40,12 @@ class TextAlignment::TextAlignment
40
40
  end
41
41
 
42
42
  @mapped_text = @text_mapping.mapped_text
43
- denotations_mapped = @text_mapping.enmap_denotations(denotations)
44
43
 
45
44
  ## To generate the block_alignment of the input text against the reference text
46
45
  @blocks = if r = whole_block_alignment(@mapped_text, @mapped_reference_text, @cultivation_map)
47
46
  r
48
47
  else
48
+ denotations_mapped = @text_mapping.enmap_denotations(denotations)
49
49
  find_block_alignment(@mapped_text, @mapped_reference_text, denotations_mapped, @cultivation_map)
50
50
  end
51
51
 
@@ -343,7 +343,7 @@ class TextAlignment::TextAlignment
343
343
 
344
344
  ## term-based alignment
345
345
  tblocks = if denotations
346
- denotations_in_scope = denotations.select{|d| d[:span][:begin] >= b1 && d[:span][:end] <= e1}.
346
+ denotations_in_scope = denotations.select{|d| d[:span][:begin] && d[:span][:end] && d[:span][:begin] >= b1 && d[:span][:end] <= e1}.
347
347
  sort{|d1, d2| d1[:span][:begin] <=> d2[:span][:begin] || d2[:span][:end] <=> d1[:span][:end] }.
348
348
  map{|d| d.merge(lex:str1[d[:span][:begin] ... d[:span][:end]])}
349
349
 
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.12.1'
2
+ VERSION = '0.12.3'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.1
4
+ version: 0.12.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-12-26 00:00:00.000000000 Z
11
+ date: 2024-01-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary