text_alignment 0.12.1 → 0.12.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '097a3da5324e29b872a7927e2725ce82120f328ce1961cb24aaa5e058cf1b265'
4
- data.tar.gz: 727bcfbffe4c30535a3b0bd01f4d14cdd1f4ccf64c5f337549da116e484e5e35
3
+ metadata.gz: 2345340266a0e66e9d26daaa51db1c9239bb837f52a5112a5c525a6d87b120d5
4
+ data.tar.gz: d3d1d118786e89a4bd7f9a6a9315643967c3ae099f8072913862ded9c895bfa5
5
5
  SHA512:
6
- metadata.gz: 0bb88bc2f353bca72b1cc1488063c3ac3d9f3164c3d877bbef8b17876311a99f08143fced8d8ccaa7283cce1b4e6a1ba265435ef71d6c30979da0a5045462290
7
- data.tar.gz: ef1a61192ade8cabe0b8960619f3200e1312b3cb6766d4b4dfc4a4685ab8af8397c9647cbc6e50cc6047b5ca4a2daaf7b47e669655616082fa99c06f6d3f1e26
6
+ metadata.gz: 9fc6c3235373f0e0174a922f006dd2cdf687361dfd567056137d20707b674ae75a40c13862d5a02946a225c19dfdae239ebf3f274a47bb4df1c8b2256bd968e2
7
+ data.tar.gz: 50dede22ed93d9e93a21dbbec5de39f3e342147c512d620a341331c4ddee6d0cd2cd08535e484732fd33e24ec1d8789d4daa7ef79191b13bcb9908634fcafe13
@@ -93,6 +93,7 @@ class TextAlignment::CharMapping
93
93
 
94
94
  @text = _text
95
95
 
96
+ # sorting by the length of the spell-outs is important
96
97
  char_mapping ||= TextAlignment::CHAR_MAPPING.sort{|a, b| b[1].length <=> a[1].length}
97
98
  @mapped_text, offset_mapping = enmap_text(_text, char_mapping)
98
99
  @index_enmap = offset_mapping.to_h
@@ -100,11 +101,11 @@ class TextAlignment::CharMapping
100
101
  end
101
102
 
102
103
  def enmap_position(position)
103
- @index_enmap[position] || raise(ArgumentError, "Unusual position of annotation: #{position}")
104
+ @index_enmap[position]
104
105
  end
105
106
 
106
107
  def demap_position(position)
107
- @index_demap[position] || raise(ArgumentError, "Unusual position of annotation: #{position}")
108
+ @index_demap[position]
108
109
  end
109
110
 
110
111
  def enmap_denotations(denotations)
@@ -112,23 +113,6 @@ class TextAlignment::CharMapping
112
113
 
113
114
  denotations.map do |d|
114
115
  d.dup.merge(span:{begin:enmap_position(d[:span][:begin]), end:enmap_position(d[:span][:end])})
115
- rescue ArgumentError => e
116
- snippet_begin = d[:span][:begin] - 5
117
- if snippet_begin < 0
118
- snippet_begin = 0
119
- end
120
- snippet_end = d[:span][:end] + 5
121
- if snippet_end > @text.length
122
- snippet_end = @text.length
123
- end
124
- snippet = @text[snippet_begin ... d[:span][:begin]] + '[' + @text[d[:span][:begin] ... d[:span][:end]] + ']' + @text[d[:span][:end] ... snippet_end]
125
- if snippet_begin > 0
126
- snippet = '...' + snippet
127
- end
128
- if snippet_end < @text.length
129
- snippet = snippet + '...'
130
- end
131
- raise ArgumentError, e.message + " (#{snippet})"
132
116
  end
133
117
  end
134
118
 
@@ -137,7 +121,7 @@ class TextAlignment::CharMapping
137
121
  def enmap_text(_text, char_mapping, no_ws = false)
138
122
  text = _text.dup
139
123
 
140
- # To execute the single letter mapping replacement
124
+ # To perform the single letter mapping replacement
141
125
  char_mapping.each do |one, long|
142
126
  text.gsub!(one, long) if long.length == 1
143
127
  end
@@ -149,12 +133,16 @@ class TextAlignment::CharMapping
149
133
 
150
134
  init_next = 0
151
135
  while loc = text.index(long, init_next)
152
- rpositions << [loc, long.length, 1]
136
+ # Heuristics to check if the surrounding letters are sufficiently distinguished.
137
+ if long.length > 3 || ((text[loc - 1, 2] !~ /[a-z][a-z]/) && (text[loc + long.length - 1, 2] !~ /[a-z][a-z]/))
138
+ # if true
139
+ rpositions << [loc, long.length, 1]
140
+
141
+ # a workaround to avoid messing-up due to embedding
142
+ text[loc, long.length] = one * long.length
143
+ end
153
144
  init_next = loc + long.length
154
145
  end
155
-
156
- # a workaround to avoid messing-up due to embedding
157
- text.gsub!(long, one * long.length)
158
146
  end
159
147
 
160
148
  # To get the replacement positions, (position, old_length, new_length), for consecutive whitespaces
@@ -182,6 +170,7 @@ class TextAlignment::CharMapping
182
170
 
183
171
  # To execute the long letter mapping
184
172
  char_mapping.each do |one, long|
173
+ next unless text =~ /#{one}/
185
174
  text.gsub!(one * long.length, one) if long.length > 1
186
175
  end
187
176
 
@@ -17,7 +17,7 @@ class TextAlignment::MixedAlignment
17
17
  attr_reader :similarity
18
18
  attr_reader :str1_match_initial, :str1_match_final, :str2_match_initial, :str2_match_final
19
19
 
20
- def initialize(_str1, _str2, _mappings = nil)
20
+ def initialize(_str1, _str2, mappings = nil)
21
21
  raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
22
22
 
23
23
  mappings ||= TextAlignment::CHAR_MAPPING
@@ -40,12 +40,12 @@ class TextAlignment::TextAlignment
40
40
  end
41
41
 
42
42
  @mapped_text = @text_mapping.mapped_text
43
- denotations_mapped = @text_mapping.enmap_denotations(denotations)
44
43
 
45
44
  ## To generate the block_alignment of the input text against the reference text
46
45
  @blocks = if r = whole_block_alignment(@mapped_text, @mapped_reference_text, @cultivation_map)
47
46
  r
48
47
  else
48
+ denotations_mapped = @text_mapping.enmap_denotations(denotations)
49
49
  find_block_alignment(@mapped_text, @mapped_reference_text, denotations_mapped, @cultivation_map)
50
50
  end
51
51
 
@@ -343,7 +343,7 @@ class TextAlignment::TextAlignment
343
343
 
344
344
  ## term-based alignment
345
345
  tblocks = if denotations
346
- denotations_in_scope = denotations.select{|d| d[:span][:begin] >= b1 && d[:span][:end] <= e1}.
346
+ denotations_in_scope = denotations.select{|d| d[:span][:begin] && d[:span][:end] && d[:span][:begin] >= b1 && d[:span][:end] <= e1}.
347
347
  sort{|d1, d2| d1[:span][:begin] <=> d2[:span][:begin] || d2[:span][:end] <=> d1[:span][:end] }.
348
348
  map{|d| d.merge(lex:str1[d[:span][:begin] ... d[:span][:end]])}
349
349
 
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.12.1'
2
+ VERSION = '0.12.3'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.1
4
+ version: 0.12.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-12-26 00:00:00.000000000 Z
11
+ date: 2024-01-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary