text_alignment 0.3.14 → 0.3.19

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f6f98465bb47a2b241dda72c8532530f5c7fdf4de49a403366bd08c256b7ff0e
4
- data.tar.gz: 44a6c920f8f05ab3ee29a0b9fe4de38e2f6fac2386838625b77d99486189ebf0
3
+ metadata.gz: ea57d01970fdb56a95a7929803949a553965692fb3f4748eec72fe026f9a79cf
4
+ data.tar.gz: 96397baa91646b3eb05a346ff699930b6dacf7d38075273b64ce7916f32d6275
5
5
  SHA512:
6
- metadata.gz: 17f038d6d7366b8223cdd66b5ef9f3d79c8ecc39f432ac15dbfd0f3311e1197bc9c40c5cbd38a69d5778278405dcd100bc18187870ee563a7e5999246845b049
7
- data.tar.gz: f0ded392d47821bc99c640700955686f14cc9550a13b3b8141af2af7f88f79400a3de6632f2bc3223c9e0dc82311d461de84a5ffa16aff443394b3c76540a74c
6
+ metadata.gz: 1d1e7650c35d9bae35a7f1dc2948dbf97fa8f71a86f1f83c5dda9cb64b7179e96ae219db88951e65c2988f078900d960784c4c74489a074654ed893c408be97f
7
+ data.tar.gz: 49afaec2b6332dc4038c1f0d0e930bf20ce61a6d4933ff3758bbbfea3e05cf347277aefc341d9c3b3d1f8a4692cec24b901528c2875875bacecc1caa7cf9159c
@@ -103,6 +103,8 @@ target_annotations = if source_annotations.class == Array
103
103
  else
104
104
  alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
105
105
 
106
+ pp alignment
107
+
106
108
  # verification
107
109
  source_text = source_annotations[:text]
108
110
  puts "=====BEGIN"
@@ -129,8 +131,6 @@ else
129
131
  puts
130
132
  puts "=====END"
131
133
 
132
- # pp alignment
133
-
134
134
  # alignment.block_alignments.each do |a|
135
135
  # if a[:alignment].nil? || a[:alignment] == :empty
136
136
  # # p [a[:source], a[:target]]
@@ -153,8 +153,16 @@ else
153
153
  p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < source_text.length
154
154
  end
155
155
  puts "====="
156
+ puts
156
157
 
157
158
  denotations = alignment.transform_hdenotations(source_annotations[:denotations])
159
+ puts "[Invalid transformation]"
160
+ denotations.each do |d|
161
+ p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < target_text.length
162
+ end
163
+ puts "====="
164
+ puts
165
+
158
166
  lost_annotations += alignment.lost_annotations if alignment.lost_annotations
159
167
 
160
168
  source_annotations.merge({text:target_text, denotations:denotations})
@@ -3,8 +3,8 @@ require 'string-similarity'
3
3
 
4
4
  module TextAlignment; end unless defined? TextAlignment
5
5
 
6
- TextAlignment::SIZE_NGRAM = 10 unless defined? TextAlignment::SIZE_NGRAM
7
- TextAlignment::SIZE_WINDOW = 20 unless defined? TextAlignment::SIZE_WINDOW
6
+ TextAlignment::SIZE_NGRAM = 8 unless defined? TextAlignment::SIZE_NGRAM
7
+ TextAlignment::SIZE_WINDOW = 40 unless defined? TextAlignment::SIZE_WINDOW
8
8
  TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.8 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD
9
9
 
10
10
  class TextAlignment::AnchorFinder
@@ -23,6 +23,7 @@ class TextAlignment::AnchorFinder
23
23
 
24
24
  # current position in s1
25
25
  @beg_s1 = 0
26
+ @end_s1_prev = 0
26
27
  @end_s2_prev = 0
27
28
  end
28
29
 
@@ -31,8 +32,8 @@ class TextAlignment::AnchorFinder
31
32
  while @beg_s1 < (@s1.length - @size_ngram)
32
33
  anchor = @s1[@beg_s1, @size_ngram]
33
34
 
34
- search_position = 0
35
- # search_position = @end_s2_prev
35
+ # search_position = 0
36
+ search_position = @end_s2_prev
36
37
  while @beg_s2 = @s2.index(anchor, search_position)
37
38
  # if both the begining points are sufficiantly close to the end points of the last match
38
39
  break if @end_s1_prev && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 >= @end_s2_prev) && (@beg_s2 - @end_s2_prev < 5)
@@ -56,7 +57,7 @@ class TextAlignment::AnchorFinder
56
57
  # extend the block
57
58
  b1 = @beg_s1
58
59
  b2 = @beg_s2
59
- while b1 > -1 && b2 > -1 && @s1[b1] == @s2[b2]
60
+ while b1 >= @end_s1_prev && b2 > -1 && @s1[b1] == @s2[b2]
60
61
  b1 -= 1; b2 -= 1
61
62
  end
62
63
  b1 += 1; b2 += 1
@@ -40,6 +40,10 @@ class TextAlignment::TextAlignment
40
40
  end
41
41
  end
42
42
 
43
+ pp mblocks
44
+ puts "-----"
45
+ puts
46
+
43
47
  # mblocks.each do |b|
44
48
  # p [b[:source], b[:target]]
45
49
  # puts "---"
@@ -170,10 +174,12 @@ class TextAlignment::TextAlignment
170
174
  if begin_position == block_alignment[:source][:begin]
171
175
  block_alignment[:target][:begin]
172
176
  else
173
- raise "lost annotation"
177
+ # raise "lost annotation"
178
+ nil
174
179
  end
175
180
  else
176
- block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin]) + block_alignment[:target][:begin]
181
+ r = block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin])
182
+ r.nil? ? nil : r + block_alignment[:target][:begin]
177
183
  end
178
184
  end
179
185
 
@@ -187,10 +193,12 @@ class TextAlignment::TextAlignment
187
193
  if end_position == block_alignment[:source][:end]
188
194
  block_alignment[:target][:end]
189
195
  else
190
- raise "lost annotation"
196
+ # raise "lost annotation"
197
+ nil
191
198
  end
192
199
  else
193
- block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin]) + block_alignment[:target][:begin]
200
+ r = block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin])
201
+ r.nil? ? nil : r + block_alignment[:target][:begin]
194
202
  end
195
203
  end
196
204
 
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.3.14'
2
+ VERSION = '0.3.19'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.14
4
+ version: 0.3.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-13 00:00:00.000000000 Z
11
+ date: 2020-09-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary