text_alignment 0.3.14 → 0.3.19

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f6f98465bb47a2b241dda72c8532530f5c7fdf4de49a403366bd08c256b7ff0e
4
- data.tar.gz: 44a6c920f8f05ab3ee29a0b9fe4de38e2f6fac2386838625b77d99486189ebf0
3
+ metadata.gz: ea57d01970fdb56a95a7929803949a553965692fb3f4748eec72fe026f9a79cf
4
+ data.tar.gz: 96397baa91646b3eb05a346ff699930b6dacf7d38075273b64ce7916f32d6275
5
5
  SHA512:
6
- metadata.gz: 17f038d6d7366b8223cdd66b5ef9f3d79c8ecc39f432ac15dbfd0f3311e1197bc9c40c5cbd38a69d5778278405dcd100bc18187870ee563a7e5999246845b049
7
- data.tar.gz: f0ded392d47821bc99c640700955686f14cc9550a13b3b8141af2af7f88f79400a3de6632f2bc3223c9e0dc82311d461de84a5ffa16aff443394b3c76540a74c
6
+ metadata.gz: 1d1e7650c35d9bae35a7f1dc2948dbf97fa8f71a86f1f83c5dda9cb64b7179e96ae219db88951e65c2988f078900d960784c4c74489a074654ed893c408be97f
7
+ data.tar.gz: 49afaec2b6332dc4038c1f0d0e930bf20ce61a6d4933ff3758bbbfea3e05cf347277aefc341d9c3b3d1f8a4692cec24b901528c2875875bacecc1caa7cf9159c
@@ -103,6 +103,8 @@ target_annotations = if source_annotations.class == Array
103
103
  else
104
104
  alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
105
105
 
106
+ pp alignment
107
+
106
108
  # verification
107
109
  source_text = source_annotations[:text]
108
110
  puts "=====BEGIN"
@@ -129,8 +131,6 @@ else
129
131
  puts
130
132
  puts "=====END"
131
133
 
132
- # pp alignment
133
-
134
134
  # alignment.block_alignments.each do |a|
135
135
  # if a[:alignment].nil? || a[:alignment] == :empty
136
136
  # # p [a[:source], a[:target]]
@@ -153,8 +153,16 @@ else
153
153
  p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < source_text.length
154
154
  end
155
155
  puts "====="
156
+ puts
156
157
 
157
158
  denotations = alignment.transform_hdenotations(source_annotations[:denotations])
159
+ puts "[Invalid transformation]"
160
+ denotations.each do |d|
161
+ p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < target_text.length
162
+ end
163
+ puts "====="
164
+ puts
165
+
158
166
  lost_annotations += alignment.lost_annotations if alignment.lost_annotations
159
167
 
160
168
  source_annotations.merge({text:target_text, denotations:denotations})
@@ -3,8 +3,8 @@ require 'string-similarity'
3
3
 
4
4
  module TextAlignment; end unless defined? TextAlignment
5
5
 
6
- TextAlignment::SIZE_NGRAM = 10 unless defined? TextAlignment::SIZE_NGRAM
7
- TextAlignment::SIZE_WINDOW = 20 unless defined? TextAlignment::SIZE_WINDOW
6
+ TextAlignment::SIZE_NGRAM = 8 unless defined? TextAlignment::SIZE_NGRAM
7
+ TextAlignment::SIZE_WINDOW = 40 unless defined? TextAlignment::SIZE_WINDOW
8
8
  TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.8 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD
9
9
 
10
10
  class TextAlignment::AnchorFinder
@@ -23,6 +23,7 @@ class TextAlignment::AnchorFinder
23
23
 
24
24
  # current position in s1
25
25
  @beg_s1 = 0
26
+ @end_s1_prev = 0
26
27
  @end_s2_prev = 0
27
28
  end
28
29
 
@@ -31,8 +32,8 @@ class TextAlignment::AnchorFinder
31
32
  while @beg_s1 < (@s1.length - @size_ngram)
32
33
  anchor = @s1[@beg_s1, @size_ngram]
33
34
 
34
- search_position = 0
35
- # search_position = @end_s2_prev
35
+ # search_position = 0
36
+ search_position = @end_s2_prev
36
37
  while @beg_s2 = @s2.index(anchor, search_position)
37
38
  # if both the beginning points are sufficiently close to the end points of the last match
38
39
  break if @end_s1_prev && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 >= @end_s2_prev) && (@beg_s2 - @end_s2_prev < 5)
@@ -56,7 +57,7 @@ class TextAlignment::AnchorFinder
56
57
  # extend the block
57
58
  b1 = @beg_s1
58
59
  b2 = @beg_s2
59
- while b1 > -1 && b2 > -1 && @s1[b1] == @s2[b2]
60
+ while b1 >= @end_s1_prev && b2 > -1 && @s1[b1] == @s2[b2]
60
61
  b1 -= 1; b2 -= 1
61
62
  end
62
63
  b1 += 1; b2 += 1
@@ -40,6 +40,10 @@ class TextAlignment::TextAlignment
40
40
  end
41
41
  end
42
42
 
43
+ pp mblocks
44
+ puts "-----"
45
+ puts
46
+
43
47
  # mblocks.each do |b|
44
48
  # p [b[:source], b[:target]]
45
49
  # puts "---"
@@ -170,10 +174,12 @@ class TextAlignment::TextAlignment
170
174
  if begin_position == block_alignment[:source][:begin]
171
175
  block_alignment[:target][:begin]
172
176
  else
173
- raise "lost annotation"
177
+ # raise "lost annotation"
178
+ nil
174
179
  end
175
180
  else
176
- block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin]) + block_alignment[:target][:begin]
181
+ r = block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin])
182
+ r.nil? ? nil : r + block_alignment[:target][:begin]
177
183
  end
178
184
  end
179
185
 
@@ -187,10 +193,12 @@ class TextAlignment::TextAlignment
187
193
  if end_position == block_alignment[:source][:end]
188
194
  block_alignment[:target][:end]
189
195
  else
190
- raise "lost annotation"
196
+ # raise "lost annotation"
197
+ nil
191
198
  end
192
199
  else
193
- block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin]) + block_alignment[:target][:begin]
200
+ r = block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin])
201
+ r.nil? ? nil : r + block_alignment[:target][:begin]
194
202
  end
195
203
  end
196
204
 
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.3.14'
2
+ VERSION = '0.3.19'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.14
4
+ version: 0.3.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-13 00:00:00.000000000 Z
11
+ date: 2020-09-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary