text_alignment 0.3.11 → 0.3.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 01525b6ca5f7e0ae1ebb9dfce2083006e095a66b3ab468ccef0a584bc3005556
4
- data.tar.gz: bc1137052a12b8db97635b183f299518b0158f3ccecf36d42fa45d746b4f3792
3
+ metadata.gz: 19a2dfcf8dfffa752dfc0c3363d2d3e1cb3ef7498f79023cdd16e38aa8c46afd
4
+ data.tar.gz: 94d925dfc71d24b05fd6861a4f7f7344428b68785db84eeae8f430563b4e3318
5
5
  SHA512:
6
- metadata.gz: 9596bea2616c3b4d939c8314d026941a6e627f4380183409051df62722a0ee5e3b35302da3066b0d32e8322582c999877b05a09c54749d878a284a062247342e
7
- data.tar.gz: 361e3e7a23697167b41e037e7d272bbd286ac397416defb125821ebbcb17bfa386341c75fecbb94fe7f9976cfbc2a8b5f7f9be7c150d23daf6d8c16410509b5d
6
+ metadata.gz: 72e61cf30c98df2c3d5ac19717c813c936b55daad22cb8c6e8b44bdb45321dab98c69d5f90820e9993d86263b04bdad2e96e8010afc8a57eee916126b673c8cc
7
+ data.tar.gz: d92c04294d58845f4a88cb8d9e3db42e9a18e0dd02d0398e3a95bf94662f64a33752754dd637163ef9bc4af77dc602c12fe683d49e9ac0ebb61a2469e5e08216
@@ -103,6 +103,8 @@ target_annotations = if source_annotations.class == Array
103
103
  else
104
104
  alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
105
105
 
106
+ pp alignment
107
+
106
108
  # alignment.block_alignments.each do |a|
107
109
  # if a[:alignment].nil? || a[:alignment] == :empty
108
110
  # # p [a[:source], a[:target]]
@@ -159,7 +161,7 @@ if lost_annotations
159
161
  warn "#{lost_annotations.length}"
160
162
  end
161
163
 
162
- puts target_annotations.to_json
164
+ #puts target_annotations.to_json
163
165
 
164
166
  # denotations = anns1[:denotations]
165
167
 
@@ -3,9 +3,9 @@ require 'string-similarity'
3
3
 
4
4
  module TextAlignment; end unless defined? TextAlignment
5
5
 
6
- TextAlignment::SIZE_NGRAM = 5 unless defined? TextAlignment::SIZE_NGRAM
7
- TextAlignment::SIZE_WINDOW = 10 unless defined? TextAlignment::SIZE_WINDOW
8
- TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.7 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD
6
+ TextAlignment::SIZE_NGRAM = 10 unless defined? TextAlignment::SIZE_NGRAM
7
+ TextAlignment::SIZE_WINDOW = 20 unless defined? TextAlignment::SIZE_WINDOW
8
+ TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.8 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD
9
9
 
10
10
  class TextAlignment::AnchorFinder
11
11
 
@@ -23,6 +23,7 @@ class TextAlignment::AnchorFinder
23
23
 
24
24
  # current position in s1
25
25
  @beg_s1 = 0
26
+ @end_s2_prev = 0
26
27
  end
27
28
 
28
29
  def get_next_anchor
@@ -31,15 +32,16 @@ class TextAlignment::AnchorFinder
31
32
  anchor = @s1[@beg_s1, @size_ngram]
32
33
 
33
34
  search_position = 0
35
+ # search_position = @end_s2_prev
34
36
  while @beg_s2 = @s2.index(anchor, search_position)
35
37
  # if both the beginning points are sufficiently close to the end points of the last match
36
- break if @end_s1_prev && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 - @end_s2_prev < 5)
38
+ break if @end_s1_prev && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 >= @end_s2_prev) && (@beg_s2 - @end_s2_prev < 5)
37
39
 
38
40
  left_window_s1, left_window_s2 = get_left_windows
39
- break if left_window_s1 && text_similarity(left_window_s1, left_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD
41
+ break if left_window_s1 && (text_similarity(left_window_s1, left_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD)
40
42
 
41
43
  right_window_s1, right_window_s2 = get_right_windows
42
- break if right_window_s2 && text_similarity(right_window_s1, left_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD
44
+ break if right_window_s2 && (text_similarity(right_window_s1, right_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD)
43
45
 
44
46
  search_position = @beg_s2 + 1
45
47
  end
@@ -107,7 +109,7 @@ class TextAlignment::AnchorFinder
107
109
  end
108
110
 
109
111
  def get_right_windows
110
- return if (@beg_s1 + @size_ngram < (@s1.length - @size_window)) || (@beg_s2 + @size_ngram < (@s2.length - @size_window))
112
+ return if (@beg_s1 + @size_ngram > (@s1.length - @size_window)) || (@beg_s2 + @size_ngram > (@s2.length - @size_window))
111
113
 
112
114
  window_s1 = ''
113
115
  loc = @beg_s1 + @size_ngram
@@ -137,6 +139,7 @@ class TextAlignment::AnchorFinder
137
139
  end
138
140
 
139
141
  def text_similarity(str1, str2, ngram_order = 2)
142
+ return 0 if str1.nil? || str2.nil?
140
143
  String::Similarity.cosine(str1, str2, ngram:ngram_order)
141
144
  end
142
145
 
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.3.11'
2
+ VERSION = '0.3.13'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.11
4
+ version: 0.3.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-10 00:00:00.000000000 Z
11
+ date: 2020-09-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary