text_alignment 0.3.11 → 0.3.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 01525b6ca5f7e0ae1ebb9dfce2083006e095a66b3ab468ccef0a584bc3005556
4
- data.tar.gz: bc1137052a12b8db97635b183f299518b0158f3ccecf36d42fa45d746b4f3792
3
+ metadata.gz: 19a2dfcf8dfffa752dfc0c3363d2d3e1cb3ef7498f79023cdd16e38aa8c46afd
4
+ data.tar.gz: 94d925dfc71d24b05fd6861a4f7f7344428b68785db84eeae8f430563b4e3318
5
5
  SHA512:
6
- metadata.gz: 9596bea2616c3b4d939c8314d026941a6e627f4380183409051df62722a0ee5e3b35302da3066b0d32e8322582c999877b05a09c54749d878a284a062247342e
7
- data.tar.gz: 361e3e7a23697167b41e037e7d272bbd286ac397416defb125821ebbcb17bfa386341c75fecbb94fe7f9976cfbc2a8b5f7f9be7c150d23daf6d8c16410509b5d
6
+ metadata.gz: 72e61cf30c98df2c3d5ac19717c813c936b55daad22cb8c6e8b44bdb45321dab98c69d5f90820e9993d86263b04bdad2e96e8010afc8a57eee916126b673c8cc
7
+ data.tar.gz: d92c04294d58845f4a88cb8d9e3db42e9a18e0dd02d0398e3a95bf94662f64a33752754dd637163ef9bc4af77dc602c12fe683d49e9ac0ebb61a2469e5e08216
@@ -103,6 +103,8 @@ target_annotations = if source_annotations.class == Array
103
103
  else
104
104
  alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
105
105
 
106
+ pp alignment
107
+
106
108
  # alignment.block_alignments.each do |a|
107
109
  # if a[:alignment].nil? || a[:alignment] == :empty
108
110
  # # p [a[:source], a[:target]]
@@ -159,7 +161,7 @@ if lost_annotations
159
161
  warn "#{lost_annotations.length}"
160
162
  end
161
163
 
162
- puts target_annotations.to_json
164
+ #puts target_annotations.to_json
163
165
 
164
166
  # denotations = anns1[:denotations]
165
167
 
@@ -3,9 +3,9 @@ require 'string-similarity'
3
3
 
4
4
  module TextAlignment; end unless defined? TextAlignment
5
5
 
6
- TextAlignment::SIZE_NGRAM = 5 unless defined? TextAlignment::SIZE_NGRAM
7
- TextAlignment::SIZE_WINDOW = 10 unless defined? TextAlignment::SIZE_WINDOW
8
- TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.7 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD
6
+ TextAlignment::SIZE_NGRAM = 10 unless defined? TextAlignment::SIZE_NGRAM
7
+ TextAlignment::SIZE_WINDOW = 20 unless defined? TextAlignment::SIZE_WINDOW
8
+ TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.8 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD
9
9
 
10
10
  class TextAlignment::AnchorFinder
11
11
 
@@ -23,6 +23,7 @@ class TextAlignment::AnchorFinder
23
23
 
24
24
  # current position in s1
25
25
  @beg_s1 = 0
26
+ @end_s2_prev = 0
26
27
  end
27
28
 
28
29
  def get_next_anchor
@@ -31,15 +32,16 @@ class TextAlignment::AnchorFinder
31
32
  anchor = @s1[@beg_s1, @size_ngram]
32
33
 
33
34
  search_position = 0
35
+ # search_position = @end_s2_prev
34
36
  while @beg_s2 = @s2.index(anchor, search_position)
35
37
  # if both the beginning points are sufficiently close to the end points of the last match
36
- break if @end_s1_prev && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 - @end_s2_prev < 5)
38
+ break if @end_s1_prev && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 >= @end_s2_prev) && (@beg_s2 - @end_s2_prev < 5)
37
39
 
38
40
  left_window_s1, left_window_s2 = get_left_windows
39
- break if left_window_s1 && text_similarity(left_window_s1, left_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD
41
+ break if left_window_s1 && (text_similarity(left_window_s1, left_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD)
40
42
 
41
43
  right_window_s1, right_window_s2 = get_right_windows
42
- break if right_window_s2 && text_similarity(right_window_s1, left_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD
44
+ break if right_window_s2 && (text_similarity(right_window_s1, right_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD)
43
45
 
44
46
  search_position = @beg_s2 + 1
45
47
  end
@@ -107,7 +109,7 @@ class TextAlignment::AnchorFinder
107
109
  end
108
110
 
109
111
  def get_right_windows
110
- return if (@beg_s1 + @size_ngram < (@s1.length - @size_window)) || (@beg_s2 + @size_ngram < (@s2.length - @size_window))
112
+ return if (@beg_s1 + @size_ngram > (@s1.length - @size_window)) || (@beg_s2 + @size_ngram > (@s2.length - @size_window))
111
113
 
112
114
  window_s1 = ''
113
115
  loc = @beg_s1 + @size_ngram
@@ -137,6 +139,7 @@ class TextAlignment::AnchorFinder
137
139
  end
138
140
 
139
141
  def text_similarity(str1, str2, ngram_order = 2)
142
+ return 0 if str1.nil? || str2.nil?
140
143
  String::Similarity.cosine(str1, str2, ngram:ngram_order)
141
144
  end
142
145
 
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.3.11'
2
+ VERSION = '0.3.13'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.11
4
+ version: 0.3.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-10 00:00:00.000000000 Z
11
+ date: 2020-09-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary