text_alignment 0.3.15 → 0.3.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8252929e7b74251db493ad991332d0da8a92f35441e2a442d05b6fb29139d657
4
- data.tar.gz: c9e41127fb231e4db2843696c1029b35711449c5bf5104158da301d03b817131
3
+ metadata.gz: 179976ef6ac286a34343f99eef5cf8ee3d26997c1a8e1c8e9348793773ac044a
4
+ data.tar.gz: e15a57b2460a21d2607e3e4775ad32c5a760d9f00067c898b966e21777c241d4
5
5
  SHA512:
6
- metadata.gz: 1fc8da7324d71cf25edbec9765ab512928323079472736ea4e294abb12dfafc87f55d71cb49c371470811775bd489d3c91cce4a787b99faa305f2f326dc80c77
7
- data.tar.gz: f694c99216b59dd693a6acdfffc727fe74b5c189b4b9583b31fb7e6394319a3176de76237a142dac3770bcd1fbbc467d5d1e97a7225ed993a21246a66de8b2ec
6
+ metadata.gz: b3742565325c8ce8b4ce35093b4524b1d9182b51e332f50d9376ce2c22af8918a168b1fcc5764e4313e819c546684a3fc9411d8ee39607d95557924975bd4143
7
+ data.tar.gz: 6f68f68799075990fce62f4454c2ef1f1ef4ff260eac44a5332744a9c21605ac14eb35c9d6d24bedb31681f0995222699d8018727f7e70a45c11076df1c82212
@@ -103,6 +103,8 @@ target_annotations = if source_annotations.class == Array
103
103
  else
104
104
  alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
105
105
 
106
+ pp alignment
107
+
106
108
  # verification
107
109
  source_text = source_annotations[:text]
108
110
  puts "=====BEGIN"
@@ -129,9 +131,6 @@ else
129
131
  puts
130
132
  puts "=====END"
131
133
 
132
- pp alignment
133
-
134
- exit
135
134
  # alignment.block_alignments.each do |a|
136
135
  # if a[:alignment].nil? || a[:alignment] == :empty
137
136
  # # p [a[:source], a[:target]]
@@ -4,7 +4,7 @@ require 'string-similarity'
4
4
  module TextAlignment; end unless defined? TextAlignment
5
5
 
6
6
  TextAlignment::SIZE_NGRAM = 10 unless defined? TextAlignment::SIZE_NGRAM
7
- TextAlignment::SIZE_WINDOW = 30 unless defined? TextAlignment::SIZE_WINDOW
7
+ TextAlignment::SIZE_WINDOW = 40 unless defined? TextAlignment::SIZE_WINDOW
8
8
  TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.8 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD
9
9
 
10
10
  class TextAlignment::AnchorFinder
@@ -23,6 +23,7 @@ class TextAlignment::AnchorFinder
23
23
 
24
24
  # current position in s1
25
25
  @beg_s1 = 0
26
+ @end_s1_prev = 0
26
27
  @end_s2_prev = 0
27
28
  end
28
29
 
@@ -31,8 +32,8 @@ class TextAlignment::AnchorFinder
31
32
  while @beg_s1 < (@s1.length - @size_ngram)
32
33
  anchor = @s1[@beg_s1, @size_ngram]
33
34
 
34
- search_position = 0
35
- # search_position = @end_s2_prev
35
+ # search_position = 0
36
+ search_position = @end_s2_prev
36
37
  while @beg_s2 = @s2.index(anchor, search_position)
37
38
  # if both the beginning points are sufficiently close to the end points of the last match
38
39
  break if @end_s1_prev && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 >= @end_s2_prev) && (@beg_s2 - @end_s2_prev < 5)
@@ -56,7 +57,7 @@ class TextAlignment::AnchorFinder
56
57
  # extend the block
57
58
  b1 = @beg_s1
58
59
  b2 = @beg_s2
59
- while b1 > -1 && b2 > -1 && @s1[b1] == @s2[b2]
60
+ while b1 >= @end_s1_prev && b2 > -1 && @s1[b1] == @s2[b2]
60
61
  b1 -= 1; b2 -= 1
61
62
  end
62
63
  b1 += 1; b2 += 1
@@ -40,6 +40,9 @@ class TextAlignment::TextAlignment
40
40
  end
41
41
  end
42
42
 
43
+ # pp mblocks
44
+ # puts "-----"
45
+ # puts
43
46
  # mblocks.each do |b|
44
47
  # p [b[:source], b[:target]]
45
48
  # puts "---"
@@ -78,6 +81,8 @@ class TextAlignment::TextAlignment
78
81
 
79
82
  @block_alignments << {source:{begin:0, end:b1}, target:{begin:0, end:b2}, alignment: :empty} if b1 > 0
80
83
 
84
+ _str1 = str1[b1 ... e1]
85
+ _str2 = str2[b2 ... e2]
81
86
  alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
82
87
  if alignment.similarity < 0.6
83
88
  @block_alignments << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
@@ -113,19 +118,6 @@ class TextAlignment::TextAlignment
113
118
  end
114
119
 
115
120
  # Final step
116
- if mblocks[-1][:source][:end] < str1.length
117
- b1 = mblocks[-1][:source][:end]
118
- b2 = mblocks[-1][:target][:end]
119
-
120
- if mblocks[-1][:target][:end] < str2.length
121
-
122
- else
123
- e1 = str1.length
124
- e2 = str2.length
125
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
126
- end
127
- end
128
-
129
121
  if mblocks[-1][:source][:end] < str1.length && mblocks[-1][:target][:end] < str2.length
130
122
  b1 = mblocks[-1][:source][:end]
131
123
  b2 = mblocks[-1][:target][:end]
@@ -174,7 +166,8 @@ class TextAlignment::TextAlignment
174
166
  nil
175
167
  end
176
168
  else
177
- block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin]) + block_alignment[:target][:begin]
169
+ r = block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin])
170
+ r.nil? ? nil : r + block_alignment[:target][:begin]
178
171
  end
179
172
  end
180
173
 
@@ -192,7 +185,8 @@ class TextAlignment::TextAlignment
192
185
  nil
193
186
  end
194
187
  else
195
- block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin]) + block_alignment[:target][:begin]
188
+ r = block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin])
189
+ r.nil? ? nil : r + block_alignment[:target][:begin]
196
190
  end
197
191
  end
198
192
 
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.3.15'
2
+ VERSION = '0.3.20'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.15
4
+ version: 0.3.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-17 00:00:00.000000000 Z
11
+ date: 2020-09-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary