text_alignment 0.3.16 → 0.3.21

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 61f98e83ee9c1d220dd228be6bb708b79f36d1c691f04dcb14d4af55f398b6da
4
- data.tar.gz: f692e98a27a555baab2797ebe37407ad7133916db172a977e95415b9004e471c
3
+ metadata.gz: 65e1d9b45ff59ac0a233b7656d2aca99d7e4e1051b1a03a0c7726521d4f2b280
4
+ data.tar.gz: 710a3b68c5263f26572727e6e9591ebd5fdb095af4633bd5037c61eae0bb5cb6
5
5
  SHA512:
6
- metadata.gz: 2dd8f865c245601c362e335df4e26413501fa682a97010b8aebd3ebc01864ae4772f6e716725331f9c6bc8f688818d665ef7a21384906211efc0e630b46f2313
7
- data.tar.gz: 4c43199f474b94c825d8ec8ca2085b06107ec34df3d5ee988294f7423caef317c893bbba9879637d197fb2e7ae426c9e43c67cf042b7339959b638d5e5f60d01
6
+ metadata.gz: 598df22e41bbbe0a84b6e1a6a4e631ab0d8166810afd652086595feecbf0808a886685f42e5466626cbb1d6950dd9f1181be776b9938d6174dc7735c3ace24cd
7
+ data.tar.gz: f7dedfb7e64919129f816fbba24dbd1c2e2a056c242a0865915b8a611f594399b17d051d004a846796bba1c2e89c6fb2f17116cd118ca6217cf1a5dff4f6d4d8
@@ -103,6 +103,8 @@ target_annotations = if source_annotations.class == Array
103
103
  else
104
104
  alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
105
105
 
106
+ pp alignment
107
+
106
108
  # verification
107
109
  source_text = source_annotations[:text]
108
110
  puts "=====BEGIN"
@@ -129,9 +131,6 @@ else
129
131
  puts
130
132
  puts "=====END"
131
133
 
132
- pp alignment
133
-
134
- exit
135
134
  # alignment.block_alignments.each do |a|
136
135
  # if a[:alignment].nil? || a[:alignment] == :empty
137
136
  # # p [a[:source], a[:target]]
@@ -3,7 +3,7 @@ require 'string-similarity'
3
3
 
4
4
  module TextAlignment; end unless defined? TextAlignment
5
5
 
6
- TextAlignment::SIZE_NGRAM = 10 unless defined? TextAlignment::SIZE_NGRAM
6
+ TextAlignment::SIZE_NGRAM = 8 unless defined? TextAlignment::SIZE_NGRAM
7
7
  TextAlignment::SIZE_WINDOW = 40 unless defined? TextAlignment::SIZE_WINDOW
8
8
  TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.8 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD
9
9
 
@@ -32,11 +32,11 @@ class TextAlignment::AnchorFinder
32
32
  while @beg_s1 < (@s1.length - @size_ngram)
33
33
  anchor = @s1[@beg_s1, @size_ngram]
34
34
 
35
- search_position = 0
36
- # search_position = @end_s2_prev
35
+ # search_position = 0
36
+ search_position = @end_s2_prev
37
37
  while @beg_s2 = @s2.index(anchor, search_position)
38
38
  # if both the beginning points are sufficiently close to the end points of the last match
39
- break if @end_s1_prev && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 >= @end_s2_prev) && (@beg_s2 - @end_s2_prev < 5)
39
+ break if @beg_s1 > 0 && @beg_s2 > 0 && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 >= @end_s2_prev) && (@beg_s2 - @end_s2_prev < 5)
40
40
 
41
41
  left_window_s1, left_window_s2 = get_left_windows
42
42
  break if left_window_s1 && (text_similarity(left_window_s1, left_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD)
@@ -40,17 +40,20 @@ class TextAlignment::TextAlignment
40
40
  end
41
41
  end
42
42
 
43
- mblocks.each do |b|
44
- p [b[:source], b[:target]]
45
- puts "---"
46
- puts str1[b[:source][:begin] ... b[:source][:end]]
47
- puts "---"
48
- puts str2[b[:target][:begin] ... b[:target][:end]]
49
- puts "====="
50
- puts
51
- end
52
- puts "-=-=-=-=-"
53
- puts
43
+ # pp mblocks
44
+ # puts "-----"
45
+ # puts
46
+ # mblocks.each do |b|
47
+ # p [b[:source], b[:target]]
48
+ # puts "---"
49
+ # puts str1[b[:source][:begin] ... b[:source][:end]]
50
+ # puts "---"
51
+ # puts str2[b[:target][:begin] ... b[:target][:end]]
52
+ # puts "====="
53
+ # puts
54
+ # end
55
+ # puts "-=-=-=-=-"
56
+ # puts
54
57
 
55
58
  ## To find block alignments
56
59
  @block_alignments = []
@@ -78,6 +81,8 @@ class TextAlignment::TextAlignment
78
81
 
79
82
  @block_alignments << {source:{begin:0, end:b1}, target:{begin:0, end:b2}, alignment: :empty} if b1 > 0
80
83
 
84
+ _str1 = str1[b1 ... e1]
85
+ _str2 = str2[b2 ... e2]
81
86
  alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
82
87
  if alignment.similarity < 0.6
83
88
  @block_alignments << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
@@ -113,24 +118,11 @@ class TextAlignment::TextAlignment
113
118
  end
114
119
 
115
120
  # Final step
116
- if mblocks[-1][:source][:end] < str1.length
117
- b1 = mblocks[-1][:source][:end]
118
- b2 = mblocks[-1][:target][:end]
119
-
120
- if mblocks[-1][:target][:end] < str2.length
121
-
122
- else
123
- e1 = str1.length
124
- e2 = str2.length
125
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
126
- end
127
- end
128
-
129
121
  if mblocks[-1][:source][:end] < str1.length && mblocks[-1][:target][:end] < str2.length
130
122
  b1 = mblocks[-1][:source][:end]
131
123
  b2 = mblocks[-1][:target][:end]
132
- _str1 = str1[b1 ... -1]
133
- _str2 = str2[b2 ... -1]
124
+ _str1 = str1[b1 ... str1.length]
125
+ _str2 = str2[b2 ... str2.length]
134
126
 
135
127
  unless _str1.strip.empty?
136
128
  if _str2.strip.empty?
@@ -174,9 +166,8 @@ class TextAlignment::TextAlignment
174
166
  nil
175
167
  end
176
168
  else
177
- p begin_position
178
- puts "-----"
179
- block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin]) + block_alignment[:target][:begin]
169
+ r = block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin])
170
+ r.nil? ? nil : r + block_alignment[:target][:begin]
180
171
  end
181
172
  end
182
173
 
@@ -194,7 +185,8 @@ class TextAlignment::TextAlignment
194
185
  nil
195
186
  end
196
187
  else
197
- block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin]) + block_alignment[:target][:begin]
188
+ r = block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin])
189
+ r.nil? ? nil : r + block_alignment[:target][:begin]
198
190
  end
199
191
  end
200
192
 
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.3.16'
2
+ VERSION = '0.3.21'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.16
4
+ version: 0.3.21
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-17 00:00:00.000000000 Z
11
+ date: 2020-09-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary