text_alignment 0.3.16 → 0.3.21

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 61f98e83ee9c1d220dd228be6bb708b79f36d1c691f04dcb14d4af55f398b6da
4
- data.tar.gz: f692e98a27a555baab2797ebe37407ad7133916db172a977e95415b9004e471c
3
+ metadata.gz: 65e1d9b45ff59ac0a233b7656d2aca99d7e4e1051b1a03a0c7726521d4f2b280
4
+ data.tar.gz: 710a3b68c5263f26572727e6e9591ebd5fdb095af4633bd5037c61eae0bb5cb6
5
5
  SHA512:
6
- metadata.gz: 2dd8f865c245601c362e335df4e26413501fa682a97010b8aebd3ebc01864ae4772f6e716725331f9c6bc8f688818d665ef7a21384906211efc0e630b46f2313
7
- data.tar.gz: 4c43199f474b94c825d8ec8ca2085b06107ec34df3d5ee988294f7423caef317c893bbba9879637d197fb2e7ae426c9e43c67cf042b7339959b638d5e5f60d01
6
+ metadata.gz: 598df22e41bbbe0a84b6e1a6a4e631ab0d8166810afd652086595feecbf0808a886685f42e5466626cbb1d6950dd9f1181be776b9938d6174dc7735c3ace24cd
7
+ data.tar.gz: f7dedfb7e64919129f816fbba24dbd1c2e2a056c242a0865915b8a611f594399b17d051d004a846796bba1c2e89c6fb2f17116cd118ca6217cf1a5dff4f6d4d8
@@ -103,6 +103,8 @@ target_annotations = if source_annotations.class == Array
103
103
  else
104
104
  alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
105
105
 
106
+ pp alignment
107
+
106
108
  # verification
107
109
  source_text = source_annotations[:text]
108
110
  puts "=====BEGIN"
@@ -129,9 +131,6 @@ else
129
131
  puts
130
132
  puts "=====END"
131
133
 
132
- pp alignment
133
-
134
- exit
135
134
  # alignment.block_alignments.each do |a|
136
135
  # if a[:alignment].nil? || a[:alignment] == :empty
137
136
  # # p [a[:source], a[:target]]
@@ -3,7 +3,7 @@ require 'string-similarity'
3
3
 
4
4
  module TextAlignment; end unless defined? TextAlignment
5
5
 
6
- TextAlignment::SIZE_NGRAM = 10 unless defined? TextAlignment::SIZE_NGRAM
6
+ TextAlignment::SIZE_NGRAM = 8 unless defined? TextAlignment::SIZE_NGRAM
7
7
  TextAlignment::SIZE_WINDOW = 40 unless defined? TextAlignment::SIZE_WINDOW
8
8
  TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.8 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD
9
9
 
@@ -32,11 +32,11 @@ class TextAlignment::AnchorFinder
32
32
  while @beg_s1 < (@s1.length - @size_ngram)
33
33
  anchor = @s1[@beg_s1, @size_ngram]
34
34
 
35
- search_position = 0
36
- # search_position = @end_s2_prev
35
+ # search_position = 0
36
+ search_position = @end_s2_prev
37
37
  while @beg_s2 = @s2.index(anchor, search_position)
38
38
  # if both the beginning points are sufficiently close to the end points of the last match
39
- break if @end_s1_prev && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 >= @end_s2_prev) && (@beg_s2 - @end_s2_prev < 5)
39
+ break if @beg_s1 > 0 && @beg_s2 > 0 && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 >= @end_s2_prev) && (@beg_s2 - @end_s2_prev < 5)
40
40
 
41
41
  left_window_s1, left_window_s2 = get_left_windows
42
42
  break if left_window_s1 && (text_similarity(left_window_s1, left_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD)
@@ -40,17 +40,20 @@ class TextAlignment::TextAlignment
40
40
  end
41
41
  end
42
42
 
43
- mblocks.each do |b|
44
- p [b[:source], b[:target]]
45
- puts "---"
46
- puts str1[b[:source][:begin] ... b[:source][:end]]
47
- puts "---"
48
- puts str2[b[:target][:begin] ... b[:target][:end]]
49
- puts "====="
50
- puts
51
- end
52
- puts "-=-=-=-=-"
53
- puts
43
+ # pp mblocks
44
+ # puts "-----"
45
+ # puts
46
+ # mblocks.each do |b|
47
+ # p [b[:source], b[:target]]
48
+ # puts "---"
49
+ # puts str1[b[:source][:begin] ... b[:source][:end]]
50
+ # puts "---"
51
+ # puts str2[b[:target][:begin] ... b[:target][:end]]
52
+ # puts "====="
53
+ # puts
54
+ # end
55
+ # puts "-=-=-=-=-"
56
+ # puts
54
57
 
55
58
  ## To find block alignments
56
59
  @block_alignments = []
@@ -78,6 +81,8 @@ class TextAlignment::TextAlignment
78
81
 
79
82
  @block_alignments << {source:{begin:0, end:b1}, target:{begin:0, end:b2}, alignment: :empty} if b1 > 0
80
83
 
84
+ _str1 = str1[b1 ... e1]
85
+ _str2 = str2[b2 ... e2]
81
86
  alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
82
87
  if alignment.similarity < 0.6
83
88
  @block_alignments << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
@@ -113,24 +118,11 @@ class TextAlignment::TextAlignment
113
118
  end
114
119
 
115
120
  # Final step
116
- if mblocks[-1][:source][:end] < str1.length
117
- b1 = mblocks[-1][:source][:end]
118
- b2 = mblocks[-1][:target][:end]
119
-
120
- if mblocks[-1][:target][:end] < str2.length
121
-
122
- else
123
- e1 = str1.length
124
- e2 = str2.length
125
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
126
- end
127
- end
128
-
129
121
  if mblocks[-1][:source][:end] < str1.length && mblocks[-1][:target][:end] < str2.length
130
122
  b1 = mblocks[-1][:source][:end]
131
123
  b2 = mblocks[-1][:target][:end]
132
- _str1 = str1[b1 ... -1]
133
- _str2 = str2[b2 ... -1]
124
+ _str1 = str1[b1 ... str1.length]
125
+ _str2 = str2[b2 ... str2.length]
134
126
 
135
127
  unless _str1.strip.empty?
136
128
  if _str2.strip.empty?
@@ -174,9 +166,8 @@ class TextAlignment::TextAlignment
174
166
  nil
175
167
  end
176
168
  else
177
- p begin_position
178
- puts "-----"
179
- block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin]) + block_alignment[:target][:begin]
169
+ r = block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin])
170
+ r.nil? ? nil : r + block_alignment[:target][:begin]
180
171
  end
181
172
  end
182
173
 
@@ -194,7 +185,8 @@ class TextAlignment::TextAlignment
194
185
  nil
195
186
  end
196
187
  else
197
- block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin]) + block_alignment[:target][:begin]
188
+ r = block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin])
189
+ r.nil? ? nil : r + block_alignment[:target][:begin]
198
190
  end
199
191
  end
200
192
 
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.3.16'
2
+ VERSION = '0.3.21'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.16
4
+ version: 0.3.21
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-17 00:00:00.000000000 Z
11
+ date: 2020-09-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary