text_alignment 0.3.19 → 0.3.24

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ea57d01970fdb56a95a7929803949a553965692fb3f4748eec72fe026f9a79cf
4
- data.tar.gz: 96397baa91646b3eb05a346ff699930b6dacf7d38075273b64ce7916f32d6275
3
+ metadata.gz: fd59fb1ad977d3286f358d8c08315824e86d99cc6f9d72814adb760f2e680107
4
+ data.tar.gz: baafd1b76f6c6447a5763ff731b77ad07e449fc87ef94abed74a978376f6334e
5
5
  SHA512:
6
- metadata.gz: 1d1e7650c35d9bae35a7f1dc2948dbf97fa8f71a86f1f83c5dda9cb64b7179e96ae219db88951e65c2988f078900d960784c4c74489a074654ed893c408be97f
7
- data.tar.gz: 49afaec2b6332dc4038c1f0d0e930bf20ce61a6d4933ff3758bbbfea3e05cf347277aefc341d9c3b3d1f8a4692cec24b901528c2875875bacecc1caa7cf9159c
6
+ metadata.gz: d417878396803e1169a24fae67a9a4b0d4e84948d0c6bc678626641d6f1b6ac1fe16c94e9f07ee747b98f83e4305fa553220a607475363378f32fac4d43a65c7
7
+ data.tar.gz: efaf3640a67be46dddcf7dda9d819cbcb47828a3966d305887b6024eee6ba517a597e9303790f584264b8995f69671d90bdc0c7883fc9244f3b1746695960bf8
@@ -108,7 +108,7 @@ else
108
108
  # verification
109
109
  source_text = source_annotations[:text]
110
110
  puts "=====BEGIN"
111
- (0 ... source_text.length).each do |p|
111
+ (0 ... source_text.rstrip.length).each do |p|
112
112
  t = alignment.transform_begin_position(p)
113
113
  if t.nil?
114
114
  print source_text[p]
@@ -120,7 +120,7 @@ else
120
120
  puts "=====END"
121
121
 
122
122
  puts "=====BEGIN"
123
- (0 .. source_text.length).each do |p|
123
+ (0 .. source_text.rstrip.length).each do |p|
124
124
  t = alignment.transform_end_position(p)
125
125
  if t.nil?
126
126
  print source_text[p]
@@ -30,13 +30,17 @@ class TextAlignment::AnchorFinder
30
30
  def get_next_anchor
31
31
  # find the position of an anchor ngram in s1 and s2
32
32
  while @beg_s1 < (@s1.length - @size_ngram)
33
+ if [' ', "\n", "\t"].include? @s1[@beg_s1]
34
+ @beg_s1 += 1
35
+ next
36
+ end
33
37
  anchor = @s1[@beg_s1, @size_ngram]
34
38
 
35
39
  # search_position = 0
36
40
  search_position = @end_s2_prev
37
41
  while @beg_s2 = @s2.index(anchor, search_position)
38
42
  # if both the beginning points are sufficiently close to the end points of the last match
39
- break if @end_s1_prev && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 >= @end_s2_prev) && (@beg_s2 - @end_s2_prev < 5)
43
+ break if @beg_s1 > 0 && @beg_s2 > 0 && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 >= @end_s2_prev) && (@beg_s2 - @end_s2_prev < 5)
40
44
 
41
45
  left_window_s1, left_window_s2 = get_left_windows
42
46
  break if left_window_s1 && (text_similarity(left_window_s1, left_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD)
@@ -57,9 +61,10 @@ class TextAlignment::AnchorFinder
57
61
  # extend the block
58
62
  b1 = @beg_s1
59
63
  b2 = @beg_s2
60
- while b1 >= @end_s1_prev && b2 > -1 && @s1[b1] == @s2[b2]
64
+ while b1 >= @end_s1_prev && b2 >= @end_s2_prev && @s1[b1] == @s2[b2]
61
65
  b1 -= 1; b2 -= 1
62
66
  end
67
+
63
68
  b1 += 1; b2 += 1
64
69
 
65
70
  e1 = @beg_s1 + @size_ngram
@@ -82,7 +87,8 @@ class TextAlignment::AnchorFinder
82
87
  private
83
88
 
84
89
  def get_left_windows
85
- return if @beg_s1 < @size_window || @beg_s2 < @size_window
90
+ # the guard below is commented out on the assumption that the beginning of a document gives significant locational information
91
+ # return if @beg_s1 < @size_window || @beg_s2 < @size_window
86
92
 
87
93
  window_s1 = ''
88
94
  loc = @beg_s1 - 1
@@ -110,7 +116,8 @@ class TextAlignment::AnchorFinder
110
116
  end
111
117
 
112
118
  def get_right_windows
113
- return if (@beg_s1 + @size_ngram > (@s1.length - @size_window)) || (@beg_s2 + @size_ngram > (@s2.length - @size_window))
119
+ # the guard below is commented out on the assumption that the end of a document gives significant locational information
120
+ # return if (@beg_s1 + @size_ngram > (@s1.length - @size_window)) || (@beg_s2 + @size_ngram > (@s2.length - @size_window))
114
121
 
115
122
  window_s1 = ''
116
123
  loc = @beg_s1 + @size_ngram
@@ -6,7 +6,7 @@ module TextAlignment; end unless defined? TextAlignment
6
6
 
7
7
  TextAlignment::SIGNATURE_NGRAM = 7 unless defined? TextAlignment::SIGNATURE_NGRAM
8
8
  TextAlignment::BUFFER_RATE = 0.1 unless defined? TextAlignment::BUFFER_RATE
9
- TextAlignment::BUFFER_MIN = 10 unless defined? TextAlignment::BUFFER_MIN
9
+ TextAlignment::BUFFER_MIN = 20 unless defined? TextAlignment::BUFFER_MIN
10
10
 
11
11
 
12
12
  class TextAlignment::TextAlignment
@@ -40,10 +40,9 @@ class TextAlignment::TextAlignment
40
40
  end
41
41
  end
42
42
 
43
- pp mblocks
44
- puts "-----"
45
- puts
46
-
43
+ # pp mblocks
44
+ # puts "-----"
45
+ # puts
47
46
  # mblocks.each do |b|
48
47
  # p [b[:source], b[:target]]
49
48
  # puts "---"
@@ -82,6 +81,8 @@ class TextAlignment::TextAlignment
82
81
 
83
82
  @block_alignments << {source:{begin:0, end:b1}, target:{begin:0, end:b2}, alignment: :empty} if b1 > 0
84
83
 
84
+ _str1 = str1[b1 ... e1]
85
+ _str2 = str2[b2 ... e2]
85
86
  alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
86
87
  if alignment.similarity < 0.6
87
88
  @block_alignments << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
@@ -117,24 +118,11 @@ class TextAlignment::TextAlignment
117
118
  end
118
119
 
119
120
  # Final step
120
- if mblocks[-1][:source][:end] < str1.length
121
- b1 = mblocks[-1][:source][:end]
122
- b2 = mblocks[-1][:target][:end]
123
-
124
- if mblocks[-1][:target][:end] < str2.length
125
-
126
- else
127
- e1 = str1.length
128
- e2 = str2.length
129
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
130
- end
131
- end
132
-
133
121
  if mblocks[-1][:source][:end] < str1.length && mblocks[-1][:target][:end] < str2.length
134
122
  b1 = mblocks[-1][:source][:end]
135
123
  b2 = mblocks[-1][:target][:end]
136
- _str1 = str1[b1 ... -1]
137
- _str2 = str2[b2 ... -1]
124
+ _str1 = str1[b1 ... str1.length]
125
+ _str2 = str2[b2 ... str2.length]
138
126
 
139
127
  unless _str1.strip.empty?
140
128
  if _str2.strip.empty?
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.3.19'
2
+ VERSION = '0.3.24'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.19
4
+ version: 0.3.24
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-21 00:00:00.000000000 Z
11
+ date: 2020-09-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary