text_alignment 0.3.13 → 0.3.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 19a2dfcf8dfffa752dfc0c3363d2d3e1cb3ef7498f79023cdd16e38aa8c46afd
4
- data.tar.gz: 94d925dfc71d24b05fd6861a4f7f7344428b68785db84eeae8f430563b4e3318
3
+ metadata.gz: f6f98465bb47a2b241dda72c8532530f5c7fdf4de49a403366bd08c256b7ff0e
4
+ data.tar.gz: 44a6c920f8f05ab3ee29a0b9fe4de38e2f6fac2386838625b77d99486189ebf0
5
5
  SHA512:
6
- metadata.gz: 72e61cf30c98df2c3d5ac19717c813c936b55daad22cb8c6e8b44bdb45321dab98c69d5f90820e9993d86263b04bdad2e96e8010afc8a57eee916126b673c8cc
7
- data.tar.gz: d92c04294d58845f4a88cb8d9e3db42e9a18e0dd02d0398e3a95bf94662f64a33752754dd637163ef9bc4af77dc602c12fe683d49e9ac0ebb61a2469e5e08216
6
+ metadata.gz: 17f038d6d7366b8223cdd66b5ef9f3d79c8ecc39f432ac15dbfd0f3311e1197bc9c40c5cbd38a69d5778278405dcd100bc18187870ee563a7e5999246845b049
7
+ data.tar.gz: f0ded392d47821bc99c640700955686f14cc9550a13b3b8141af2af7f88f79400a3de6632f2bc3223c9e0dc82311d461de84a5ffa16aff443394b3c76540a74c
@@ -103,7 +103,33 @@ target_annotations = if source_annotations.class == Array
103
103
  else
104
104
  alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
105
105
 
106
- pp alignment
106
+ # verification
107
+ source_text = source_annotations[:text]
108
+ puts "=====BEGIN"
109
+ (0 ... source_text.length).each do |p|
110
+ t = alignment.transform_begin_position(p)
111
+ if t.nil?
112
+ print source_text[p]
113
+ else
114
+ print '.'
115
+ end
116
+ end
117
+ puts
118
+ puts "=====END"
119
+
120
+ puts "=====BEGIN"
121
+ (0 .. source_text.length).each do |p|
122
+ t = alignment.transform_end_position(p)
123
+ if t.nil?
124
+ print source_text[p]
125
+ else
126
+ print '.'
127
+ end
128
+ end
129
+ puts
130
+ puts "=====END"
131
+
132
+ # pp alignment
107
133
 
108
134
  # alignment.block_alignments.each do |a|
109
135
  # if a[:alignment].nil? || a[:alignment] == :empty
@@ -121,6 +147,13 @@ else
121
147
  # end
122
148
  # exit
123
149
 
150
+ # verification of source denotations
151
+ puts "[Invalid source denotations]"
152
+ source_annotations[:denotations] do |d|
153
+ p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < source_text.length
154
+ end
155
+ puts "====="
156
+
124
157
  denotations = alignment.transform_hdenotations(source_annotations[:denotations])
125
158
  lost_annotations += alignment.lost_annotations if alignment.lost_annotations
126
159
 
@@ -6,6 +6,8 @@ module TextAlignment; end unless defined? TextAlignment
6
6
 
7
7
  TextAlignment::SIGNATURE_NGRAM = 7 unless defined? TextAlignment::SIGNATURE_NGRAM
8
8
  TextAlignment::BUFFER_RATE = 0.1 unless defined? TextAlignment::BUFFER_RATE
9
+ TextAlignment::BUFFER_MIN = 10 unless defined? TextAlignment::BUFFER_MIN
10
+
9
11
 
10
12
  class TextAlignment::TextAlignment
11
13
  attr_reader :block_alignments
@@ -70,7 +72,7 @@ class TextAlignment::TextAlignment
70
72
  @block_alignments << {source:{begin:0, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
71
73
  else
72
74
  len_min = [_str1.length, _str2.length].min
73
- len_buffer = (len_min * (1 + TextAlignment::BUFFER_RATE)).to_i
75
+ len_buffer = (len_min * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
74
76
  b1 = _str1.length < len_buffer ? 0 : e1 - len_buffer
75
77
  b2 = _str2.length < len_buffer ? 0 : e2 - len_buffer
76
78
 
@@ -135,9 +137,11 @@ class TextAlignment::TextAlignment
135
137
  @block_alignments << {source:{begin:b1, end:str1.length}, target:{begin:b2, end:str2.length}, alignment: :empty}
136
138
  else
137
139
  len_min = [_str1.length, _str2.length].min
138
- len_buffer = (len_min * (1 + TextAlignment::BUFFER_RATE)).to_i
140
+ len_buffer = (len_min * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
139
141
  e1 = _str1.length < len_buffer ? str1.length : b1 + len_buffer
140
- e2 = _str2.length < len_buffer ? str1.length : b2 + len_buffer
142
+ e2 = _str2.length < len_buffer ? str2.length : b2 + len_buffer
143
+ _str1 = str1[b1 ... e1]
144
+ _str2 = str2[b2 ... e2]
141
145
 
142
146
  alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
143
147
  if alignment.similarity < 0.6
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.3.13'
2
+ VERSION = '0.3.14'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.13
4
+ version: 0.3.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-12 00:00:00.000000000 Z
11
+ date: 2020-09-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary