text_alignment 0.6.4 → 0.8.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 87f945e356349ed709996d88ed39c8ba5b83622bde1c7fd7b9e5ff63504615c2
4
- data.tar.gz: acb6e716113238c39b59a8358928de1bd936382308961a57e2c60e7bc462726f
3
+ metadata.gz: 0c49793b84e9ca5606a8fe9151530f6732337c2b92cfd1af1549a56ea3c2f39e
4
+ data.tar.gz: 4a0b8328d4c6de43af50bd8c278f83facef311c74516dc3a9a0c9dd5f91fbfc0
5
5
  SHA512:
6
- metadata.gz: 4d5b862bb50b4111c6bd390e458d6761303dc394f2fa7dc9d6b821ee7461541705aecac925f700e5124eb282112567e52a51a9f15b84fa8349da25baaf68fdd9
7
- data.tar.gz: a044608a58181e98664a26f410a7d59927dc4d39db8d49a147666f64254e23728ceccaa781a590712b7a74b57222cc449c37eb43a709d3f16da60aa3a55c2e6f
6
+ metadata.gz: ad70de67b4a7b38290a59d89bed7cd9fa343ce3db62bb67b779e51b499929b9d7c4697871cf25017fa59a34c0525741d5da679559fc2f33d902fcda692f9f1ac
7
+ data.tar.gz: 6242b35cbb8f53effc477b508d428d9060cba2e93895496134963755cff956f3931411a87a7bc0d33b7b91459f1a27eb67c422897693320e1b2ceb06eff6e22b
@@ -26,33 +26,43 @@ def read_text(filename)
26
26
  end
27
27
  end
28
28
 
29
- def align_mdoc(source_annotations, target_annotations)
29
# Aligns +source_text+ to +target_text+ and maps +denotations+ onto the target.
#
# Builds a TextAlignment::TextAlignment from the two texts (handing it the
# denotations as well), transforms the denotations through it, and writes
# diagnostics with +warn+.
#
# denotations - passed to the aligner and to transform_hdenotations
# source_text - text the denotations currently refer to
# target_text - text the denotations are to be mapped onto
# debug       - when truthy, the output of alignment_show is printed as well
#
# Returns the value produced by transform_hdenotations.
def align_denotations(denotations, source_text, target_text, debug = false)
  alignment = TextAlignment::TextAlignment.new(source_text, target_text, denotations)
  new_denotations = alignment.transform_hdenotations(denotations)

  if debug
    warn "[block alignment]"
    warn alignment.alignment_show
    warn "-----"
  end

  # Read after the transformation, as before. A nil value is treated as
  # "nothing lost" instead of raising NoMethodError on nil.empty?.
  lost_annotations = alignment.lost_annotations || []
  unless lost_annotations.empty?
    warn "\n[lost annotations] #{lost_annotations.length}"
    lost_annotations.each { |a| warn "#{a}" }
    warn "====="
  end
  # NOTE(review): a bare `warn` appears to print nothing; confirm whether a
  # blank separator line was intended here.
  warn

  # return target annotations
  new_denotations
end
52
+
53
+ def align_mannotations(source_annotations, target_text, debug = false)
54
+ target_annotations = {text:target_text}
55
+
30
56
  idnum_denotations = 0
31
57
  idnum_relations = 0
32
58
  idnum_attributes = 0
33
59
  idnum_modifications = 0
34
60
 
35
- source_annotations.each do |annotations|
36
- alignment = TextAlignment::TextAlignment.new(annotations[:text], target_annotations[:text])
37
-
38
- puts alignment.alignment_show
39
- puts "-----"
40
- puts
41
-
42
- # alignment.block_alignments.each do |a|
43
- # p {source:a[:source], target:a[:target]}
44
- # puts "--"
45
- # p a[:alignment] if a[:alignment].nil? || a[:alignment] == :empty
46
- # puts "--"
47
- # puts annotations[:text][a[:source][:begin] ... a[:source][:end]]
48
- # puts "--"
49
- # puts target_text[a[:target][:begin] ... a[:target][:end]]
50
- # puts "======"
51
- # end
52
-
61
+ source_annotations.each_with_index do |annotations, i|
53
62
  if annotations.has_key?(:denotations) && !annotations[:denotations].empty?
54
63
  ididx = {}
55
- denotations = alignment.transform_hdenotations(annotations[:denotations])
64
+ warn "[#{i}]-=-=-=-=-"
65
+ denotations = align_denotations(annotations[:denotations], annotations[:text], target_text, debug)
56
66
  denotations.each do |d|
57
67
  reid = 'T' + (idnum_denotations += 1).to_s
58
68
  ididx[d[:id]] = reid
@@ -101,141 +111,11 @@ end
101
111
  source_annotations = read_annotations(ARGV[0])
102
112
  target_text = read_text(ARGV[1])
103
113
 
104
- lost_annotations = []
105
114
  target_annotations = if source_annotations.class == Array
106
- align_mdoc(source_annotations, {text: target_text})
115
+ align_mannotations(source_annotations, target_text, false)
107
116
  else
108
- alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
109
-
110
- # pp alignment
111
-
112
- # verification
113
- # source_text = source_annotations[:text]
114
- # puts "=====BEGIN"
115
- # (0 ... source_text.rstrip.length).each do |p|
116
- # t = alignment.transform_begin_position(p)
117
- # if t.nil?
118
- # print source_text[p]
119
- # else
120
- # print '.'
121
- # end
122
- # end
123
- # puts
124
- # puts "=====END"
125
-
126
- # puts "=====BEGIN"
127
- # (0 .. source_text.rstrip.length).each do |p|
128
- # t = alignment.transform_end_position(p)
129
- # if t.nil?
130
- # print source_text[p]
131
- # else
132
- # print '.'
133
- # end
134
- # end
135
- # puts
136
- # puts "=====END"
137
-
138
- source_text = source_annotations[:text]
139
-
140
- puts "[block alignment]"
141
- puts alignment.alignment_show
142
- puts "====="
143
- # exit
144
-
145
- # verification of source denotations
146
- puts "[Invalid source denotations]"
147
- source_annotations[:denotations] do |d|
148
- p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < source_text.length
149
- end
150
- puts "====="
151
- puts
152
-
153
- denotations = alignment.transform_hdenotations(source_annotations[:denotations])
154
- puts "[Invalid transformation]"
155
- denotations.each do |d|
156
- p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < target_text.length
157
- end
158
- puts "====="
159
- puts
160
-
161
- lost_annotations += alignment.lost_annotations if alignment.lost_annotations
162
-
117
+ denotations = align_denotations(source_annotations[:denotations], source_annotations[:text], target_text, false)
163
118
  source_annotations.merge({text:target_text, denotations:denotations})
164
119
  end
165
120
 
166
- num_denotations_source, num_relations_source, num_attributes_source, num_modifications_source = if source_annotations.class == Array
167
- num_denotations_source, num_relations_source, num_attributes_source, num_modifications_source = 0, 0, 0, 0
168
- source_annotations.each do |annotations|
169
- num_denotations_source += annotations[:denotations].nil? ? 0 : annotations[:denotations].length
170
- num_relations_source += annotations[:relations].nil? ? 0 : annotations[:relations].length
171
- num_attributes_source += annotations[:attributes].nil? ? 0 : annotations[:attributes].length
172
- num_modifications_source += annotations[:modifications].nil? ? 0 : annotations[:modifications].length
173
- end
174
- [num_denotations_source, num_relations_source, num_attributes_source, num_modifications_source]
175
- else
176
- num_denotations_source = source_annotations[:denotations].nil? ? 0 : source_annotations[:denotations].length
177
- num_relations_source = source_annotations[:relations].nil? ? 0 : source_annotations[:relations].length
178
- num_attributes_source = source_annotations[:attributes].nil? ? 0 : source_annotations[:attributes].length
179
- num_modifications_source = source_annotations[:modifications].nil? ? 0 : source_annotations[:modifications].length
180
- [num_denotations_source, num_relations_source, num_attributes_source, num_modifications_source]
181
- end
182
-
183
- warn "[source]"
184
- warn "denotations:\t#{num_denotations_source}"
185
- # warn "relations:\t#{num_relations_source}"
186
- # warn "attributes:\t#{num_attributes_source}"
187
- # warn "modifications:\t#{num_modifications_source}"
188
-
189
- warn "\n[target]"
190
- warn "denotations:\t#{target_annotations[:denotations].nil? ? 0 : target_annotations[:denotations].length}"
191
- # warn "relations:\t#{target_annotations[:relations].nil? ? 0 : target_annotations[:relations].length}"
192
- # warn "attributes:\t#{target_annotations[:attributes].nil? ? 0 : target_annotations[:attributes].length}"
193
- # warn "modifications:\t#{target_annotations[:modifications].nil? ? 0 : target_annotations[:modifications].length}"
194
-
195
- if lost_annotations
196
- warn "\n[lost annotations]"
197
- warn "#{lost_annotations.length}"
198
- end
199
-
200
- #puts target_annotations.to_json
201
-
202
- # denotations = anns1[:denotations]
203
-
204
- # puts "[Alignment1]====="
205
- # align = TextAlignment::TextAlignment.new(str1, str2, TextAlignment::MAPPINGS)
206
-
207
- # align.alignment.each do |a|
208
- # p [a[:target][:begin], a[:target][:end], a[:source][:begin], a[:source][:end]]
209
- # end
210
-
211
- # puts TextAlignment::sdiff2cdiff(align.sdiff)
212
- # puts
213
- # puts "[Similarity]\n#{align.similarity}"
214
- # puts
215
- # puts '[Denotations original]'
216
- # pp denotations
217
- # puts
218
- # puts '[Denotations transformed]'
219
- # new_denotations = align.transform_hdenotations(denotations)
220
- # pp new_denotations
221
- # puts
222
- # puts "[Alignment2 (downcased)]====="
223
- # align = TextAlignment::TextAlignment.new(str1.downcase, str2.downcase, TextAlignment::MAPPINGS)
224
- # puts TextAlignment::sdiff2cdiff(align.sdiff)
225
- # puts
226
- # puts "[Similarity]\n#{align.similarity}"
227
- # puts
228
- # puts '[Denotations original]'
229
- # pp denotations
230
- # puts
231
- # puts '[Denotations transformed]'
232
- # new_denotations = align.transform_hdenotations(denotations)
233
- # pp new_denotations
234
- # puts
235
- # puts '[Annotations transformed]'
236
- # anns2[:denotations] = new_denotations
237
- # puts anns2.to_json
238
-
239
- # p align.common_elements
240
- # puts "---------------"
241
- # p align.mapped_elements
121
+ # puts target_annotations.to_json
@@ -1,7 +1,7 @@
1
1
  module TextAlignment; end unless defined? TextAlignment
2
2
 
3
3
  TextAlignment::SIZE_NGRAM = 8 unless defined? TextAlignment::SIZE_NGRAM
4
- TextAlignment::SIZE_WINDOW = 60 unless defined? TextAlignment::SIZE_WINDOW
4
+ TextAlignment::SIZE_WINDOW = 10 unless defined? TextAlignment::SIZE_WINDOW
5
5
  TextAlignment::BUFFER_RATE = 0.1 unless defined? TextAlignment::BUFFER_RATE
6
6
  TextAlignment::BUFFER_MIN = 20 unless defined? TextAlignment::BUFFER_MIN
7
7
  TextAlignment::TEXT_SIMILARITY_THRESHOLD = 0.9 unless defined? TextAlignment::TEXT_SIMILARITY_THRESHOLD
@@ -1,74 +1,172 @@
1
1
  module TextAlignment; end unless defined? TextAlignment
2
2
 
3
3
  TextAlignment::MAPPINGS = [
4
- ["©", "(c)"], #U+00A9 (Copyright Sign)
5
-
6
- ["α", "alpha"], #U+03B1 (greek small letter alpha)
7
- ["β", "beta"], #U+03B2 (greek small letter beta)
8
- ["γ", "gamma"], #U+03B3 (greek small letter gamma)
9
- ["δ", "delta"], #U+03B4 (greek small letter delta)
10
- ["ε", "epsilon"], #U+03B5 (greek small letter epsilon)
11
- ["ζ", "zeta"], #U+03B6 (greek small letter zeta)
12
- ["η", "eta"], #U+03B7 (greek small letter eta)
13
- ["θ", "theta"], #U+03B7 (greek small letter eta)
14
- ["ι", "iota"], #U+03B7 (greek small letter eta)
15
- ["κ", "kappa"], #U+03BA (greek small letter kappa)
16
- ["λ", "lambda"], #U+03BB (greek small letter lambda)
17
- ["λ", "lamda"], #U+03BB (greek small letter lambda)
18
- ["μ", "mu"], #U+03BC (greek small letter mu)
19
- ["ν", "nu"], #U+03BD (greek small letter nu)
20
- ["ξ", "xi"], #U+03BE (greek small letter xi)
21
- ["ο", "omicron"], #U+03BF (greek small letter omicron)
22
- ["π", "pi"], #U+03C0 (greek small letter pi)
23
- ["ρ", "rho"], #U+03C1 (greek small letter rho)
24
- ["σ", "sigma"], #U+03C3 (greek small letter sigma)
25
- ["τ", "tau"], #U+03C4 (greek small letter tau)
26
- ["υ", "upsilon"], #U+03C5 (greek small letter upsilon)
27
- ["φ", "phi"], #U+03C6 (greek small letter phi)
28
- ["χ", "chi"], #U+03C7 (greek small letter chi)
29
- ["ψ", "psi"], #U+03C8 (greek small letter psi)
30
- ["ω", "omega"], #U+03C9 (greek small letter omega)
31
-
32
- ["Α", "Alpha"], #U+0391 (greek capital letter alpha)
33
- ["Β", "Beta"], #U+0392 (greek capital letter beta)
34
- ["Γ", "Gamma"], #U+0393 (greek capital letter gamma)
35
- ["Δ", "Delta"], #U+0394 (greek capital letter delta)
36
- ["Ε", "Epsilon"], #U+0395 (greek capital letter epsilon)
37
- ["Ζ", "Zeta"], #U+0396 (greek capital letter zeta)
38
- ["Η", "Eta"], #U+0397 (greek capital letter eta)
39
- ["Θ", "Theta"], #U+0398 (greek capital letter theta)
40
- ["Ι", "Iota"], #U+0399 (greek capital letter iota)
41
- ["Κ", "Kappa"], #U+039A (greek capital letter kappa)
42
- ["Λ", "Lambda"], #U+039B (greek capital letter lambda)
43
- ["Λ", "Lamda"], #U+039B (greek capital letter lambda)
44
- ["Μ", "Mu"], #U+039C (greek capital letter mu)
45
- ["Ν", "Nu"], #U+039D (greek capital letter nu)
46
- ["Ξ", "Xi"], #U+039E (greek capital letter xi)
47
- ["Ο", "Omicron"], #U+039F (greek capital letter omicron)
48
- ["Π", "Pi"], #U+03A0 (greek capital letter pi)
49
- ["Ρ", "Rho"], #U+03A1 (greek capital letter rho)
50
- ["Σ", "Sigma"], #U+03A3 (greek capital letter sigma)
51
- ["Τ", "Tau"], #U+03A4 (greek capital letter tau)
52
- ["Υ", "Upsilon"], #U+03A5 (greek capital letter upsilon)
53
- ["Φ", "Phi"], #U+03A6 (greek capital letter phi)
54
- ["Χ", "Chi"], #U+03A7 (greek capital letter chi)
55
- ["Ψ", "Psi"], #U+03A8 (greek capital letter Psi)
56
- ["Ω", "Omega"], #U+03A9 (greek capital letter omega)
57
-
58
- ["ϕ", "phi"], #U+03D5 (greek phi symbol)
59
-
60
- ["×", "x"], #U+00D7 (multiplication sign)
61
- ["•", "*"], #U+2022 (bullet)
62
- [" ", " "], #U+2009 (thin space)
63
- [" ", " "], #U+200A (hair space)
64
- [" ", " "], #U+00A0 (no-break space)
65
- [" ", " "], #U+3000 (ideographic space)
66
- ["", "-"], #U+2212 (minus sign)
67
- ["", "-"], #U+2013 (en dash)
68
- ["", "'"], #U+2032 (prime)
69
- ["", "'"], #U+2018 (left single quotation mark)
70
- ["", "'"], #U+2019 (right single quotation mark)
71
- ["", '"'], #U+201C (left double quotation mark)
72
- ["", '"'], #U+201D (right double quotation mark)
4
+ ["©", "(c)"], #U+00A9 (Copyright Sign)
5
+
6
+ ["α", "alpha"], #U+03B1 (greek small letter alpha)
7
+ ["β", "beta"], #U+03B2 (greek small letter beta)
8
+ ["γ", "gamma"], #U+03B3 (greek small letter gamma)
9
+ ["δ", "delta"], #U+03B4 (greek small letter delta)
10
+ ["ε", "epsilon"], #U+03B5 (greek small letter epsilon)
11
+ ["ζ", "zeta"], #U+03B6 (greek small letter zeta)
12
+ ["η", "eta"], #U+03B7 (greek small letter eta)
13
+ ["θ", "theta"], #U+03B8 (greek small letter theta)
14
+ ["ι", "iota"], #U+03B9 (greek small letter iota)
15
+ ["κ", "kappa"], #U+03BA (greek small letter kappa)
16
+ ["λ", "lambda"], #U+03BB (greek small letter lambda)
17
+ ["λ", "lamda"], #U+03BB (greek small letter lambda)
18
+ ["μ", "mu"], #U+03BC (greek small letter mu)
19
+ ["ν", "nu"], #U+03BD (greek small letter nu)
20
+ ["ξ", "xi"], #U+03BE (greek small letter xi)
21
+ ["ο", "omicron"], #U+03BF (greek small letter omicron)
22
+ ["π", "pi"], #U+03C0 (greek small letter pi)
23
+ ["ρ", "rho"], #U+03C1 (greek small letter rho)
24
+ ["σ", "sigma"], #U+03C3 (greek small letter sigma)
25
+ ["τ", "tau"], #U+03C4 (greek small letter tau)
26
+ ["υ", "upsilon"], #U+03C5 (greek small letter upsilon)
27
+ ["φ", "phi"], #U+03C6 (greek small letter phi)
28
+ ["χ", "chi"], #U+03C7 (greek small letter chi)
29
+ ["ψ", "psi"], #U+03C8 (greek small letter psi)
30
+ ["ω", "omega"], #U+03C9 (greek small letter omega)
31
+
32
+ ["Α", "Alpha"], #U+0391 (greek capital letter alpha)
33
+ ["Β", "Beta"], #U+0392 (greek capital letter beta)
34
+ ["Γ", "Gamma"], #U+0393 (greek capital letter gamma)
35
+ ["Δ", "Delta"], #U+0394 (greek capital letter delta)
36
+ ["Ε", "Epsilon"], #U+0395 (greek capital letter epsilon)
37
+ ["Ζ", "Zeta"], #U+0396 (greek capital letter zeta)
38
+ ["Η", "Eta"], #U+0397 (greek capital letter eta)
39
+ ["Θ", "Theta"], #U+0398 (greek capital letter theta)
40
+ ["Ι", "Iota"], #U+0399 (greek capital letter iota)
41
+ ["Κ", "Kappa"], #U+039A (greek capital letter kappa)
42
+ ["Λ", "Lambda"], #U+039B (greek capital letter lambda)
43
+ ["Λ", "Lamda"], #U+039B (greek capital letter lambda)
44
+ ["Μ", "Mu"], #U+039C (greek capital letter mu)
45
+ ["Ν", "Nu"], #U+039D (greek capital letter nu)
46
+ ["Ξ", "Xi"], #U+039E (greek capital letter xi)
47
+ ["Ο", "Omicron"], #U+039F (greek capital letter omicron)
48
+ ["Π", "Pi"], #U+03A0 (greek capital letter pi)
49
+ ["Ρ", "Rho"], #U+03A1 (greek capital letter rho)
50
+ ["Σ", "Sigma"], #U+03A3 (greek capital letter sigma)
51
+ ["Τ", "Tau"], #U+03A4 (greek capital letter tau)
52
+ ["Υ", "Upsilon"], #U+03A5 (greek capital letter upsilon)
53
+ ["Φ", "Phi"], #U+03A6 (greek capital letter phi)
54
+ ["Χ", "Chi"], #U+03A7 (greek capital letter chi)
55
+ ["Ψ", "Psi"], #U+03A8 (greek capital letter Psi)
56
+ ["Ω", "Omega"], #U+03A9 (greek capital letter omega)
57
+
58
+ ["ϕ", "phi"], #U+03D5 (greek phi symbol)
59
+
60
+ ["×", "x"], #U+00D7 (multiplication sign)
61
+ ["•", "*"], #U+2022 (bullet)
62
+ [" ", " "], #U+2009 (thin space)
63
+ [" ", " "], #U+200A (hair space)
64
+ [" ", " "], #U+00A0 (no-break space)
65
+ [" ", " "], #U+3000 (ideographic space)
66
+ ["‑", "-"], #U+2011 (Non-Breaking Hyphen)
67
+ ["−", "-"], #U+2212 (minus sign)
68
+ ["–", "-"], #U+2013 (en dash)
69
+ ["′", "'"], #U+2032 (prime)
70
+ ["‘", "'"], #U+2018 (left single quotation mark)
71
+ ["’", "'"], #U+2019 (right single quotation mark)
72
+ ["“", '"'], #U+201C (left double quotation mark)
73
+ ["”", '"'], #U+201D (right double quotation mark)
73
74
  ['"', "''"]
74
- ]
75
+ ]
76
+
77
+
78
+ TextAlignment::PADDING_LETTERS = ['@', '^', '|', '#', '$', '%', '&', '_'] unless defined? TextAlignment::PADDING_LETTERS
79
+
80
+
81
+ class << TextAlignment
82
# Applies every one-character-to-one-character mapping to both strings.
#
# _str1, _str2 - the strings to normalise; neither is modified in place
# _mappings    - array of [from, to] pairs; defaults to TextAlignment::MAPPINGS
#
# Returns [str1, str2, mappings]: the two translated strings and the
# mappings that still have a side longer than one character. When there is
# no one-to-one mapping the three inputs are returned unchanged.
def single_character_mapping_preprocessing(_str1, _str2, _mappings = nil)
  _mappings ||= TextAlignment::MAPPINGS

  character_mappings = _mappings.select { |m| m[0].length == 1 && m[1].length == 1 }
  return [_str1, _str2, _mappings] if character_mappings.empty?

  # A lookup table plus an escaped alternation is used instead of String#tr:
  # tr reads '-', a leading '^' and '\' in the from-set as range, negation and
  # escape syntax, which mistranslates mappings whose source is one of those
  # characters. If a source character is listed twice, the later pair wins.
  table = character_mappings.each_with_object({}) { |m, h| h[m[0]] = m[1] }
  pattern = Regexp.union(table.keys)

  str1 = _str1.gsub(pattern, table)
  str2 = _str2.gsub(pattern, table)

  mappings = _mappings.select { |m| m[0].length > 1 || m[1].length > 1 }

  [str1, str2, mappings]
end
101
+
102
# Rewrites multi-character spellings (e.g. "alpha") as their one-character
# form followed by padding, so both strings keep their original length.
#
# For each mapping [char, spelling] with a one-character +char+ and a longer
# +spelling+: if +char+ occurs in the *other* original string, every
# occurrence of +spelling+ in this string becomes +char+ followed by
# (spelling.length - 1) padding letters.
#
# _str1, _str2 - the strings to preprocess; neither is modified in place
# _mappings    - array of [from, to] pairs; defaults to TextAlignment::MAPPINGS
#
# Side effect: stores the chosen padding letters in @padding_letter1 and
# @padding_letter2 on the receiver.
#
# Returns [str1, str2, mappings], where +mappings+ excludes the mappings
# handled here. When there is no such mapping the inputs are returned as is.
# Raises RuntimeError when no padding letter from
# TextAlignment::PADDING_LETTERS is available.
def long_to_one_mapping_preprocessing(_str1, _str2, _mappings = nil)
  _mappings ||= TextAlignment::MAPPINGS

  long_to_one_mappings = _mappings.select { |m| m[0].length == 1 && m[1].length > 1 }
  return [_str1, _str2, _mappings] if long_to_one_mappings.empty?

  pletters = TextAlignment::PADDING_LETTERS

  # padding for str1: first candidate letter that does not occur in _str2
  letter1 = pletters.find { |l| _str2.index(l).nil? }
  raise RuntimeError, "Could not find a padding letter for str1" if letter1.nil?
  @padding_letter1 = letter1

  # padding for str2: a different letter that does not occur in _str1
  letter2 = pletters.find { |l| l != @padding_letter1 && _str1.index(l).nil? }
  raise RuntimeError, "Could not find a padding letter for str2" if letter2.nil?
  @padding_letter2 = letter2

  # Substitutions accumulate across mappings. Restarting from the untouched
  # input on every iteration would keep only the last mapping's effect.
  # Whether a mapping applies is still decided against the original strings.
  str1 = _str1
  str2 = _str2
  long_to_one_mappings.each do |f|
    from = f[1]
    pad_count = f[1].length - 1

    str1 = str1.gsub(from, f[0] + (@padding_letter1 * pad_count)) if _str2.index(f[0])
    str2 = str2.gsub(from, f[0] + (@padding_letter2 * pad_count)) if _str1.index(f[0])
  end

  mappings = _mappings.select { |m| m[0].length > 1 || m[1].length == 1 }

  [str1, str2, mappings]
end
149
+
150
# Scores how much of the shorter text is covered by matching characters.
#
# _s1, _s2 - the two compared strings
# sdiff    - diff entries responding to action, old_element and new_element,
#            or nil
#
# Only '=' entries whose two elements both contain a non-whitespace character
# are counted. Padding letters remembered in @padding_letter1 and
# @padding_letter2 (if set) are blanked out before the strings are measured.
#
# Returns 0 when +sdiff+ is nil or nothing matched; otherwise the match count
# divided by the smaller non-whitespace character count, as a Float.
def compute_similarity(_s1, _s2, sdiff)
  return 0 if sdiff.nil?

  # compute the lcs only with non-whitespace letters
  matched = sdiff.count do |entry|
    entry.action == '=' && entry.old_element =~ /\S/ && entry.new_element =~ /\S/
  end
  return 0 if matched == 0

  text1 = @padding_letter1 ? _s1.tr(@padding_letter1, ' ') : _s1
  text2 = @padding_letter2 ? _s2.tr(@padding_letter2, ' ') : _s2

  visible_counts = [text1, text2].map { |text| text.scan(/\S/).count }
  matched.to_f / visible_counts.min
end
171
+
172
+ end
@@ -17,10 +17,10 @@ class TextAlignment::MixedAlignment
17
17
  attr_reader :similarity
18
18
  attr_reader :str1_match_initial, :str1_match_final, :str2_match_initial, :str2_match_final
19
19
 
20
- def initialize(_str1, _str2)
20
+ def initialize(_str1, _str2, _mappings = nil)
21
21
  raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
22
22
 
23
- str1, str2, mappings = string_preprocessing(_str1, _str2)
23
+ str1, str2, mappings = TextAlignment::long_to_one_mapping_preprocessing(_str1, _str2, _mappings)
24
24
 
25
25
  _compute_mixed_alignment(str1, str2, mappings)
26
26
  end
@@ -63,7 +63,7 @@ class TextAlignment::MixedAlignment
63
63
  end
64
64
 
65
65
  cmp = TextAlignment::LCSComparison.new(str1, str2, lcs, @sdiff)
66
- @similarity = compute_similarity(str1, str2, @sdiff)
66
+ @similarity = TextAlignment::compute_similarity(str1, str2, @sdiff)
67
67
  @str1_match_initial = cmp.str1_match_initial
68
68
  @str1_match_final = cmp.str1_match_final
69
69
  @str2_match_initial = cmp.str2_match_initial
@@ -139,72 +139,4 @@ class TextAlignment::MixedAlignment
139
139
  @position_map_end = posmap_end.sort.to_h
140
140
  end
141
141
 
142
- private
143
-
144
- def string_preprocessing(_str1, _str2)
145
- str1 = _str1.dup
146
- str2 = _str2.dup
147
- mappings = TextAlignment::MAPPINGS.dup
148
-
149
- ## single character mappings
150
- character_mappings = mappings.select{|m| m[0].length == 1 && m[1].length == 1}
151
- characters_from = character_mappings.collect{|m| m[0]}.join
152
- characters_to = character_mappings.collect{|m| m[1]}.join
153
- characters_to.gsub!(/-/, '\-')
154
-
155
- str1.tr!(characters_from, characters_to)
156
- str2.tr!(characters_from, characters_to)
157
-
158
- mappings.delete_if{|m| m[0].length == 1 && m[1].length == 1}
159
-
160
- ## long to one character mappings
161
- pletters = TextAlignment::PADDING_LETTERS
162
-
163
- # find the padding letter for str1
164
- @padding_letter1 = begin
165
- i = pletters.index{|l| str2.index(l).nil?}
166
- raise RuntimeError, "Could not find a padding letter for str1" if i.nil?
167
- TextAlignment::PADDING_LETTERS[i]
168
- end
169
-
170
- # find the padding letter for str2
171
- @padding_letter2 = begin
172
- i = pletters.index{|l| l != @padding_letter1 && str1.index(l).nil?}
173
- raise RuntimeError, "Could not find a padding letter for str2" if i.nil?
174
- TextAlignment::PADDING_LETTERS[i]
175
- end
176
-
177
- # ASCII foldings
178
- ascii_foldings = mappings.select{|m| m[0].length == 1 && m[1].length > 1}
179
- ascii_foldings.each do |f|
180
- from = f[1]
181
-
182
- if str2.index(f[0])
183
- to = f[0] + (@padding_letter1 * (f[1].length - 1))
184
- str1.gsub!(from, to)
185
- end
186
-
187
- if str1.index(f[0])
188
- to = f[0] + (@padding_letter2 * (f[1].length - 1))
189
- str2.gsub!(from, to)
190
- end
191
- end
192
- mappings.delete_if{|m| m[0].length == 1 && m[1].length > 1}
193
-
194
- [str1, str2, mappings]
195
- end
196
-
197
- def compute_similarity(_s1, _s2, sdiff)
198
- return 0 if sdiff.nil?
199
-
200
- # compute the lcs only with non-whitespace letters
201
- lcs = sdiff.count{|d| d.action == '=' && d.old_element =~ /\S/ && d.new_element =~ /\S/}
202
- return 0 if lcs == 0
203
-
204
- s1 = _s1.tr(@padding_letter1, ' ')
205
- s2 = _s2.tr(@padding_letter2, ' ')
206
-
207
- similarity = lcs / [s1.scan(/\S/).count, s2.scan(/\S/).count].min.to_f
208
- end
209
-
210
142
  end
@@ -5,50 +5,44 @@ require 'text_alignment/mixed_alignment'
5
5
 
6
6
  module TextAlignment; end unless defined? TextAlignment
7
7
 
8
- TextAlignment::PADDING_LETTERS = ['@', '^', '|', '#', '$', '%', '&', '_'] unless defined? TextAlignment::PADDING_LETTERS
9
-
10
8
  class TextAlignment::TextAlignment
11
9
  attr_reader :block_alignment
12
10
  attr_reader :similarity
13
11
  attr_reader :lost_annotations
14
12
 
15
- def initialize(str1, str2, _size_ngram = nil, _size_window = nil, _text_similiarity_threshold = nil)
16
- raise ArgumentError, "nil string" if str1.nil? || str2.nil?
13
+ def initialize(_str1, _str2, denotations = nil, _size_ngram = nil, _size_window = nil, _text_similiarity_threshold = nil)
14
+ raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
17
15
 
18
- @block_alignment = {source_text:str1, target_text:str2}
16
+ @block_alignment = {source_text:_str1, target_text:_str2}
17
+ @original_str1 = _str1
18
+ @original_str2 = _str2
19
19
 
20
- # try exact match
21
- block_begin = str2.index(str1)
22
- unless block_begin.nil?
23
- @block_alignment[:blocks] = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
24
- return @block_alignment
25
- end
20
+ str1, str2, @mappings = TextAlignment::single_character_mapping_preprocessing(_str1, _str2)
26
21
 
27
- # try exact match
28
- block_begin = str2.downcase.index(str1.downcase)
29
- unless block_begin.nil?
30
- @block_alignment[:blocks] = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
31
- return @block_alignment
22
+ if r = whole_block_alignment(str1, str2)
23
+ @block_alignment[:blocks] = r
24
+ return
32
25
  end
33
26
 
27
+ ## to find block alignments
34
28
  anchor_finder = TextAlignment::AnchorFinder.new(str1, str2, _size_ngram, _size_window, _text_similiarity_threshold)
35
29
 
36
- # To collect matched blocks
37
- mblocks = []
38
- while anchor = anchor_finder.get_next_anchor
39
- last = mblocks.last
40
- if last && (anchor[:source][:begin] == last[:source][:end] + 1) && (anchor[:target][:begin] == last[:target][:end] + 1)
41
- last[:source][:end] = anchor[:source][:end]
42
- last[:target][:end] = anchor[:target][:end]
30
+ blocks = []
31
+ while block = anchor_finder.get_next_anchor
32
+ last = blocks.last
33
+ if last && (block[:source][:begin] == last[:source][:end] + 1) && (block[:target][:begin] == last[:target][:end] + 1)
34
+ last[:source][:end] = block[:source][:end]
35
+ last[:target][:end] = block[:target][:end]
43
36
  else
44
- mblocks << anchor
37
+ blocks << block.merge(alignment: :block, delta: block[:target][:begin] - block[:source][:begin])
45
38
  end
46
39
  end
47
40
 
48
- # pp mblocks
41
+ # pp blocks
49
42
  # puts "-----"
50
43
  # puts
51
- # mblocks.each do |b|
44
+ # exit
45
+ # blocks.each do |b|
52
46
  # p [b[:source], b[:target]]
53
47
  # puts "---"
54
48
  # puts str1[b[:source][:begin] ... b[:source][:end]]
@@ -60,114 +54,218 @@ class TextAlignment::TextAlignment
60
54
  # puts "-=-=-=-=-"
61
55
  # puts
62
56
 
63
- ## To find block alignments
64
- @block_alignment[:blocks] = []
65
- return if mblocks.empty?
66
-
67
- # Initial step
68
- if mblocks[0][:source][:begin] > 0
69
- e1 = mblocks[0][:source][:begin]
70
- e2 = mblocks[0][:target][:begin]
57
+ ## to fill the gaps
58
+ last_block = nil
59
+ blocks2 = blocks.inject([]) do |sum, block|
60
+ b1 = last_block ? last_block[:source][:end] : 0
61
+ e1 = block[:source][:begin]
71
62
 
72
- if mblocks[0][:target][:begin] == 0
73
- @block_alignment[:blocks] << {source:{begin:0, end:e1}, target:{begin:0, end:0}, alignment: :empty}
63
+ sum += if b1 == e1
64
+ [block]
74
65
  else
75
- _str1 = str1[0 ... e1]
76
- _str2 = str2[0 ... e2]
66
+ b2 = last_block ? last_block[:target][:end] : 0
67
+ e2 = block[:target][:begin]
68
+
69
+ if b2 == e2
70
+ [
71
+ {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty},
72
+ block
73
+ ]
74
+ else
75
+ if b1 == 0 && b2 == 0
76
+ len_buffer = (e1 * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
77
+ b2 = e2 - len_buffer if e2 > len_buffer
78
+ end
77
79
 
78
- unless _str1.strip.empty?
79
- if _str2.strip.empty?
80
- @block_alignment[:blocks] << {source:{begin:0, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
80
+ _str1 = str1[b1 ... e1]
81
+ _str2 = str2[b2 ... e2]
82
+
83
+ if _str1.strip.empty? || _str2.strip.empty?
84
+ [
85
+ {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty},
86
+ block
87
+ ]
81
88
  else
82
- len_min = [_str1.length, _str2.length].min
83
- len_buffer = (len_min * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
84
- b1 = _str1.length < len_buffer ? 0 : e1 - len_buffer
85
- b2 = _str2.length < len_buffer ? 0 : e2 - len_buffer
86
-
87
- @block_alignment[:blocks] << {source:{begin:0, end:b1}, target:{begin:0, end:b2}, alignment: :empty} if b1 > 0
88
-
89
- _str1 = str1[b1 ... e1]
90
- _str2 = str2[b2 ... e2]
91
- alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase)
92
- if alignment.similarity < 0.5
93
- @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty, similarity: alignment.similarity}
94
- else
95
- @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment, similarity: alignment.similarity}
96
- end
89
+ local_alignment_blocks(str1, b1, e1, str2, b2, e2, denotations) << block
97
90
  end
98
91
  end
99
92
  end
93
+
94
+ last_block = block
95
+ sum
100
96
  end
101
- @block_alignment[:blocks] << mblocks[0].merge(alignment: :block)
102
-
103
- (1 ... mblocks.length).each do |i|
104
- b1 = mblocks[i - 1][:source][:end]
105
- b2 = mblocks[i - 1][:target][:end]
106
- e1 = mblocks[i][:source][:begin]
107
- e2 = mblocks[i][:target][:begin]
108
- _str1 = str1[b1 ... e1]
109
- _str2 = str2[b2 ... e2]
110
- unless _str1.strip.empty?
111
- if _str2.strip.empty?
112
- @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
97
+
98
+ # the last step
99
+ blocks2 += if last_block.nil?
100
+ local_alignment_blocks(str1, 0, str1.length, str2, 0, str2.length, denotations)
101
+ else
102
+ b1 = last_block[:source][:end]
103
+ if b1 < str1.length
104
+ e1 = str1.length
105
+
106
+ b2 = last_block[:target][:end]
107
+ if b2 < str2.length
108
+ len_buffer = ((e1 - b1) * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
109
+ e2 = (str2.length - b2) > len_buffer ? b2 + len_buffer : str2.length
110
+ local_alignment_blocks(str1, b1, e1, str2, b2, e2, denotations)
113
111
  else
114
- alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase)
115
- if alignment.similarity < 0.5
116
- @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty, similarity: alignment.similarity}
117
- else
118
- @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment, similarity: alignment.similarity}
112
+ [{source:{begin:last_block[:source][:end], end:str1.length}, alignment: :empty}]
113
+ end
114
+ else
115
+ []
116
+ end
117
+ end
118
+
119
+ @block_alignment[:blocks] = blocks2
120
+ end
121
+
122
# Tries to locate +str1+ as one contiguous block inside +str2+.
#
# An exact substring search is attempted first; if it fails, the search is
# repeated on the downcased strings.
#
# Returns a one-element array holding a block hash (source span, target
# span, :delta offset and alignment: :block), or nil when +str1+ is found
# neither way.
def whole_block_alignment(str1, str2)
  ## Block exact match, then the case-insensitive fallback
  offset = str2.index(str1)
  offset = str2.downcase.index(str1.downcase) if offset.nil?
  return nil if offset.nil?

  length = str1.length
  [
    {
      source: {begin: 0, end: length},
      target: {begin: offset, end: offset + length},
      delta: offset,
      alignment: :block
    }
  ]
end
136
+
137
# Aligns the source span str1[b1...e1] with the target span str2[b2...e2]
# and returns the resulting list of alignment blocks.
#
# Two strategies are used:
#
# 1. Term-based alignment. When +denotations+ is given, every denotation that
#    lies completely inside the source span is looked up, in order, in the
#    target span. The result is used only if every term is found and none of
#    the terms occurs again after the last match (i.e. the matching is
#    unambiguous). The gaps between / around matched terms are emitted as
#    +:empty+ blocks.
# 2. Character-based alignment. Otherwise the two spans are down-cased and
#    handed to TextAlignment::MixedAlignment. When the span covers the whole
#    source text and either end offset exceeds 2000, this step is skipped and
#    a single +:empty+ block is returned instead.
#
# @param str1 [String] the whole source text
# @param b1 [Integer] begin offset of the source span
# @param e1 [Integer] end offset (exclusive) of the source span
# @param str2 [String] the whole target text
# @param b2 [Integer] begin offset of the target span
# @param e2 [Integer] end offset (exclusive) of the target span
# @param denotations [Array<Hash>, nil] annotations with a
#   +{span: {begin:, end:}}+ entry, in source coordinates
# @return [Array<Hash>] blocks with +:source+, (optionally) +:target+ and
#   +:alignment+ (+:term+, +:empty+ or a MixedAlignment object); +:term+ blocks
#   also carry +:delta+, character-aligned blocks carry +:similarity+
def local_alignment_blocks(str1, b1, e1, str2, b2, e2, denotations = nil)
  block2 = str2[b2...e2]

  ## term-based alignment
  tblocks = []
  if denotations
    # Order by begin ascending, then by end descending (longer span first).
    # The previous comparator `a <=> b || c <=> d` never reached the
    # tie-break because 0 is truthy in Ruby.
    ds_in_scope = denotations
                  .select { |d| d[:span][:begin] >= b1 && d[:span][:end] <= e1 }
                  .sort_by { |d| [d[:span][:begin], -d[:span][:end]] }
                  .map { |d| d.merge(lex: str1[d[:span][:begin]...d[:span][:end]]) }

    position = 0
    ds_in_scope.each do |term|
      lex = term[:lex]
      r = block2.index(lex, position)
      if r.nil?
        # missing term found
        position = nil
        break
      end
      position = r + lex.length

      # r is relative to block2, so the absolute target offset is r + b2.
      # delta must map a source position onto that absolute target position.
      target_begin = r + b2
      tblocks << {
        source: term[:span],
        target: {begin: target_begin, end: target_begin + lex.length},
        alignment: :term,
        delta: target_begin - term[:span][:begin]
      }
    end

    if position.nil?
      tblocks = []
    elsif ds_in_scope.any? { |term| block2.index(term[:lex], position) }
      # redundant matching found: a term occurs again after the last match
      tblocks = []
    end
  end

  if tblocks.empty?
    whole_source = b1 == 0 && e1 == str1.length
    if whole_source && (e1 > 2000 || e2 > 2000)
      # NOTE(review): presumably a guard against expensive character-level
      # alignment of long texts -- confirm the intent of the 2000 threshold.
      return [{source: {begin: b1, end: e1}, target: {begin: b2, end: e2}, alignment: :empty}]
    end

    ## character-based alignment
    block1 = str1[b1...e1]
    alignment = TextAlignment::MixedAlignment.new(block1.downcase, block2.downcase, @mappings)
    if alignment.sdiff.nil?
      return [{source: {begin: b1, end: e1}, target: {begin: b2, end: e2}, alignment: :empty}]
    end

    return [{source: {begin: b1, end: e1}, target: {begin: b2, end: e2}, alignment: alignment, similarity: alignment.similarity}]
  end

  # Interleave the matched terms with :empty blocks covering the gaps.
  lblocks = []
  last_tblock = nil
  tblocks.each do |tblock|
    tb1 = last_tblock ? last_tblock[:source][:end] : b1
    te1 = tblock[:source][:begin]

    unless te1 == tb1
      # NOTE(review): this checks whether the whole target span is empty,
      # not whether the target gap (tb2 == te2) is -- confirm this is intended.
      if b2 == e2
        lblocks << {source: {begin: tb1, end: te1}, alignment: :empty}
      else
        tb2 = last_tblock ? last_tblock[:target][:end] : b2
        te2 = tblock[:target][:begin]
        lblocks << {source: {begin: tb1, end: te1}, target: {begin: tb2, end: te2}, alignment: :empty}
      end
    end

    lblocks << tblock
    last_tblock = tblock
  end

  # Cover whatever is left of the source span after the last matched term.
  if last_tblock[:source][:end] < e1
    tail = {source: {begin: last_tblock[:source][:end], end: e1}}
    tail[:target] = {begin: last_tblock[:target][:end], end: e2} if last_tblock[:target][:end] < e2
    tail[:alignment] = :empty
    lblocks << tail
  end

  lblocks
end
159
246
 
247
+
248
# Enumerates the start offsets of the non-overlapping occurrences of
# +target+ in +str+, scanning from left to right.
#
# @param str [String] the text to search in
# @param target [String] the substring to search for
# @return [Enumerator<Integer>] lazily yields each match offset in ascending
#   order; yields nothing when +target+ does not occur
def indices(str, target)
  # String has no #len; the original call raised NoMethodError.
  # Always advance by at least one character so that an empty +target+
  # cannot stall the scan at the same position forever.
  step = [target.length, 1].max

  Enumerator.new do |yielder|
    position = 0
    while (idx = str.index(target, position))
      yielder << idx
      position = idx + step
    end
  end
end
258
+
160
259
  def transform_begin_position(begin_position)
161
260
  i = @block_alignment[:blocks].index{|b| b[:source][:end] > begin_position}
162
261
  block = @block_alignment[:blocks][i]
163
262
 
164
- b = if block[:alignment] == :block
263
+ b = if block[:alignment] == :block || block[:alignment] == :term
165
264
  begin_position + block[:delta]
166
265
  elsif block[:alignment] == :empty
167
266
  if begin_position == block[:source][:begin]
168
267
  block[:target][:begin]
169
268
  else
170
- # raise "lost annotation"
171
269
  nil
172
270
  end
173
271
  else
@@ -180,13 +278,12 @@ class TextAlignment::TextAlignment
180
278
  i = @block_alignment[:blocks].index{|b| b[:source][:end] >= end_position}
181
279
  block = @block_alignment[:blocks][i]
182
280
 
183
- e = if block[:alignment] == :block
281
+ e = if block[:alignment] == :block || block[:alignment] == :term
184
282
  end_position + block[:delta]
185
283
  elsif block[:alignment] == :empty
186
284
  if end_position == block[:source][:end]
187
285
  block[:target][:end]
188
286
  else
189
- # raise "lost annotation"
190
287
  nil
191
288
  end
192
289
  else
@@ -208,14 +305,14 @@ class TextAlignment::TextAlignment
208
305
  @lost_annotations = []
209
306
 
210
307
  denotations.each do |d|
211
- begin
212
- d.begin = transform_begin_position(d.begin);
213
- d.end = transform_end_position(d.end);
214
- rescue
215
- @lost_annotations << d
216
- d.begin = nil
217
- d.end = nil
218
- end
308
+ source = {begin:d.begin, end:d.end}
309
+ d.begin = transform_begin_position(d.begin);
310
+ d.end = transform_end_position(d.end);
311
+ raise "invalid transform" unless !d.begin.nil? && !d.end.nil? && d.begin >= 0 && d.end > d.begin && d.end <= @original_str2.length
312
+ rescue
313
+ @lost_annotations << {source: source, target:{begin:d.begin, end:d.end}}
314
+ d.begin = nil
315
+ d.end = nil
219
316
  end
220
317
 
221
318
  @lost_annotations
@@ -226,12 +323,12 @@ class TextAlignment::TextAlignment
226
323
  @lost_annotations = []
227
324
 
228
325
  r = hdenotations.collect do |d|
229
- new_d = begin
230
- d.dup.merge({span:transform_a_span(d[:span])})
231
- rescue
232
- @lost_annotations << d
233
- nil
234
- end
326
+ t = transform_a_span(d[:span])
327
+ raise "invalid transform" unless !t[:begin].nil? && !t[:end].nil? && t[:begin] >= 0 && t[:end] > t[:begin] && t[:end] <= @original_str2.length
328
+ new_d = d.dup.merge({span:t})
329
+ rescue
330
+ @lost_annotations << {source: d[:span], target:t}
331
+ nil
235
332
  end.compact
236
333
 
237
334
  r
@@ -245,14 +342,22 @@ class TextAlignment::TextAlignment
245
342
  @block_alignment[:blocks].each do |a|
246
343
  show += case a[:alignment]
247
344
  when :block
248
- "===== common ===== [#{a[:source][:begin]} - #{a[:source][:end]}] [#{a[:target][:begin]} - #{a[:target][:end]}]\n" +
345
+ "===== common (block) ===== [#{a[:source][:begin]} - #{a[:source][:end]}] [#{a[:target][:begin]} - #{a[:target][:end]}]\n" +
346
+ stext[a[:source][:begin] ... a[:source][:end]] + "\n\n"
347
+ when :term
348
+ "===== common (term) ===== [#{a[:source][:begin]} - #{a[:source][:end]}] [#{a[:target][:begin]} - #{a[:target][:end]}]\n" +
249
349
  stext[a[:source][:begin] ... a[:source][:end]] + "\n\n"
250
350
  when :empty
251
351
  "xxxxx disparate texts (similarity: #{a[:similarity]})\n" +
252
352
  "<<<<< string 1 [#{a[:source][:begin]} - #{a[:source][:end]}]\n" +
253
353
  stext[a[:source][:begin] ... a[:source][:end]] + "\n\n" +
254
- ">>>>> string 2 [#{a[:target][:begin]} - #{a[:target][:end]}]\n" +
255
- ttext[a[:target][:begin] ... a[:target][:end]] + "\n\n"
354
+ ">>>>> string 2 " +
355
+ if a[:target]
356
+ "[#{a[:target][:begin]} - #{a[:target][:end]}]\n" +
357
+ ttext[a[:target][:begin] ... a[:target][:end]] + "\n\n"
358
+ else
359
+ "[-]\n\n"
360
+ end
256
361
  else
257
362
  astr1 = ''
258
363
  astr2 = ''
@@ -292,5 +397,4 @@ class TextAlignment::TextAlignment
292
397
  end
293
398
  show
294
399
  end
295
-
296
400
  end
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.6.4'
2
+ VERSION = '0.8.1'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.4
4
+ version: 0.8.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-06 00:00:00.000000000 Z
11
+ date: 2020-10-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary