text_alignment 0.3.24 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fd59fb1ad977d3286f358d8c08315824e86d99cc6f9d72814adb760f2e680107
4
- data.tar.gz: baafd1b76f6c6447a5763ff731b77ad07e449fc87ef94abed74a978376f6334e
3
+ metadata.gz: 041aa1d92ea6bb54ca9fd005d0a8684b8c012b0cb55e5ea00d54be357eae646c
4
+ data.tar.gz: f821ae66f4c64eb7043ec85515694c207510be860568cc86fc47b89c8e504f87
5
5
  SHA512:
6
- metadata.gz: d417878396803e1169a24fae67a9a4b0d4e84948d0c6bc678626641d6f1b6ac1fe16c94e9f07ee747b98f83e4305fa553220a607475363378f32fac4d43a65c7
7
- data.tar.gz: efaf3640a67be46dddcf7dda9d819cbcb47828a3966d305887b6024eee6ba517a597e9303790f584264b8995f69671d90bdc0c7883fc9244f3b1746695960bf8
6
+ metadata.gz: 9bf264a8789a2630e6a820b0a0833854f6b03de802d78ea60b5849e5ee6ceb0119494221fadb2220ad27edb92f44711530ef840c05c044396402e2227f71c004
7
+ data.tar.gz: c4a95752b186092d2acc48dbdbeedde6ca952da02dcf206c95304f11f6d0c433dd2ad25f50729dceb5610fee438f9ca4df7171db66af7f2904a247ba50105149
@@ -35,6 +35,10 @@ def align_mdoc(source_annotations, target_annotations)
35
35
  source_annotations.each do |annotations|
36
36
  alignment = TextAlignment::TextAlignment.new(annotations[:text], target_annotations[:text])
37
37
 
38
+ puts alignment.alignment_show
39
+ puts "-----"
40
+ puts
41
+
38
42
  # alignment.block_alignments.each do |a|
39
43
  # p {source:a[:source], target:a[:target]}
40
44
  # puts "--"
@@ -103,65 +107,56 @@ target_annotations = if source_annotations.class == Array
103
107
  else
104
108
  alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
105
109
 
106
- pp alignment
110
+ # pp alignment
107
111
 
108
112
  # verification
109
- source_text = source_annotations[:text]
110
- puts "=====BEGIN"
111
- (0 ... source_text.rstrip.length).each do |p|
112
- t = alignment.transform_begin_position(p)
113
- if t.nil?
114
- print source_text[p]
115
- else
116
- print '.'
117
- end
118
- end
119
- puts
120
- puts "=====END"
121
-
122
- puts "=====BEGIN"
123
- (0 .. source_text.rstrip.length).each do |p|
124
- t = alignment.transform_end_position(p)
125
- if t.nil?
126
- print source_text[p]
127
- else
128
- print '.'
129
- end
130
- end
131
- puts
132
- puts "=====END"
133
-
134
- # alignment.block_alignments.each do |a|
135
- # if a[:alignment].nil? || a[:alignment] == :empty
136
- # # p [a[:source], a[:target]]
137
- # # p a[:alignment]
113
+ # source_text = source_annotations[:text]
114
+ # puts "=====BEGIN"
115
+ # (0 ... source_text.rstrip.length).each do |p|
116
+ # t = alignment.transform_begin_position(p)
117
+ # if t.nil?
118
+ # print source_text[p]
138
119
  # else
139
- # p [a[:source], a[:target]]
140
- # p a[:alignment].similarity
141
- # puts "--"
142
- # puts source_annotations[:text][a[:source][:begin] ... a[:source][:end]]
143
- # puts "--"
144
- # puts target_text[a[:target][:begin] ... a[:target][:end]]
145
- # puts "======"
120
+ # print '.'
146
121
  # end
147
122
  # end
123
+ # puts
124
+ # puts "=====END"
125
+
126
+ # puts "=====BEGIN"
127
+ # (0 .. source_text.rstrip.length).each do |p|
128
+ # t = alignment.transform_end_position(p)
129
+ # if t.nil?
130
+ # print source_text[p]
131
+ # else
132
+ # print '.'
133
+ # end
134
+ # end
135
+ # puts
136
+ # puts "=====END"
137
+
138
+ source_text = source_annotations[:text]
139
+
140
+ # puts "[block alignment]"
141
+ puts alignment.alignment_show
142
+ # puts "====="
148
143
  # exit
149
144
 
150
145
  # verification of source denotations
151
- puts "[Invalid source denotations]"
152
- source_annotations[:denotations] do |d|
153
- p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < source_text.length
154
- end
155
- puts "====="
156
- puts
146
+ # puts "[Invalid source denotations]"
147
+ # source_annotations[:denotations] do |d|
148
+ # p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < source_text.length
149
+ # end
150
+ # puts "====="
151
+ # puts
157
152
 
158
153
  denotations = alignment.transform_hdenotations(source_annotations[:denotations])
159
- puts "[Invalid transformation]"
160
- denotations.each do |d|
161
- p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < target_text.length
162
- end
163
- puts "====="
164
- puts
154
+ # puts "[Invalid transformation]"
155
+ # denotations.each do |d|
156
+ # p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < target_text.length
157
+ # end
158
+ # puts "====="
159
+ # puts
165
160
 
166
161
  lost_annotations += alignment.lost_annotations if alignment.lost_annotations
167
162
 
@@ -1,17 +1,15 @@
1
1
  #!/usr/bin/env ruby
2
+ require 'text_alignment/constants'
2
3
  require 'string-similarity'
3
4
 
4
5
  module TextAlignment; end unless defined? TextAlignment
5
6
 
6
- TextAlignment::SIZE_NGRAM = 8 unless defined? TextAlignment::SIZE_NGRAM
7
- TextAlignment::SIZE_WINDOW = 40 unless defined? TextAlignment::SIZE_WINDOW
8
- TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.8 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD
9
-
10
7
  class TextAlignment::AnchorFinder
11
8
 
12
- def initialize(source_str, target_str, _size_ngram = nil, _size_window = nil)
9
+ def initialize(source_str, target_str, _size_ngram = nil, _size_window = nil, _text_similiarity_threshold = nil)
13
10
  @size_ngram = _size_ngram || TextAlignment::SIZE_NGRAM
14
11
  @size_window = _size_window || TextAlignment::SIZE_WINDOW
12
+ @sim_threshold = _text_similiarity_threshold || TextAlignment::TEXT_SIMILARITY_THRESHOLD
15
13
 
16
14
  @reverse = (target_str.length < source_str.length)
17
15
 
@@ -43,10 +41,10 @@ class TextAlignment::AnchorFinder
43
41
  break if @beg_s1 > 0 && @beg_s2 > 0 && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 >= @end_s2_prev) && (@beg_s2 - @end_s2_prev < 5)
44
42
 
45
43
  left_window_s1, left_window_s2 = get_left_windows
46
- break if left_window_s1 && (text_similarity(left_window_s1, left_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD)
44
+ break if left_window_s1 && (text_similarity(left_window_s1, left_window_s2) > @sim_threshold)
47
45
 
48
46
  right_window_s1, right_window_s2 = get_right_windows
49
- break if right_window_s2 && (text_similarity(right_window_s1, right_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD)
47
+ break if right_window_s2 && (text_similarity(right_window_s1, right_window_s2) > @sim_threshold)
50
48
 
51
49
  search_position = @beg_s2 + 1
52
50
  end
@@ -1,13 +1,11 @@
1
1
  #!/usr/bin/env ruby
2
+ require 'text_alignment/constants'
2
3
  require 'string-similarity'
3
4
 
4
5
  module TextAlignment; end unless defined? TextAlignment
5
6
 
6
7
  # approximate the location of str1 in str2
7
- TextAlignment::SIGNATURE_NGRAM = 7 unless defined? TextAlignment::SIGNATURE_NGRAM
8
8
  TextAlignment::MIN_LENGTH_FOR_APPROXIMATION = 50 unless defined? TextAlignment::MIN_LENGTH_FOR_APPROXIMATION
9
- TextAlignment::BUFFER_RATE = 0.1 unless defined? TextAlignment::BUFFER_RATE
10
- TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.7 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD
11
9
 
12
10
  class << TextAlignment
13
11
 
@@ -16,8 +14,8 @@ class << TextAlignment
16
14
  raise ArgumentError, 'nil string' if str1.nil? || str2.nil?
17
15
  return 0, str2.length if str2.length < TextAlignment::MIN_LENGTH_FOR_APPROXIMATION
18
16
 
19
- ngram1 = (0 .. str1.length - TextAlignment::SIGNATURE_NGRAM).collect{|i| str1[i, TextAlignment::SIGNATURE_NGRAM]}
20
- ngram2 = (0 .. str2.length - TextAlignment::SIGNATURE_NGRAM).collect{|i| str2[i, TextAlignment::SIGNATURE_NGRAM]}
17
+ ngram1 = (0 .. str1.length - TextAlignment::SIZE_NGRAM).collect{|i| str1[i, TextAlignment::SIZE_NGRAM]}
18
+ ngram2 = (0 .. str2.length - TextAlignment::SIZE_NGRAM).collect{|i| str2[i, TextAlignment::SIZE_NGRAM]}
21
19
  ngram_shared = ngram1 & ngram2
22
20
 
23
21
  # If there is no shared n-gram found, it may mean there is no serious overlap between the two strings
@@ -45,7 +43,7 @@ class << TextAlignment
45
43
  text_similarity = text_similarity(str1, str2[fit_begin ... fit_end])
46
44
  cache["#{fit_begin}-#{fit_end}"] = text_similarity
47
45
 
48
- break if text_similarity > TextAlignment::TEXT_SIMILARITY_TRESHOLD
46
+ break if text_similarity > TextAlignment::TEXT_SIMILARITY_THRESHOLD
49
47
  fit_begin, fit_end = nil, nil
50
48
  end
51
49
  return fit_begin, fit_end if fit_begin && fit_end && fit_begin < fit_end
@@ -0,0 +1,7 @@
1
+ module TextAlignment; end unless defined? TextAlignment
2
+
3
+ TextAlignment::SIZE_NGRAM = 8 unless defined? TextAlignment::SIZE_NGRAM
4
+ TextAlignment::SIZE_WINDOW = 60 unless defined? TextAlignment::SIZE_WINDOW
5
+ TextAlignment::BUFFER_RATE = 0.1 unless defined? TextAlignment::BUFFER_RATE
6
+ TextAlignment::BUFFER_MIN = 20 unless defined? TextAlignment::BUFFER_MIN
7
+ TextAlignment::TEXT_SIMILARITY_THRESHOLD = 0.9 unless defined? TextAlignment::TEXT_SIMILARITY_THRESHOLD
@@ -9,8 +9,6 @@ require 'text_alignment/mappings'
9
9
 
10
10
  module TextAlignment; end unless defined? TextAlignment
11
11
 
12
- TextAlignment::SIGNATURE_NGRAM = 5 unless defined? TextAlignment::SIGNATURE_NGRAM
13
-
14
12
  class TextAlignment::GLCSTextAlignment
15
13
  attr_reader :position_map_begin, :position_map_end
16
14
  attr_reader :common_elements, :mapped_elements
@@ -10,8 +10,6 @@ require 'text_alignment/mappings'
10
10
 
11
11
  module TextAlignment; end unless defined? TextAlignment
12
12
 
13
- TextAlignment::NOMATCH_CHARS = "@^|#$%&_" unless defined? TextAlignment::NOMATCH_CHARS
14
-
15
13
  class TextAlignment::MixedAlignment
16
14
  attr_reader :sdiff
17
15
  attr_reader :position_map_begin, :position_map_end
@@ -21,58 +19,7 @@ class TextAlignment::MixedAlignment
21
19
 
22
20
  def initialize(str1, str2, mappings = [])
23
21
  raise ArgumentError, "nil string" if str1.nil? || str2.nil?
24
- raise ArgumentError, "nil mappings" if mappings.nil?
25
-
26
- ## preprocessing
27
- str1 = str1.dup
28
- str2 = str2.dup
29
- mappings = mappings.dup
30
-
31
- ## find the first nomatch character
32
- TextAlignment::NOMATCH_CHARS.each_char do |c|
33
- if str2.index(c).nil?
34
- @nomatch_char1 = c
35
- break
36
- end
37
- end
38
- raise RuntimeError, "Cannot find nomatch character" if @nomatch_char1.nil?
39
-
40
- ## find the first nomatch character
41
- TextAlignment::NOMATCH_CHARS.each_char do |c|
42
- if c != @nomatch_char1 && str1.index(c).nil?
43
- @nomatch_char2 = c
44
- break
45
- end
46
- end
47
- raise RuntimeError, "Cannot find nomatch character" if @nomatch_char2.nil?
48
-
49
- # single character mappings
50
- character_mappings = mappings.select{|m| m[0].length == 1 && m[1].length == 1}
51
- characters_from = character_mappings.collect{|m| m[0]}.join
52
- characters_to = character_mappings.collect{|m| m[1]}.join
53
- characters_to.gsub!(/-/, '\-')
54
-
55
- str1.tr!(characters_from, characters_to)
56
- str2.tr!(characters_from, characters_to)
57
-
58
- mappings.delete_if{|m| m[0].length == 1 && m[1].length == 1}
59
-
60
- # ASCII foldings
61
- ascii_foldings = mappings.select{|m| m[0].length == 1 && m[1].length > 1}
62
- ascii_foldings.each do |f|
63
- from = f[1]
64
-
65
- if str2.index(f[0])
66
- to = f[0] + (@nomatch_char1 * (f[1].length - 1))
67
- str1.gsub!(from, to)
68
- end
69
-
70
- if str1.index(f[0])
71
- to = f[0] + (@nomatch_char2 * (f[1].length - 1))
72
- str2.gsub!(from, to)
73
- end
74
- end
75
- mappings.delete_if{|m| m[0].length == 1 && m[1].length > 1}
22
+ mappings ||= []
76
23
 
77
24
  _compute_mixed_alignment(str1, str2, mappings)
78
25
  end
@@ -1,32 +1,40 @@
1
1
  #!/usr/bin/env ruby
2
+ require 'text_alignment/constants'
2
3
  require 'text_alignment/anchor_finder'
3
4
  require 'text_alignment/mixed_alignment'
4
5
 
5
6
  module TextAlignment; end unless defined? TextAlignment
6
7
 
7
- TextAlignment::SIGNATURE_NGRAM = 7 unless defined? TextAlignment::SIGNATURE_NGRAM
8
- TextAlignment::BUFFER_RATE = 0.1 unless defined? TextAlignment::BUFFER_RATE
9
- TextAlignment::BUFFER_MIN = 20 unless defined? TextAlignment::BUFFER_MIN
10
-
8
+ TextAlignment::PADDING_LETTERS = ['@', '^', '|', '#', '$', '%', '&', '_'] unless defined? TextAlignment::PADDING_LETTERS
11
9
 
12
10
  class TextAlignment::TextAlignment
13
11
  attr_reader :block_alignments
14
12
  attr_reader :similarity
15
13
  attr_reader :lost_annotations
16
14
 
17
- def initialize(str1, str2, mappings = nil)
18
- raise ArgumentError, "nil string" if str1.nil? || str2.nil?
15
+ def initialize(_str1, _str2, _size_ngram = nil, _size_window = nil, _text_similiarity_threshold = nil)
16
+ raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
17
+
18
+ @ostr1 = _str1
19
+ @ostr2 = _str2
19
20
 
20
- mappings ||= TextAlignment::MAPPINGS
21
+ str1, str2, mappings = string_preprocessing(_str1, _str2)
21
22
 
22
23
  # try exact match
23
24
  block_begin = str2.index(str1)
24
25
  unless block_begin.nil?
25
- @block_alignments = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin}]
26
+ @block_alignments = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
27
+ return @block_alignments
28
+ end
29
+
30
+ # try case-insensitive exact match
31
+ block_begin = str2.downcase.index(str1.downcase)
32
+ unless block_begin.nil?
33
+ @block_alignments = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
26
34
  return @block_alignments
27
35
  end
28
36
 
29
- anchor_finder = TextAlignment::AnchorFinder.new(str1, str2)
37
+ anchor_finder = TextAlignment::AnchorFinder.new(str1, str2, _size_ngram, _size_window, _text_similiarity_threshold)
30
38
 
31
39
  # To collect matched blocks
32
40
  mblocks = []
@@ -93,7 +101,7 @@ class TextAlignment::TextAlignment
93
101
  end
94
102
  end
95
103
  end
96
- @block_alignments << mblocks[0]
104
+ @block_alignments << mblocks[0].merge(alignment: :block)
97
105
 
98
106
  (1 ... mblocks.length).each do |i|
99
107
  b1 = mblocks[i - 1][:source][:end]
@@ -114,7 +122,7 @@ class TextAlignment::TextAlignment
114
122
  end
115
123
  end
116
124
  end
117
- @block_alignments << mblocks[i]
125
+ @block_alignments << mblocks[i].merge(alignment: :block)
118
126
  end
119
127
 
120
128
  # Final step
@@ -156,7 +164,7 @@ class TextAlignment::TextAlignment
156
164
  i = @block_alignments.index{|b| b[:source][:end] > begin_position}
157
165
  block_alignment = @block_alignments[i]
158
166
 
159
- b = if block_alignment[:alignment].nil?
167
+ b = if block_alignment[:alignment] == :block
160
168
  begin_position + block_alignment[:delta]
161
169
  elsif block_alignment[:alignment] == :empty
162
170
  if begin_position == block_alignment[:source][:begin]
@@ -175,7 +183,7 @@ class TextAlignment::TextAlignment
175
183
  i = @block_alignments.index{|b| b[:source][:end] >= end_position}
176
184
  block_alignment = @block_alignments[i]
177
185
 
178
- e = if block_alignment[:alignment].nil?
186
+ e = if block_alignment[:alignment] == :block
179
187
  end_position + block_alignment[:delta]
180
188
  elsif block_alignment[:alignment] == :empty
181
189
  if end_position == block_alignment[:source][:end]
@@ -232,4 +240,176 @@ class TextAlignment::TextAlignment
232
240
  r
233
241
  end
234
242
 
243
+ def alignment_table
244
+ table = <<-TABLE
245
+ <table class='text_alignment_table'>
246
+ <thead>
247
+ <tr>
248
+ <th class='text_alignment_left' style='width:50%'>Text 1</th>
249
+ <th class='text_alignment_rigt'>Text 2</th>
250
+ </tr>
251
+ </thead>
252
+ <tbody>
253
+ TABLE
254
+
255
+ @block_alignments.each do |a|
256
+ table += alignment_table_th(a)
257
+ table += "<tr>\n" + case a[:alignment]
258
+ when :block
259
+ "<td colspan='2' class='text_alignment_common'>" +
260
+ @ostr1[a[:source][:begin] ... a[:source][:end]] +
261
+ "</td>\n"
262
+ when :empty
263
+ "<td class='text_alignment_left'>" + @ostr1[a[:source][:begin] ... a[:source][:end]] + "</td>\n" +
264
+ "<td class='text_alignment_right'>" + @ostr2[a[:target][:begin] ... a[:target][:end]] + "</td>\n"
265
+ else
266
+ base = a[:source][:begin]
267
+ astr1 = a[:alignment].sdiff.map do |c|
268
+ case c.action
269
+ when '='
270
+ @ostr1[c.old_position + base]
271
+ when '+'
272
+ '_'
273
+ when '-'
274
+ @ostr1[c.old_position + base]
275
+ when '!'
276
+ @ostr1[c.old_position + base] + '_'
277
+ end
278
+ end.join('')
279
+
280
+ base = a[:target][:begin]
281
+ astr2 = a[:alignment].sdiff.map do |c|
282
+ case c.action
283
+ when '='
284
+ @ostr2[c.new_position + base]
285
+ when '+'
286
+ @ostr2[c.new_position + base]
287
+ when '-'
288
+ '_'
289
+ when '!'
290
+ '_' + @ostr2[c.new_position + base]
291
+ end
292
+ end.join('')
293
+
294
+ "<td class='text_alignment_left'>" + astr1 + "</td>\n" +
295
+ "<td class='text_alignment_right'>" + astr2 + "</td>\n"
296
+ end + "</tr>\n"
297
+ end
298
+ table += '</tbody></table>'
299
+ end
300
+
301
+ def alignment_table_th(a)
302
+ "<tr>" +
303
+ "<th class='text_alignment_left'>#{a[:source][:begin]} - #{a[:source][:end]}</th>" +
304
+ "<th class='text_alignment_right'>#{a[:target][:begin]} - #{a[:target][:end]}</th>" +
305
+ "</tr>"
306
+ end
307
+
308
+ def alignment_show
309
+ show = ''
310
+ @block_alignments.each do |a|
311
+ show += case a[:alignment]
312
+ when :block
313
+ "===== common =====\n" +
314
+ @ostr1[a[:source][:begin] ... a[:source][:end]] + "\n\n"
315
+ when :empty
316
+ "<<<<< string 1\n" +
317
+ @ostr1[a[:source][:begin] ... a[:source][:end]] + "\n\n" +
318
+ ">>>>> string 2\n" +
319
+ @ostr2[a[:target][:begin] ... a[:target][:end]] + "\n\n"
320
+ else
321
+ astr1 = ''
322
+ astr2 = ''
323
+
324
+ base = a[:source][:begin]
325
+ astr1 = a[:alignment].sdiff.map do |c|
326
+ case c.action
327
+ when '='
328
+ @ostr1[c.old_position + base]
329
+ when '+'
330
+ '_'
331
+ when '-'
332
+ @ostr1[c.old_position + base]
333
+ when '!'
334
+ @ostr1[c.old_position + base] + '_'
335
+ end
336
+ end.join('')
337
+
338
+ base = a[:target][:begin]
339
+ astr2 = a[:alignment].sdiff.map do |c|
340
+ case c.action
341
+ when '='
342
+ @ostr2[c.new_position + base]
343
+ when '+'
344
+ @ostr2[c.new_position + base]
345
+ when '-'
346
+ '_'
347
+ when '!'
348
+ '_' + @ostr2[c.new_position + base]
349
+ end
350
+ end.join('')
351
+
352
+ "***** local mismatch\n" +
353
+ "[#{astr1}]\n" +
354
+ "[#{astr2}]\n\n"
355
+ end
356
+ end
357
+ show
358
+ end
359
+
360
+ private
361
+
362
+ def string_preprocessing(_str1, _str2)
363
+ str1 = _str1.dup
364
+ str2 = _str2.dup
365
+ mappings = TextAlignment::MAPPINGS.dup
366
+
367
+ ## single character mappings
368
+ character_mappings = mappings.select{|m| m[0].length == 1 && m[1].length == 1}
369
+ characters_from = character_mappings.collect{|m| m[0]}.join
370
+ characters_to = character_mappings.collect{|m| m[1]}.join
371
+ characters_to.gsub!(/-/, '\-')
372
+
373
+ str1.tr!(characters_from, characters_to)
374
+ str2.tr!(characters_from, characters_to)
375
+
376
+ mappings.delete_if{|m| m[0].length == 1 && m[1].length == 1}
377
+
378
+ ## multi-character to single-character mappings (padded to preserve string length)
379
+ pletters = TextAlignment::PADDING_LETTERS
380
+
381
+ # find the padding letter for str1
382
+ padding_letter1 = begin
383
+ i = pletters.index{|l| str2.index(l).nil?}
384
+ raise RuntimeError, "Could not find a padding letter for str1" if i.nil?
385
+ TextAlignment::PADDING_LETTERS[i]
386
+ end
387
+
388
+ # find the padding letter for str2
389
+ padding_letter2 = begin
390
+ i = pletters.index{|l| l != padding_letter1 && str1.index(l).nil?}
391
+ raise RuntimeError, "Could not find a padding letter for str2" if i.nil?
392
+ TextAlignment::PADDING_LETTERS[i]
393
+ end
394
+
395
+ # ASCII foldings
396
+ ascii_foldings = mappings.select{|m| m[0].length == 1 && m[1].length > 1}
397
+ ascii_foldings.each do |f|
398
+ from = f[1]
399
+
400
+ if str2.index(f[0])
401
+ to = f[0] + (padding_letter1 * (f[1].length - 1))
402
+ str1.gsub!(from, to)
403
+ end
404
+
405
+ if str1.index(f[0])
406
+ to = f[0] + (padding_letter2 * (f[1].length - 1))
407
+ str2.gsub!(from, to)
408
+ end
409
+ end
410
+ mappings.delete_if{|m| m[0].length == 1 && m[1].length > 1}
411
+
412
+ [str1, str2, mappings]
413
+ end
414
+
235
415
  end
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.3.24'
2
+ VERSION = '0.5.1'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.24
4
+ version: 0.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-29 00:00:00.000000000 Z
11
+ date: 2020-10-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary
@@ -77,6 +77,7 @@ files:
77
77
  - lib/text_alignment.rb
78
78
  - lib/text_alignment/anchor_finder.rb
79
79
  - lib/text_alignment/approximate_fit.rb
80
+ - lib/text_alignment/constants.rb
80
81
  - lib/text_alignment/find_divisions.rb
81
82
  - lib/text_alignment/glcs_alignment.rb
82
83
  - lib/text_alignment/glcs_alignment_fast.rb