text_alignment 0.2.9 → 0.3.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8c1c45ed630cdd60291606b59e1944f0b854a689cfa0d281ae8b8879bf01e806
4
- data.tar.gz: 9b33688f3a08f9110f556b4357fbe598d42d1147bc06933323cae4df7187341f
3
+ metadata.gz: b4a8ed8b4cff6f5b04b10c91704939936530ad0dc820a126a514a58cce7a4df6
4
+ data.tar.gz: '087412fa8b9779073c67fa1d9a0afc05e32d7f1baad4518c375fcf804a45ecd4'
5
5
  SHA512:
6
- metadata.gz: 856a8fca63f80be4cea7f6beff85dcf475d9237d68bc96728bcfcc030397f414637d2a8e32b139a6fdbccfd8603d327738ff7f3f59d80dc9e61e55a11a04bf20
7
- data.tar.gz: 6055f50827354461f194a50da74259e8d97473be9b085634df270964bb815d96d9f916bbad2ef3961ddd845291a80e13b1680f1c42bafeab156b0d59fa3ba952
6
+ metadata.gz: e5f56b58d35a614c6b9a72ccb8282b775d19c2fb576d68420153b96703d954e47471cfbcd9b384bd244f19110cc436d28e409d5014f1ade66c23390a928111fc
7
+ data.tar.gz: 4dbdd214b0e2aab9b32751305517160016cfd16b60f5380282b2b1ba2e6946e3097789f4fd234b577cf8ab00e06ea8d3497f59843d600eac2b12b8d60c32441c
@@ -1,51 +1,202 @@
1
1
  #!/usr/bin/env ruby
2
2
  require 'text_alignment'
3
-
4
3
  require 'json'
5
4
  require 'pp'
6
5
 
6
# Loads annotations from +filename+.
#
# A '.json' file is parsed as JSON with symbolized keys and returned as is.
# A '.txt' file is wrapped into a hash holding its content under :text.
# Any other extension raises a RuntimeError naming the file.
def read_annotations(filename)
  extension = File.extname(filename)

  if extension == '.json'
    JSON.parse(File.read(filename), symbolize_names: true)
  elsif extension == '.txt'
    { text: File.read(filename) }
  else
    raise "unknown file type: #{filename}"
  end
end
16
+
17
# Returns the plain text stored in +filename+.
#
# For a '.json' file this is the value of its top-level "text" member;
# for a '.txt' file it is the whole file content. Any other extension
# raises a RuntimeError before the file is read.
def read_text(filename)
  extension = File.extname(filename)
  raise "unknown file type: #{filename}" unless ['.json', '.txt'].include?(extension)

  content = File.read(filename)
  return content if extension == '.txt'

  JSON.parse(content, symbolize_names: true)[:text]
end
28
+
29
# Projects the annotations of several source documents onto one target.
#
# source_annotations:: array of annotation hashes, each with :text and
#                      optionally :denotations, :relations, :attributes
#                      and :modifications.
# target_annotations:: hash with at least :text; it is updated in place
#                      and also returned.
#
# Each source text is aligned against the target text and its denotations
# are transformed through that alignment. All annotations receive new ids
# (T1.., R1.., A1.., M1..) numbered across the whole call, and references
# (:subj / :obj) are rewritten through a per-document map from old to new
# ids. Relations, attributes and modifications are only carried over for
# documents that have at least one denotation.
def align_mdoc(source_annotations, target_annotations)
  denotation_count = 0
  relation_count = 0
  attribute_count = 0
  modification_count = 0

  source_annotations.each do |annotations|
    alignment = TextAlignment::TextAlignment.new(annotations[:text], target_annotations[:text])

    next if !annotations.key?(:denotations) || annotations[:denotations].empty?

    # old id => new id, valid for this document only
    new_ids = {}

    denotations = alignment.transform_hdenotations(annotations[:denotations])
    denotations.each do |denotation|
      new_id = "T#{denotation_count += 1}"
      new_ids[denotation[:id]] = new_id
      denotation[:id] = new_id
    end
    target_annotations[:denotations] = [] unless target_annotations.key?(:denotations)
    target_annotations[:denotations] += denotations

    if annotations.key?(:relations) && !annotations[:relations].empty?
      target_annotations[:relations] = [] unless target_annotations.key?(:relations)
      annotations[:relations].each do |relation|
        new_id = "R#{relation_count += 1}"
        new_ids[relation[:id]] = new_id
        target_annotations[:relations] <<
          relation.merge(id: new_id, subj: new_ids[relation[:subj]], obj: new_ids[relation[:obj]])
      end
    end

    if annotations.key?(:attributes) && !annotations[:attributes].empty?
      target_annotations[:attributes] = [] unless target_annotations.key?(:attributes)
      annotations[:attributes].each do |attribute|
        new_id = "A#{attribute_count += 1}"
        new_ids[attribute[:id]] = new_id
        target_annotations[:attributes] << attribute.merge(id: new_id, subj: new_ids[attribute[:subj]])
      end
    end

    if annotations.key?(:modifications) && !annotations[:modifications].empty?
      target_annotations[:modifications] = [] unless target_annotations.key?(:modifications)
      annotations[:modifications].each do |modification|
        new_id = "M#{modification_count += 1}"
        new_ids[modification[:id]] = new_id
        target_annotations[:modifications] << modification.merge(id: new_id, obj: new_ids[modification[:obj]])
      end
    end
  end

  target_annotations
end
90
+
91
+
7
92
# The script takes exactly two file arguments; with any other number it
# prints the usage line on standard error and stops.
if ARGV.length != 2
  warn "align_annotations target_annotations(.json|.txt) reference_annotations(.json|.txt)"
  exit
end
11
96
 
12
- anns1 = JSON.parse File.read(ARGV[0].strip), :symbolize_names => true
13
- anns2 = JSON.parse File.read(ARGV[1].strip), :symbolize_names => true
14
-
15
- str1 = anns1[:text]
16
- str2 = anns2[:text]
17
-
18
- denotations = anns1[:denotations]
19
-
20
- puts "[Alignment1]====="
21
- align = TextAlignment::TextAlignment.new(str1, str2, TextAlignment::MAPPINGS)
22
- puts TextAlignment::sdiff2cdiff(align.sdiff)
23
- puts
24
- puts "[Similarity]\n#{align.similarity}"
25
- puts
26
- puts '[Denotations original]'
27
- pp denotations
28
- puts
29
- puts '[Denotations transformed]'
30
- new_denotations = align.transform_hdenotations(denotations)
31
- pp new_denotations
32
- puts
33
- puts "[Alignment2 (downcased)]====="
34
- align = TextAlignment::TextAlignment.new(str1.downcase, str2.downcase, TextAlignment::MAPPINGS)
35
- puts TextAlignment::sdiff2cdiff(align.sdiff)
36
- puts
37
- puts "[Similarity]\n#{align.similarity}"
38
- puts
39
- puts '[Denotations original]'
40
- pp denotations
41
- puts
42
- puts '[Denotations transformed]'
43
- new_denotations = align.transform_hdenotations(denotations)
44
- pp new_denotations
45
- puts
46
- puts '[Annotations transformed]'
47
- anns2[:denotations] = new_denotations
48
- puts anns2.to_json
97
# First argument: the annotations to be transferred; second argument: the
# file providing the text they are transferred onto.
source_annotations = read_annotations(ARGV[0])
target_text = read_text(ARGV[1])

lost_annotations = []

# An array means a collection of annotated documents to be merged onto the
# target text; otherwise a single annotated document is aligned directly.
target_annotations =
  if source_annotations.instance_of?(Array)
    align_mdoc(source_annotations, { text: target_text })
  else
    alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
    denotations = alignment.transform_hdenotations(source_annotations[:denotations])
    lost_annotations += alignment.lost_annotations if alignment.lost_annotations

    source_annotations.merge({ text: target_text, denotations: denotations })
  end

# Per-type totals over the source (one document or many); a missing
# annotation type counts as zero.
annotation_sets = source_annotations.instance_of?(Array) ? source_annotations : [source_annotations]
count_of = ->(annotations, key) { annotations[key].nil? ? 0 : annotations[key].length }

num_denotations_source, num_relations_source, num_attributes_source, num_modifications_source =
  %i[denotations relations attributes modifications].map do |key|
    annotation_sets.sum { |annotations| count_of.call(annotations, key) }
  end

# The summary goes to standard error so that standard output carries only
# the resulting JSON.
warn "[source]"
warn "denotations:\t#{num_denotations_source}"
# warn "relations:\t#{num_relations_source}"
# warn "attributes:\t#{num_attributes_source}"
# warn "modifications:\t#{num_modifications_source}"

warn "\n[target]"
warn "denotations:\t#{target_annotations[:denotations].nil? ? 0 : target_annotations[:denotations].length}"
# warn "relations:\t#{target_annotations[:relations].nil? ? 0 : target_annotations[:relations].length}"
# warn "attributes:\t#{target_annotations[:attributes].nil? ? 0 : target_annotations[:attributes].length}"
# warn "modifications:\t#{target_annotations[:modifications].nil? ? 0 : target_annotations[:modifications].length}"

# NOTE: lost_annotations starts as an (always truthy) array, so this
# section is printed on every run, with 0 when nothing was lost.
if lost_annotations
  warn "\n[lost annotations]"
  warn lost_annotations.length.to_s
end

puts target_annotations.to_json
163
+
164
+ # denotations = anns1[:denotations]
165
+
166
+ # puts "[Alignment1]====="
167
+ # align = TextAlignment::TextAlignment.new(str1, str2, TextAlignment::MAPPINGS)
168
+
169
+ # align.alignment.each do |a|
170
+ # p [a[:target][:begin], a[:target][:end], a[:source][:begin], a[:source][:end]]
171
+ # end
172
+
173
+ # puts TextAlignment::sdiff2cdiff(align.sdiff)
174
+ # puts
175
+ # puts "[Similarity]\n#{align.similarity}"
176
+ # puts
177
+ # puts '[Denotations original]'
178
+ # pp denotations
179
+ # puts
180
+ # puts '[Denotations transformed]'
181
+ # new_denotations = align.transform_hdenotations(denotations)
182
+ # pp new_denotations
183
+ # puts
184
+ # puts "[Alignment2 (downcased)]====="
185
+ # align = TextAlignment::TextAlignment.new(str1.downcase, str2.downcase, TextAlignment::MAPPINGS)
186
+ # puts TextAlignment::sdiff2cdiff(align.sdiff)
187
+ # puts
188
+ # puts "[Similarity]\n#{align.similarity}"
189
+ # puts
190
+ # puts '[Denotations original]'
191
+ # pp denotations
192
+ # puts
193
+ # puts '[Denotations transformed]'
194
+ # new_denotations = align.transform_hdenotations(denotations)
195
+ # pp new_denotations
196
+ # puts
197
+ # puts '[Annotations transformed]'
198
+ # anns2[:denotations] = new_denotations
199
+ # puts anns2.to_json
49
200
 
50
201
  # p align.common_elements
51
202
  # puts "---------------"
@@ -0,0 +1,149 @@
1
+ #!/usr/bin/env ruby
2
+ require 'string-similarity'
3
+
4
module TextAlignment; end unless defined? TextAlignment

# Length of the n-grams used as anchor candidates.
TextAlignment::SIZE_NGRAM = 5 unless defined? TextAlignment::SIZE_NGRAM
# Number of alphanumeric characters collected on each side of a candidate
# to confirm it by context.
TextAlignment::SIZE_WINDOW = 10 unless defined? TextAlignment::SIZE_WINDOW
# Minimum similarity of two context windows for a candidate to be accepted.
TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.7 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD

# Scans two texts for anchors: blocks of identical (case-insensitive)
# characters that occur in both, found in increasing order of position.
#
# Internally the shorter text is always scanned (s1) and searched for in
# the longer one (s2); results are mapped back to source/target on return.
class TextAlignment::AnchorFinder

  # source_str, target_str:: the two texts to be anchored to each other.
  # _size_ngram::  n-gram length (defaults to TextAlignment::SIZE_NGRAM).
  # _size_window:: context window size (defaults to TextAlignment::SIZE_WINDOW).
  def initialize(source_str, target_str, _size_ngram = nil, _size_window = nil)
    @size_ngram = _size_ngram || TextAlignment::SIZE_NGRAM
    @size_window = _size_window || TextAlignment::SIZE_WINDOW

    @reverse = (target_str.length < source_str.length)

    @s1, @s2 = if @reverse
      [target_str.downcase, source_str.downcase]
    else
      [source_str.downcase, target_str.downcase]
    end

    # current position in s1
    @beg_s1 = 0
    @beg_s2 = nil

    # end positions of the previously returned anchor (nil before the first one)
    @end_s1_prev = nil
    @end_s2_prev = nil
  end

  # Returns the next anchor as
  #   {source: {begin:, end:}, target: {begin:, end:}}
  # (end exclusive), or nil when no further anchor can be found.
  def get_next_anchor
    # find the position of an anchor n-gram in s1 and s2
    found = false
    scan_limit = @s1.length - @size_ngram
    while @beg_s1 < scan_limit
      anchor = @s1[@beg_s1, @size_ngram]

      # only look behind the previous anchor in s2, so anchors never cross
      @beg_s2 = @end_s2_prev ? @s2.index(anchor, @end_s2_prev) : @s2.index(anchor)

      if @beg_s2 && anchor_confirmed?
        found = true
        break
      end

      @beg_s1 += 1
    end

    # Without this check, running off the end of s1 after a rejected
    # candidate would leave a stale @beg_s2 and yield a bogus anchor.
    return nil unless found

    # extend the block to the left as long as the characters agree
    b1 = @beg_s1
    b2 = @beg_s2
    while b1 > -1 && b2 > -1 && @s1[b1] == @s2[b2]
      b1 -= 1; b2 -= 1
    end
    b1 += 1; b2 += 1

    # extend the block to the right; the bounds checks matter because
    # indexing past the end gives nil on both sides, and nil == nil
    e1 = @beg_s1 + @size_ngram
    e2 = @beg_s2 + @size_ngram
    while e1 < @s1.length && e2 < @s2.length && @s1[e1] == @s2[e2]
      e1 += 1; e2 += 1
    end

    @end_s1_prev = e1
    @end_s2_prev = e2
    @beg_s1 = e1

    if @reverse
      {source:{begin:b2 , end:e2}, target:{begin:b1, end:e1}}
    else
      {source:{begin:b1 , end:e1}, target:{begin:b2, end:e2}}
    end
  end

  private

  # Decides whether the candidate at @beg_s1 / @beg_s2 is a real anchor.
  def anchor_confirmed?
    # both beginning points are sufficiently close to the end points of the last match
    return true if @end_s1_prev && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 - @end_s2_prev < 5)

    left_window_s1, left_window_s2 = get_left_windows
    return true if left_window_s1 && text_similarity(left_window_s1, left_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD

    # compare right with right (the s2 window used to be the left one)
    right_window_s1, right_window_s2 = get_right_windows
    return false unless right_window_s1

    text_similarity(right_window_s1, right_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD
  end

  # Context windows preceding the candidate in s1 and s2, or nil when
  # either text has fewer than @size_window characters before it.
  # The characters are collected walking backwards, so both windows are
  # in reverse reading order.
  def get_left_windows
    return if @beg_s1 < @size_window || @beg_s2 < @size_window

    [collect_window(@s1, @beg_s1 - 1, -1), collect_window(@s2, @beg_s2 - 1, -1)]
  end

  # Context windows following the candidate in s1 and s2, or nil when
  # either text has fewer than @size_window characters after it
  # (mirrors the guard of get_left_windows).
  def get_right_windows
    return if (@beg_s1 + @size_ngram > (@s1.length - @size_window)) || (@beg_s2 + @size_ngram > (@s2.length - @size_window))

    [collect_window(@s1, @beg_s1 + @size_ngram, 1), collect_window(@s2, @beg_s2 + @size_ngram, 1)]
  end

  # Collects up to @size_window ASCII alphanumeric characters of +str+,
  # starting at +start+ and moving by +step+ (+1 or -1); every other
  # character is skipped.
  def collect_window(str, start, step)
    window = ''.dup
    loc = start
    while window.length < @size_window && loc >= 0 && loc < str.length
      char = str[loc]
      window << char if char.match?(/[0-9a-zA-Z]/)
      loc += step
    end
    window
  end

  # Cosine similarity of the two strings over character n-grams.
  def text_similarity(str1, str2, ngram_order = 2)
    String::Similarity.cosine(str1, str2, ngram:ngram_order)
  end

end
@@ -4,75 +4,73 @@ require 'string-similarity'
4
4
module TextAlignment; end unless defined? TextAlignment

# approximate the location of str1 in str2

# Length of the n-grams used to locate str1 in str2.
TextAlignment::SIGNATURE_NGRAM = 7 unless defined? TextAlignment::SIGNATURE_NGRAM
# A str2 shorter than this is returned whole, without approximation.
TextAlignment::MIN_LENGTH_FOR_APPROXIMATION = 50 unless defined? TextAlignment::MIN_LENGTH_FOR_APPROXIMATION
# Fraction by which the estimated region is widened on each side.
TextAlignment::BUFFER_RATE = 0.1 unless defined? TextAlignment::BUFFER_RATE
# Minimum similarity between str1 and a candidate region to accept it.
TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.7 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD

class << TextAlignment

  # Finds an approximate region of str2 that contains str1.
  #
  # Returns two values, the begin and (exclusive) end offsets of the
  # region in str2, or nil, nil when no sufficiently similar region is
  # found. When str2 is shorter than MIN_LENGTH_FOR_APPROXIMATION the
  # whole of str2 is returned.
  #
  # Raises ArgumentError if either string is nil.
  def approximate_fit(str1, str2)
    raise ArgumentError, 'nil string' if str1.nil? || str2.nil?
    return 0, str2.length if str2.length < TextAlignment::MIN_LENGTH_FOR_APPROXIMATION

    size = TextAlignment::SIGNATURE_NGRAM
    ngram1 = (0 .. str1.length - size).map { |i| str1[i, size] }
    ngram2 = (0 .. str2.length - size).map { |i| str2[i, size] }
    ngram_shared = ngram1 & ngram2

    # If there is no shared n-gram found, it may mean there is no serious overlap between the two strings
    return nil, nil if ngram_shared.empty?

    # A signature n-gram occurs exactly once in str2, so its position there
    # is unambiguous. Counting once up front avoids rescanning ngram2 for
    # every shared n-gram.
    occurrences_in_str2 = Hash.new(0)
    ngram2.each { |g| occurrences_in_str2[g] += 1 }
    signature_ngrams = ngram_shared.select { |g| occurrences_in_str2[g] == 1 }
    return nil, nil if signature_ngrams.empty? #raise "no signature ngram"

    # regions already evaluated (and rejected), keyed by "begin-end"
    cache = {}

    signature_ngrams.each do |signature_ngram|
      loc_signature_ngram_in_str1 = str1.index(signature_ngram)
      loc_signature_ngram_in_str2 = str2.index(signature_ngram)

      # approximate the beginning of the fit
      fit_begin = loc_signature_ngram_in_str2 - loc_signature_ngram_in_str1 - (loc_signature_ngram_in_str1 * TextAlignment::BUFFER_RATE).to_i
      fit_begin = 0 if fit_begin < 0

      # approximate the end of the fit
      offset_end = str1.length - loc_signature_ngram_in_str1
      fit_end = loc_signature_ngram_in_str2 + offset_end + (offset_end * TextAlignment::BUFFER_RATE).to_i
      fit_end = str2.length if fit_end > str2.length

      # A cached region has already failed the similarity test. The
      # candidate offsets are local to this iteration, so skipping here
      # can no longer leak a rejected region into the return value.
      region_key = "#{fit_begin}-#{fit_end}"
      next if cache.has_key?(region_key)

      similarity = text_similarity(str1, str2[fit_begin ... fit_end])
      cache[region_key] = similarity

      next unless similarity > TextAlignment::TEXT_SIMILARITY_TRESHOLD

      return fit_begin, fit_end if fit_begin < fit_end
      return nil, nil
    end

    return nil, nil
  end

  private

  # Cosine similarity of the two strings with whitespace removed.
  # NOTE: ngram_order is accepted but not used; the n-gram order passed
  # to the similarity function is fixed at 2.
  def text_similarity(str1, str2, ngram_order = 3)
    _str1 = str1.delete(" \t\r\n")
    _str2 = str2.delete(" \t\r\n")
    String::Similarity.cosine(_str1, _str2, ngram:2)
  end

end
66
64
 
67
65
# Command-line use: given two JSON files with a "text" member, print the
# approximate location of the first text within the second, followed by
# that region of the second text. With any other number of arguments
# nothing is done.
if $PROGRAM_NAME == __FILE__
  require 'json'

  if ARGV.length == 2
    str1, str2 = ARGV.map { |path| JSON.parse(File.read(path).strip)["text"] }

    fit_begin, fit_end = TextAlignment.approximate_fit(str1, str2)
    p [fit_begin, fit_end]
    puts str2[fit_begin...fit_end]
  end
end