text_alignment 0.2.9 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8c1c45ed630cdd60291606b59e1944f0b854a689cfa0d281ae8b8879bf01e806
4
- data.tar.gz: 9b33688f3a08f9110f556b4357fbe598d42d1147bc06933323cae4df7187341f
3
+ metadata.gz: b4a8ed8b4cff6f5b04b10c91704939936530ad0dc820a126a514a58cce7a4df6
4
+ data.tar.gz: '087412fa8b9779073c67fa1d9a0afc05e32d7f1baad4518c375fcf804a45ecd4'
5
5
  SHA512:
6
- metadata.gz: 856a8fca63f80be4cea7f6beff85dcf475d9237d68bc96728bcfcc030397f414637d2a8e32b139a6fdbccfd8603d327738ff7f3f59d80dc9e61e55a11a04bf20
7
- data.tar.gz: 6055f50827354461f194a50da74259e8d97473be9b085634df270964bb815d96d9f916bbad2ef3961ddd845291a80e13b1680f1c42bafeab156b0d59fa3ba952
6
+ metadata.gz: e5f56b58d35a614c6b9a72ccb8282b775d19c2fb576d68420153b96703d954e47471cfbcd9b384bd244f19110cc436d28e409d5014f1ade66c23390a928111fc
7
+ data.tar.gz: 4dbdd214b0e2aab9b32751305517160016cfd16b60f5380282b2b1ba2e6946e3097789f4fd234b577cf8ab00e06ea8d3497f59843d600eac2b12b8d60c32441c
@@ -1,51 +1,202 @@
1
1
  #!/usr/bin/env ruby
2
2
  require 'text_alignment'
3
-
4
3
  require 'json'
5
4
  require 'pp'
6
5
 
6
# Loads annotations from +filename+, choosing the reader by file extension.
#
# * ".json" - the file is parsed as JSON with symbolized keys and the parsed
#   value is returned as is.
# * ".txt"  - the file content is wrapped as <tt>{text: content}</tt>.
#
# The extension is matched case-insensitively, so "doc.JSON" and "doc.TXT"
# are accepted as well.
#
# Raises RuntimeError ("unknown file type: ...") for any other extension.
def read_annotations(filename)
  case File.extname(filename).downcase
  when '.json'
    JSON.parse(File.read(filename), symbolize_names: true)
  when '.txt'
    { text: File.read(filename) }
  else
    raise "unknown file type: #{filename}"
  end
end
16
+
17
# Returns the plain text held by +filename+.
#
# * ".json" - the file is parsed as JSON with symbolized keys and the value
#   stored under :text is returned.
# * ".txt"  - the raw file content is returned.
#
# The extension is matched case-insensitively, so "doc.JSON" and "doc.TXT"
# are accepted as well.
#
# Raises RuntimeError ("unknown file type: ...") for any other extension.
def read_text(filename)
  case File.extname(filename).downcase
  when '.json'
    json = JSON.parse(File.read(filename), symbolize_names: true)
    json[:text]
  when '.txt'
    File.read(filename)
  else
    raise "unknown file type: #{filename}"
  end
end
28
+
29
# Moves the annotations of several source documents onto one target text.
#
# source_annotations - an enumerable of annotation hashes; each one is read
#                      for :text and, when present and non-empty,
#                      :denotations, :relations, :attributes, :modifications.
# target_annotations - a hash holding the target :text; annotation lists are
#                      added to it as they are produced.
#
# Every source document is aligned against the target text. Its denotations
# are transformed through that alignment and renumbered T1, T2, ...; relations,
# attributes and modifications are copied with fresh ids (R*, A*, M*) and with
# their :subj / :obj references rewritten to the new ids. Counters run across
# all source documents. Relations, attributes and modifications of a document
# are only carried over when that document has denotations.
#
# Returns target_annotations.
def align_mdoc(source_annotations, target_annotations)
  # one running counter per id prefix, shared by all source documents
  last_number = Hash.new(0)

  # annotation list, id prefix, and the reference fields that must be remapped
  dependents = [
    [:relations,     'R', [:subj, :obj]],
    [:attributes,    'A', [:subj]],
    [:modifications, 'M', [:obj]]
  ]

  source_annotations.each do |document|
    aligner = TextAlignment::TextAlignment.new(document[:text], target_annotations[:text])

    next unless document.key?(:denotations) && !document[:denotations].empty?

    # old id => new id, for everything renumbered within this document
    renamed = {}

    moved = aligner.transform_hdenotations(document[:denotations])
    moved.each do |denotation|
      fresh_id = "T#{last_number['T'] += 1}"
      renamed[denotation[:id]] = fresh_id
      denotation[:id] = fresh_id
    end
    target_annotations[:denotations] = [] unless target_annotations.key?(:denotations)
    target_annotations[:denotations] += moved

    dependents.each do |list_name, prefix, reference_fields|
      next unless document.key?(list_name) && !document[list_name].empty?

      target_annotations[list_name] = [] unless target_annotations.key?(list_name)
      document[list_name].each do |entry|
        fresh_id = "#{prefix}#{last_number[prefix] += 1}"
        renamed[entry[:id]] = fresh_id

        overrides = { id: fresh_id }
        reference_fields.each { |field| overrides[field] = renamed[entry[field]] }
        target_annotations[list_name] << entry.merge(overrides)
      end
    end
  end

  target_annotations
end
90
+
91
+
7
92
  unless ARGV.length == 2
8
- warn "align_annotations target_annotations(.json) reference_annotations(.json)"
93
+ warn "align_annotations target_annotations(.json|.txt) reference_annotations(.json|.txt)"
9
94
  exit
10
95
  end
11
96
 
12
- anns1 = JSON.parse File.read(ARGV[0].strip), :symbolize_names => true
13
- anns2 = JSON.parse File.read(ARGV[1].strip), :symbolize_names => true
14
-
15
- str1 = anns1[:text]
16
- str2 = anns2[:text]
17
-
18
- denotations = anns1[:denotations]
19
-
20
- puts "[Alignment1]====="
21
- align = TextAlignment::TextAlignment.new(str1, str2, TextAlignment::MAPPINGS)
22
- puts TextAlignment::sdiff2cdiff(align.sdiff)
23
- puts
24
- puts "[Similarity]\n#{align.similarity}"
25
- puts
26
- puts '[Denotations original]'
27
- pp denotations
28
- puts
29
- puts '[Denotations transformed]'
30
- new_denotations = align.transform_hdenotations(denotations)
31
- pp new_denotations
32
- puts
33
- puts "[Alignment2 (downcased)]====="
34
- align = TextAlignment::TextAlignment.new(str1.downcase, str2.downcase, TextAlignment::MAPPINGS)
35
- puts TextAlignment::sdiff2cdiff(align.sdiff)
36
- puts
37
- puts "[Similarity]\n#{align.similarity}"
38
- puts
39
- puts '[Denotations original]'
40
- pp denotations
41
- puts
42
- puts '[Denotations transformed]'
43
- new_denotations = align.transform_hdenotations(denotations)
44
- pp new_denotations
45
- puts
46
- puts '[Annotations transformed]'
47
- anns2[:denotations] = new_denotations
48
- puts anns2.to_json
97
# Read the annotated source document(s) and the text they are to be moved onto.
source_annotations = read_annotations(ARGV[0])
target_text = read_text(ARGV[1])

# An Array as source means several documents are laid onto one target text.
multiple_sources = source_annotations.instance_of?(Array)

lost_annotations = []
target_annotations =
  if multiple_sources
    align_mdoc(source_annotations, { text: target_text })
  else
    alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
    denotations = alignment.transform_hdenotations(source_annotations[:denotations])
    lost_annotations += alignment.lost_annotations if alignment.lost_annotations

    # same document, but with the target text and the transformed denotations
    source_annotations.merge({ text: target_text, denotations: denotations })
  end

# Count the source annotations per type; a missing list counts as zero.
length_or_zero = ->(list) { list.nil? ? 0 : list.length }
source_documents = multiple_sources ? source_annotations : [source_annotations]
num_denotations_source, num_relations_source, num_attributes_source, num_modifications_source =
  [:denotations, :relations, :attributes, :modifications].map do |type|
    source_documents.inject(0) { |total, document| total + length_or_zero.call(document[type]) }
  end

# The summary goes to stderr so that stdout carries only the resulting JSON.
warn "[source]"
warn "denotations:\t#{num_denotations_source}"
# warn "relations:\t#{num_relations_source}"
# warn "attributes:\t#{num_attributes_source}"
# warn "modifications:\t#{num_modifications_source}"

warn "\n[target]"
warn "denotations:\t#{length_or_zero.call(target_annotations[:denotations])}"
# warn "relations:\t#{target_annotations[:relations].nil? ? 0 : target_annotations[:relations].length}"
# warn "attributes:\t#{target_annotations[:attributes].nil? ? 0 : target_annotations[:attributes].length}"
# warn "modifications:\t#{target_annotations[:modifications].nil? ? 0 : target_annotations[:modifications].length}"

if lost_annotations
  warn "\n[lost annotations]"
  warn lost_annotations.length.to_s
end

puts target_annotations.to_json
163
+
164
+ # denotations = anns1[:denotations]
165
+
166
+ # puts "[Alignment1]====="
167
+ # align = TextAlignment::TextAlignment.new(str1, str2, TextAlignment::MAPPINGS)
168
+
169
+ # align.alignment.each do |a|
170
+ # p [a[:target][:begin], a[:target][:end], a[:source][:begin], a[:source][:end]]
171
+ # end
172
+
173
+ # puts TextAlignment::sdiff2cdiff(align.sdiff)
174
+ # puts
175
+ # puts "[Similarity]\n#{align.similarity}"
176
+ # puts
177
+ # puts '[Denotations original]'
178
+ # pp denotations
179
+ # puts
180
+ # puts '[Denotations transformed]'
181
+ # new_denotations = align.transform_hdenotations(denotations)
182
+ # pp new_denotations
183
+ # puts
184
+ # puts "[Alignment2 (downcased)]====="
185
+ # align = TextAlignment::TextAlignment.new(str1.downcase, str2.downcase, TextAlignment::MAPPINGS)
186
+ # puts TextAlignment::sdiff2cdiff(align.sdiff)
187
+ # puts
188
+ # puts "[Similarity]\n#{align.similarity}"
189
+ # puts
190
+ # puts '[Denotations original]'
191
+ # pp denotations
192
+ # puts
193
+ # puts '[Denotations transformed]'
194
+ # new_denotations = align.transform_hdenotations(denotations)
195
+ # pp new_denotations
196
+ # puts
197
+ # puts '[Annotations transformed]'
198
+ # anns2[:denotations] = new_denotations
199
+ # puts anns2.to_json
49
200
 
50
201
  # p align.common_elements
51
202
  # puts "---------------"
@@ -0,0 +1,149 @@
1
+ #!/usr/bin/env ruby
2
+ require 'string-similarity'
3
+
4
module TextAlignment; end unless defined? TextAlignment

# Default length, in characters, of the n-gram tried as an anchor candidate.
TextAlignment::SIZE_NGRAM = 5 unless defined? TextAlignment::SIZE_NGRAM
# Default number of alphanumeric characters collected on each side of a candidate.
TextAlignment::SIZE_WINDOW = 10 unless defined? TextAlignment::SIZE_WINDOW
# A candidate is accepted when one of its context windows scores above this similarity.
TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.7 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD

# Walks through two strings and reports, one call at a time, blocks of text
# ("anchors") that occur in both of them.
#
# Both strings are compared downcased. The shorter one is scanned (s1) and its
# n-grams are looked up in the longer one (s2); results are always reported in
# terms of source and target, whichever way round the scan ran.
class TextAlignment::AnchorFinder

  # source_str   - the source text
  # target_str   - the target text
  # _size_ngram  - n-gram length; defaults to TextAlignment::SIZE_NGRAM
  # _size_window - context window size; defaults to TextAlignment::SIZE_WINDOW
  def initialize(source_str, target_str, _size_ngram = nil, _size_window = nil)
    @size_ngram  = _size_ngram || TextAlignment::SIZE_NGRAM
    @size_window = _size_window || TextAlignment::SIZE_WINDOW

    # scan the shorter string and search in the longer one
    @reverse = (target_str.length < source_str.length)

    @s1, @s2 = if @reverse
      [target_str.downcase, source_str.downcase]
    else
      [source_str.downcase, target_str.downcase]
    end

    # current position in s1
    @beg_s1 = 0
    @beg_s2 = nil

    # end positions of the previously reported anchor (nil before the first one)
    @end_s1_prev = nil
    @end_s2_prev = nil
  end

  # Finds the next anchor after the previously returned one.
  #
  # An n-gram of s1 that also occurs in s2 (at or after the end of the previous
  # anchor) is accepted as an anchor when
  # * it starts fewer than 5 characters after the previous anchor in both
  #   strings, or
  # * the text just before it is similar in both strings, or
  # * the text just after it is similar in both strings.
  # The accepted n-gram is then extended in both directions for as long as the
  # two strings keep agreeing.
  #
  # Returns {source: {begin:, end:}, target: {begin:, end:}} (end exclusive),
  # or nil when no further anchor exists.
  def get_next_anchor
    @beg_s2 = nil
    scan_limit = @s1.length - @size_ngram

    while @beg_s1 < scan_limit
      # advance until the n-gram at @beg_s1 is also found in s2
      while @beg_s1 < scan_limit
        anchor = @s1[@beg_s1, @size_ngram]
        @beg_s2 = @s2.index(anchor, @end_s2_prev || 0)
        break unless @beg_s2.nil?
        @beg_s1 += 1
      end

      # the scan ran out without a shared n-gram: no more anchors
      break if @beg_s2.nil?

      # accept if both beginnings are sufficiently close to the end of the last match
      break if @end_s1_prev && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 - @end_s2_prev < 5)

      left_window_s1, left_window_s2 = get_left_windows
      break if left_window_s1 && text_similarity(left_window_s1, left_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD

      # compare the two RIGHT windows with each other (not right against left)
      right_window_s1, right_window_s2 = get_right_windows
      break if right_window_s1 && text_similarity(right_window_s1, right_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD

      # Candidate rejected. Forget its s2 position so that running out of text
      # right after a rejection is reported as "no anchor" instead of emitting
      # a block built from mismatched positions.
      @beg_s2 = nil
      @beg_s1 += 1
    end

    return nil if @beg_s2.nil?

    # extend the block to the left while the characters agree
    b1 = @beg_s1
    b2 = @beg_s2
    while b1 > -1 && b2 > -1 && @s1[b1] == @s2[b2]
      b1 -= 1; b2 -= 1
    end
    b1 += 1; b2 += 1

    # Extend the block to the right while the characters agree. The presence
    # check stops at the end of s1; without it nil == nil keeps the loop
    # running forever once both strings are exhausted.
    e1 = @beg_s1 + @size_ngram
    e2 = @beg_s2 + @size_ngram
    while @s1[e1] && @s1[e1] == @s2[e2]
      e1 += 1; e2 += 1
    end

    @end_s1_prev = e1
    @end_s2_prev = e2
    @beg_s1 = e1

    if @reverse
      {source:{begin:b2 , end:e2}, target:{begin:b1, end:e1}}
    else
      {source:{begin:b1 , end:e1}, target:{begin:b2, end:e2}}
    end
  end

  private

  # Context just before the current candidate in s1 and s2, as
  # [window_s1, window_s2]. Characters are collected right to left, so both
  # windows are in reversed order. Returns nil when the candidate starts less
  # than a window size from the beginning of either string.
  def get_left_windows
    return if @beg_s1 < @size_window || @beg_s2 < @size_window

    [collect_window(@s1, @beg_s1 - 1, -1), collect_window(@s2, @beg_s2 - 1, -1)]
  end

  # Context just after the current candidate in s1 and s2, as
  # [window_s1, window_s2]. Returns nil when fewer than a window size of
  # characters follow the candidate in either string (the mirror image of the
  # guard in get_left_windows).
  def get_right_windows
    return if (@beg_s1 + @size_ngram > (@s1.length - @size_window)) || (@beg_s2 + @size_ngram > (@s2.length - @size_window))

    [collect_window(@s1, @beg_s1 + @size_ngram, 1), collect_window(@s2, @beg_s2 + @size_ngram, 1)]
  end

  # Collects up to @size_window ASCII letters and digits from str, starting at
  # index loc and moving by step (+1 or -1); other characters are skipped.
  def collect_window(str, loc, step)
    picked = []
    while picked.length < @size_window && loc >= 0 && loc < str.length
      picked << str[loc] if str[loc] =~ /[0-9a-zA-Z]/
      loc += step
    end
    picked.join
  end

  # Cosine similarity of the two strings over character n-grams of the given order.
  def text_similarity(str1, str2, ngram_order = 2)
    String::Similarity.cosine(str1, str2, ngram:ngram_order)
  end

end
@@ -4,75 +4,73 @@ require 'string-similarity'
4
4
  module TextAlignment; end unless defined? TextAlignment
5
5
 
6
6
  # approximate the location of str1 in str2
7
- module TextAlignment
8
- SIGNATURE_NGRAM = 5
9
- MIN_LENGTH_FOR_APPROXIMATION = 50
10
- BUFFER_RATE = 0.1
11
- TEXT_SIMILARITY_TRESHOLD = 0.7
12
- end
7
+ TextAlignment::SIGNATURE_NGRAM = 7 unless defined? TextAlignment::SIGNATURE_NGRAM
8
+ TextAlignment::MIN_LENGTH_FOR_APPROXIMATION = 50 unless defined? TextAlignment::MIN_LENGTH_FOR_APPROXIMATION
9
+ TextAlignment::BUFFER_RATE = 0.1 unless defined? TextAlignment::BUFFER_RATE
10
+ TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.7 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD
13
11
 
14
12
  class << TextAlignment
15
13
 
16
- # If finds an approximate region of str2 that contains str1
17
- def approximate_fit(str1, str2)
18
- raise ArgumentError, 'nil string' if str1.nil? || str2.nil?
19
- return 0, str2.length if str2.length < TextAlignment::MIN_LENGTH_FOR_APPROXIMATION
14
+ # If finds an approximate region of str2 that contains str1
15
+ def approximate_fit(str1, str2)
16
+ raise ArgumentError, 'nil string' if str1.nil? || str2.nil?
17
+ return 0, str2.length if str2.length < TextAlignment::MIN_LENGTH_FOR_APPROXIMATION
20
18
 
21
- ngram1 = (0 .. str1.length - TextAlignment::SIGNATURE_NGRAM).collect{|i| str1[i, TextAlignment::SIGNATURE_NGRAM]}
22
- ngram2 = (0 .. str2.length - TextAlignment::SIGNATURE_NGRAM).collect{|i| str2[i, TextAlignment::SIGNATURE_NGRAM]}
23
- ngram_shared = ngram1 & ngram2
19
+ ngram1 = (0 .. str1.length - TextAlignment::SIGNATURE_NGRAM).collect{|i| str1[i, TextAlignment::SIGNATURE_NGRAM]}
20
+ ngram2 = (0 .. str2.length - TextAlignment::SIGNATURE_NGRAM).collect{|i| str2[i, TextAlignment::SIGNATURE_NGRAM]}
21
+ ngram_shared = ngram1 & ngram2
24
22
 
25
- # If there is no shared n-gram found, it may mean there is no serious overlap between the two strings
26
- return nil, nil if ngram_shared.empty?
23
+ # If there is no shared n-gram found, it may mean there is no serious overlap between the two strings
24
+ return nil, nil if ngram_shared.empty?
27
25
 
28
- signature_ngrams = ngram_shared.select{|g| ngram2.count(g) == 1}
29
- return nil, nil if signature_ngrams.empty? #raise "no signature ngram"
26
+ signature_ngrams = ngram_shared.select{|g| ngram2.count(g) == 1}
27
+ return nil, nil if signature_ngrams.empty? #raise "no signature ngram"
30
28
 
31
- cache = {}
32
- fit_begin, fit_end = nil, nil
33
- signature_ngrams.each do |signature_ngram|
34
- loc_signature_ngram_in_str1 = str1.index(signature_ngram)
35
- loc_signature_ngram_in_str2 = str2.index(signature_ngram)
29
+ cache = {}
30
+ fit_begin, fit_end = nil, nil
31
+ signature_ngrams.each do |signature_ngram|
32
+ loc_signature_ngram_in_str1 = str1.index(signature_ngram)
33
+ loc_signature_ngram_in_str2 = str2.index(signature_ngram)
36
34
 
37
- # approximate the beginning of the fit
38
- fit_begin = loc_signature_ngram_in_str2 - loc_signature_ngram_in_str1 - (loc_signature_ngram_in_str1 * TextAlignment::BUFFER_RATE).to_i
39
- fit_begin = 0 if fit_begin < 0
35
+ # approximate the beginning of the fit
36
+ fit_begin = loc_signature_ngram_in_str2 - loc_signature_ngram_in_str1 - (loc_signature_ngram_in_str1 * TextAlignment::BUFFER_RATE).to_i
37
+ fit_begin = 0 if fit_begin < 0
40
38
 
41
- # approximate the end of the fit
42
- offset_end = str1.length - loc_signature_ngram_in_str1
43
- fit_end = loc_signature_ngram_in_str2 + offset_end + (offset_end * TextAlignment::BUFFER_RATE).to_i
44
- fit_end = str2.length if fit_end > str2.length
39
+ # approximate the end of the fit
40
+ offset_end = str1.length - loc_signature_ngram_in_str1
41
+ fit_end = loc_signature_ngram_in_str2 + offset_end + (offset_end * TextAlignment::BUFFER_RATE).to_i
42
+ fit_end = str2.length if fit_end > str2.length
45
43
 
46
- next if cache.has_key?("#{fit_begin}-#{fit_end}")
47
- text_similarity = text_similarity(str1, str2[fit_begin ... fit_end])
48
- cache["#{fit_begin}-#{fit_end}"] = text_similarity
44
+ next if cache.has_key?("#{fit_begin}-#{fit_end}")
45
+ text_similarity = text_similarity(str1, str2[fit_begin ... fit_end])
46
+ cache["#{fit_begin}-#{fit_end}"] = text_similarity
49
47
 
50
- break if text_similarity > TextAlignment::TEXT_SIMILARITY_TRESHOLD
51
- fit_begin, fit_end = nil, nil
52
- end
53
- return fit_begin, fit_end if fit_begin && fit_end && fit_begin < fit_end
54
- return nil, nil
55
- end
48
+ break if text_similarity > TextAlignment::TEXT_SIMILARITY_TRESHOLD
49
+ fit_begin, fit_end = nil, nil
50
+ end
51
+ return fit_begin, fit_end if fit_begin && fit_end && fit_begin < fit_end
52
+ return nil, nil
53
+ end
56
54
 
57
- private
55
+ private
58
56
 
59
- def text_similarity(str1, str2, ngram_order = 3)
60
- _str1 = str1.delete(" \t\r\n")
61
- _str2 = str2.delete(" \t\r\n")
62
- String::Similarity.cosine(_str1, _str2, ngram:2)
63
- end
57
+ def text_similarity(str1, str2, ngram_order = 3)
58
+ _str1 = str1.delete(" \t\r\n")
59
+ _str2 = str2.delete(" \t\r\n")
60
+ String::Similarity.cosine(_str1, _str2, ngram:2)
61
+ end
64
62
 
65
63
  end
66
64
 
67
65
  if __FILE__ == $0
68
- require 'json'
66
+ require 'json'
69
67
 
70
- if ARGV.length == 2
71
- str1 = JSON.parse(File.read(ARGV[0]).strip)["text"]
72
- str2 = JSON.parse(File.read(ARGV[1]).strip)["text"]
68
+ if ARGV.length == 2
69
+ str1 = JSON.parse(File.read(ARGV[0]).strip)["text"]
70
+ str2 = JSON.parse(File.read(ARGV[1]).strip)["text"]
73
71
 
74
- loc = TextAlignment::approximate_fit(str1, str2)
75
- p loc
76
- puts str2[loc[0]...loc[1]]
77
- end
72
+ loc = TextAlignment::approximate_fit(str1, str2)
73
+ p loc
74
+ puts str2[loc[0]...loc[1]]
75
+ end
78
76
  end