text_alignment 0.2.8 → 0.3.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2dba8334b3fbdc77976ae32ed2e5844a716f954c850bf24c0937462c1cd2b220
4
- data.tar.gz: 806ac498264b81111ef1055dbb8592fa5ec7fd4755f0b2106c2851c45c6eb498
3
+ metadata.gz: 19a2dfcf8dfffa752dfc0c3363d2d3e1cb3ef7498f79023cdd16e38aa8c46afd
4
+ data.tar.gz: 94d925dfc71d24b05fd6861a4f7f7344428b68785db84eeae8f430563b4e3318
5
5
  SHA512:
6
- metadata.gz: 4732107de89daff9e8bbe89254e0d138db517396958e75b56f7f0697e3ff9d38e6b64082bbc84482bf12a91da7a03cfacdb3691729aaebd3eeac6aa836bf07c5
7
- data.tar.gz: e101fd3c1f5b8a5d9604f4998218a816d0eeecd5e7afbed23bf7504403d97de5e282f0dcad37abb7a46804f1f0749f6b996430bc47e93ae0b4947a916c25f40d
6
+ metadata.gz: 72e61cf30c98df2c3d5ac19717c813c936b55daad22cb8c6e8b44bdb45321dab98c69d5f90820e9993d86263b04bdad2e96e8010afc8a57eee916126b673c8cc
7
+ data.tar.gz: d92c04294d58845f4a88cb8d9e3db42e9a18e0dd02d0398e3a95bf94662f64a33752754dd637163ef9bc4af77dc602c12fe683d49e9ac0ebb61a2469e5e08216
@@ -1,51 +1,204 @@
1
1
  #!/usr/bin/env ruby
2
2
  require 'text_alignment'
3
-
4
3
  require 'json'
5
4
  require 'pp'
6
5
 
6
# Loads the annotation document stored in +filename+.
#
# The file type is chosen by extension (matched case-insensitively, so
# "doc.JSON" works as well as "doc.json"):
#
# * '.json' - the file is parsed as JSON with symbolized keys and the parsed
#             value is returned as-is.
# * '.txt'  - the raw file content is wrapped as {text: <content>}.
#
# Raises RuntimeError ("unknown file type: ...") for any other extension.
def read_annotations(filename)
  case File.extname(filename).downcase
  when '.json'
    JSON.parse(File.read(filename), symbolize_names: true)
  when '.txt'
    { text: File.read(filename) }
  else
    raise "unknown file type: #{filename}"
  end
end
16
+
17
# Returns the text stored in +filename+.
#
# The file type is chosen by extension (matched case-insensitively):
#
# * '.json' - the file is parsed as JSON with symbolized keys and the value
#             under :text is returned (nil when the document has no such key).
# * '.txt'  - the raw file content is returned.
#
# Raises RuntimeError ("unknown file type: ...") for any other extension.
def read_text(filename)
  case File.extname(filename).downcase
  when '.json'
    document = JSON.parse(File.read(filename), symbolize_names: true)
    document[:text]
  when '.txt'
    File.read(filename)
  else
    raise "unknown file type: #{filename}"
  end
end
28
+
29
# Projects the annotations of several source documents onto one target text.
#
# source_annotations - Array of Hashes, each holding :text and optionally
#                      :denotations, :relations, :attributes, :modifications.
# target_annotations - Hash holding at least :text. It is updated in place
#                      and also returned.
#
# For every source document that has denotations, an alignment between its
# text and the target text is built and the denotations are passed through
# +transform_hdenotations+. Every annotation receives a fresh id that is
# unique across all source documents (T1.., R1.., A1.., M1..), and the
# references held by relations (:subj, :obj), attributes (:subj) and
# modifications (:obj) are rewritten to those fresh ids.
#
# A document without denotations contributes nothing, because its other
# annotations would have nothing to refer to.
def align_mdoc(source_annotations, target_annotations)
  idnum = Hash.new(0) # last id number handed out, per annotation kind

  source_annotations.each do |annotations|
    source_denotations = annotations[:denotations]
    next if source_denotations.nil? || source_denotations.empty?

    alignment = TextAlignment::TextAlignment.new(annotations[:text], target_annotations[:text])

    # old id => new id, for everything carried over from this document
    ididx = {}
    denotations = alignment.transform_hdenotations(source_denotations)
    denotations.each do |d|
      reid = "T#{idnum[:denotations] += 1}"
      ididx[d[:id]] = reid
      d[:id] = reid
    end
    target_annotations[:denotations] = (target_annotations[:denotations] || []) + denotations

    # Order matters: attributes and modifications may refer to relations.
    remap_mdoc_annotations(annotations, target_annotations, :relations, 'R', [:subj, :obj], idnum, ididx)
    remap_mdoc_annotations(annotations, target_annotations, :attributes, 'A', [:subj], idnum, ididx)
    remap_mdoc_annotations(annotations, target_annotations, :modifications, 'M', [:obj], idnum, ididx)
  end

  target_annotations
end

# Appends the +key+ annotations of one source document to +target_annotations+,
# giving each a fresh "<prefix><n>" id and rewriting the references listed in
# +ref_keys+ through +ididx+.
#
# An annotation whose reference has no entry in +ididx+ (its referent was not
# carried over to the target) is skipped rather than emitted with a nil
# reference.
def remap_mdoc_annotations(annotations, target_annotations, key, prefix, ref_keys, idnum, ididx)
  entries = annotations[key]
  return if entries.nil? || entries.empty?

  target_annotations[key] ||= []
  entries.each do |entry|
    refs = ref_keys.map { |ref_key| [ref_key, ididx[entry[ref_key]]] }.to_h
    next if refs.values.any?(&:nil?)

    reid = "#{prefix}#{idnum[key] += 1}"
    ididx[entry[:id]] = reid
    target_annotations[key] << entry.merge(refs).merge(id: reid)
  end
end
90
+
91
+
7
92
# Command-line entry point: transfers the annotations in ARGV[0] onto the
# text in ARGV[1] and reports annotation counts on stderr.
unless ARGV.length == 2
  warn "align_annotations target_annotations(.json|.txt) reference_annotations(.json|.txt)"
  # A usage error is a failure; report it through the exit status.
  exit 1
end

source_annotations = read_annotations(ARGV[0])
target_text = read_text(ARGV[1])

lost_annotations = []
target_annotations =
  if source_annotations.is_a?(Array)
    # Several source documents projected onto one target text.
    align_mdoc(source_annotations, { text: target_text })
  else
    alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)

    pp alignment

    denotations = alignment.transform_hdenotations(source_annotations[:denotations])
    lost_annotations += alignment.lost_annotations if alignment.lost_annotations

    source_annotations.merge({ text: target_text, denotations: denotations })
  end

# Count the source annotations per kind, treating the single-document case as
# a one-element list so both input shapes share the same code.
source_documents = source_annotations.is_a?(Array) ? source_annotations : [source_annotations]
source_counts = [:denotations, :relations, :attributes, :modifications].map do |kind|
  [kind, source_documents.inject(0) { |total, doc| total + (doc[kind].nil? ? 0 : doc[kind].length) }]
end.to_h

warn "[source]"
warn "denotations:\t#{source_counts[:denotations]}"
# warn "relations:\t#{source_counts[:relations]}"
# warn "attributes:\t#{source_counts[:attributes]}"
# warn "modifications:\t#{source_counts[:modifications]}"

warn "\n[target]"
warn "denotations:\t#{target_annotations[:denotations].nil? ? 0 : target_annotations[:denotations].length}"
# warn "relations:\t#{target_annotations[:relations].nil? ? 0 : target_annotations[:relations].length}"
# warn "attributes:\t#{target_annotations[:attributes].nil? ? 0 : target_annotations[:attributes].length}"
# warn "modifications:\t#{target_annotations[:modifications].nil? ? 0 : target_annotations[:modifications].length}"

# lost_annotations is always an Array here, so the section is always printed.
warn "\n[lost annotations]"
warn lost_annotations.length.to_s
163
+
164
+ #puts target_annotations.to_json
165
+
166
+ # denotations = anns1[:denotations]
167
+
168
+ # puts "[Alignment1]====="
169
+ # align = TextAlignment::TextAlignment.new(str1, str2, TextAlignment::MAPPINGS)
170
+
171
+ # align.alignment.each do |a|
172
+ # p [a[:target][:begin], a[:target][:end], a[:source][:begin], a[:source][:end]]
173
+ # end
174
+
175
+ # puts TextAlignment::sdiff2cdiff(align.sdiff)
176
+ # puts
177
+ # puts "[Similarity]\n#{align.similarity}"
178
+ # puts
179
+ # puts '[Denotations original]'
180
+ # pp denotations
181
+ # puts
182
+ # puts '[Denotations transformed]'
183
+ # new_denotations = align.transform_hdenotations(denotations)
184
+ # pp new_denotations
185
+ # puts
186
+ # puts "[Alignment2 (downcased)]====="
187
+ # align = TextAlignment::TextAlignment.new(str1.downcase, str2.downcase, TextAlignment::MAPPINGS)
188
+ # puts TextAlignment::sdiff2cdiff(align.sdiff)
189
+ # puts
190
+ # puts "[Similarity]\n#{align.similarity}"
191
+ # puts
192
+ # puts '[Denotations original]'
193
+ # pp denotations
194
+ # puts
195
+ # puts '[Denotations transformed]'
196
+ # new_denotations = align.transform_hdenotations(denotations)
197
+ # pp new_denotations
198
+ # puts
199
+ # puts '[Annotations transformed]'
200
+ # anns2[:denotations] = new_denotations
201
+ # puts anns2.to_json
49
202
 
50
203
  # p align.common_elements
51
204
  # puts "---------------"
@@ -0,0 +1,146 @@
1
+ #!/usr/bin/env ruby
2
+ require 'string-similarity'
3
+
4
module TextAlignment; end unless defined? TextAlignment

# Defaults; each is only set when not already defined, so whichever file
# defines a constant first wins.
TextAlignment::SIZE_NGRAM = 10 unless defined? TextAlignment::SIZE_NGRAM
TextAlignment::SIZE_WINDOW = 20 unless defined? TextAlignment::SIZE_WINDOW
TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.8 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD

# Walks through two texts and yields, one call at a time, "anchors": stretches
# that occur verbatim (ignoring case) in both texts.
#
# Internally the shorter text is scanned (s1) and looked up in the longer one
# (s2); results are always reported in terms of the original source/target.
class TextAlignment::AnchorFinder

  # source_str, target_str - the two texts to anchor against each other.
  # _size_ngram            - length of the n-gram used to seed an anchor
  #                          (defaults to TextAlignment::SIZE_NGRAM).
  # _size_window           - number of alphanumeric characters compared on
  #                          either side of a candidate
  #                          (defaults to TextAlignment::SIZE_WINDOW).
  def initialize(source_str, target_str, _size_ngram = nil, _size_window = nil)
    @size_ngram = _size_ngram || TextAlignment::SIZE_NGRAM
    @size_window = _size_window || TextAlignment::SIZE_WINDOW

    # Always scan the shorter text; remember whether roles were swapped.
    @reverse = (target_str.length < source_str.length)

    @s1, @s2 = if @reverse
      [target_str.downcase, source_str.downcase]
    else
      [source_str.downcase, target_str.downcase]
    end

    # current position in s1
    @beg_s1 = 0
    # position in s2 of the candidate currently being examined
    @beg_s2 = nil
    # end positions of the previously returned anchor (nil: none returned yet)
    @end_s1_prev = nil
    @end_s2_prev = 0
  end

  # Returns the next anchor as
  #   {source: {begin:, end:}, target: {begin:, end:}}
  # (character offsets, end exclusive), or nil when no further anchor is found.
  def get_next_anchor
    # NOTE(review): with '<' the n-gram starting exactly at this index (the
    # last one in s1) is never tried -- confirm whether that is intended.
    scan_limit = @s1.length - @size_ngram

    # find the position of an anchor ngram in s1 and s2
    while @beg_s1 < scan_limit
      anchor = @s1[@beg_s1, @size_ngram]

      search_position = 0
      # Try every occurrence of the n-gram in s2 until one is accepted.
      while (@beg_s2 = @s2.index(anchor, search_position))
        # accept if both beginnings are sufficiently close to the end points of the last match
        break if continues_previous_anchor?

        left_window_s1, left_window_s2 = get_left_windows
        break if left_window_s1 && (text_similarity(left_window_s1, left_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD)

        right_window_s1, right_window_s2 = get_right_windows
        break if right_window_s2 && (text_similarity(right_window_s1, right_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD)

        search_position = @beg_s2 + 1
      end

      # @beg_s2 is non-nil only when an occurrence was accepted above.
      break unless @beg_s2.nil?

      @beg_s1 += 1
    end

    return nil if @beg_s1 >= scan_limit

    # extend the block to the left as long as the characters agree
    b1 = @beg_s1
    b2 = @beg_s2
    while b1 > -1 && b2 > -1 && @s1[b1] == @s2[b2]
      b1 -= 1; b2 -= 1
    end
    b1 += 1; b2 += 1

    # ... and to the right
    e1 = @beg_s1 + @size_ngram
    e2 = @beg_s2 + @size_ngram
    while @s1[e1] && @s1[e1] == @s2[e2]
      e1 += 1; e2 += 1
    end

    @end_s1_prev = e1
    @end_s2_prev = e2
    @beg_s1 = e1

    if @reverse
      {source:{begin:b2 , end:e2}, target:{begin:b1, end:e1}}
    else
      {source:{begin:b1 , end:e1}, target:{begin:b2, end:e2}}
    end
  end

  private

  # True when the current candidate starts fewer than 5 characters after the
  # end of the previously returned anchor in both texts.
  def continues_previous_anchor?
    @end_s1_prev && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 >= @end_s2_prev) && (@beg_s2 - @end_s2_prev < 5)
  end

  # Returns [window_s1, window_s2]: the alphanumeric characters preceding the
  # candidate in each text (collected right-to-left), or nil when the
  # candidate starts within @size_window characters of either text's start.
  def get_left_windows
    return if @beg_s1 < @size_window || @beg_s2 < @size_window

    [collect_window(@s1, @beg_s1 - 1, -1), collect_window(@s2, @beg_s2 - 1, -1)]
  end

  # Returns [window_s1, window_s2]: the alphanumeric characters following the
  # candidate n-gram in each text, or nil when the n-gram ends within
  # @size_window characters of either text's end.
  def get_right_windows
    return if (@beg_s1 + @size_ngram > (@s1.length - @size_window)) || (@beg_s2 + @size_ngram > (@s2.length - @size_window))

    [collect_window(@s1, @beg_s1 + @size_ngram, 1), collect_window(@s2, @beg_s2 + @size_ngram, 1)]
  end

  # Collects up to @size_window ASCII alphanumeric characters from +str+,
  # starting at index +start+ and moving by +step+ (+1 or -1) until the
  # window is full or the end of the string is reached. Characters are
  # appended in visiting order.
  def collect_window(str, start, step)
    window = +''
    loc = start
    while window.length < @size_window && loc >= 0 && loc < str.length
      char = str[loc]
      window << char if char.match?(/[0-9a-zA-Z]/)
      loc += step
    end
    window
  end

  # Cosine similarity of the two strings over character n-grams of the given
  # order; 0 when either string is nil.
  def text_similarity(str1, str2, ngram_order = 2)
    return 0 if str1.nil? || str2.nil?
    String::Similarity.cosine(str1, str2, ngram:ngram_order)
  end

end
@@ -4,72 +4,73 @@ require 'string-similarity'
4
4
module TextAlignment; end unless defined? TextAlignment

# approximate the location of str1 in str2
#
# Each default is only set when not already defined, so a value defined by a
# file loaded earlier takes precedence.
TextAlignment::SIGNATURE_NGRAM = 7 unless defined? TextAlignment::SIGNATURE_NGRAM
TextAlignment::MIN_LENGTH_FOR_APPROXIMATION = 50 unless defined? TextAlignment::MIN_LENGTH_FOR_APPROXIMATION
TextAlignment::BUFFER_RATE = 0.1 unless defined? TextAlignment::BUFFER_RATE
TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.7 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD

class << TextAlignment

  # Finds an approximate region of str2 that contains str1.
  #
  # Returns [fit_begin, fit_end] (character offsets into str2, end exclusive):
  #
  # * [0, str2.length] when str2 is shorter than MIN_LENGTH_FOR_APPROXIMATION;
  # * the first candidate region whose similarity to str1 exceeds
  #   TEXT_SIMILARITY_TRESHOLD;
  # * [nil, nil] when no candidate qualifies.
  #
  # Raises ArgumentError when either string is nil.
  def approximate_fit(str1, str2)
    raise ArgumentError, 'nil string' if str1.nil? || str2.nil?
    return 0, str2.length if str2.length < TextAlignment::MIN_LENGTH_FOR_APPROXIMATION

    size = TextAlignment::SIGNATURE_NGRAM
    ngram1 = (0 .. str1.length - size).map { |i| str1[i, size] }
    ngram2 = (0 .. str2.length - size).map { |i| str2[i, size] }
    ngram_shared = ngram1 & ngram2

    # If there is no shared n-gram found, it may mean there is no serious overlap between the two strings
    return nil, nil if ngram_shared.empty?

    # A signature n-gram occurs exactly once in str2, so its position there is
    # unambiguous. Count once up front instead of rescanning per n-gram.
    occurrences = ngram2.each_with_object(Hash.new(0)) { |g, counts| counts[g] += 1 }
    signature_ngrams = ngram_shared.select { |g| occurrences[g] == 1 }
    return nil, nil if signature_ngrams.empty? #raise "no signature ngram"

    # Regions already compared and rejected; many n-grams map to the same one.
    rejected = {}
    signature_ngrams.each do |signature_ngram|
      loc_in_str1 = str1.index(signature_ngram)
      loc_in_str2 = str2.index(signature_ngram)

      # approximate the beginning of the fit
      fit_begin = loc_in_str2 - loc_in_str1 - (loc_in_str1 * TextAlignment::BUFFER_RATE).to_i
      fit_begin = 0 if fit_begin < 0

      # approximate the end of the fit
      offset_end = str1.length - loc_in_str1
      fit_end = loc_in_str2 + offset_end + (offset_end * TextAlignment::BUFFER_RATE).to_i
      fit_end = str2.length if fit_end > str2.length

      region = [fit_begin, fit_end]
      next if rejected.key?(region)

      if text_similarity(str1, str2[fit_begin ... fit_end]) > TextAlignment::TEXT_SIMILARITY_TRESHOLD
        return fit_begin < fit_end ? region : [nil, nil]
      end

      # Candidates stay local to the iteration, so a rejected region can never
      # leak out as the result when later n-grams map to the same region.
      rejected[region] = true
    end

    return nil, nil
  end

  private

  # Cosine similarity of the two strings over character n-grams of the given
  # order, computed after removing spaces, tabs and line breaks.
  def text_similarity(str1, str2, ngram_order = 2)
    _str1 = str1.delete(" \t\r\n")
    _str2 = str2.delete(" \t\r\n")
    String::Similarity.cosine(_str1, _str2, ngram:ngram_order)
  end

end
63
64
 
64
65
# Demo when run directly: prints the approximate location of the first
# document's text within the second document's text, then the matched region.
if __FILE__ == $0
  require 'json'

  if ARGV.length == 2
    str1 = JSON.parse(File.read(ARGV[0]).strip)["text"]
    str2 = JSON.parse(File.read(ARGV[1]).strip)["text"]

    loc = TextAlignment::approximate_fit(str1, str2)
    p loc
    # approximate_fit returns [nil, nil] when no region qualifies; there is
    # nothing meaningful to slice in that case.
    puts str2[loc[0]...loc[1]] if loc[0] && loc[1]
  end
end