string_diff 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -0
- data/lib/string_diff.rb +113 -6
- data/lib/string_diff/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8b8695203741c40b206db357d9df4c02f2bc5787
|
4
|
+
data.tar.gz: c8c5ef569e59e6aa86862e860e2fc3bd29cc0c2d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 27e8f9967dcd916a1b8296274a805965266502c2aff01b113fd4aa649f9ee7f1cf71d6d21d4856ffae8c3b64e69d45b286be3428f7f74e2c527e38ac6f3c5995
|
7
|
+
data.tar.gz: 3f650464f981076bbea98a11e799df2613db1fff523ecd79a31dcbd89644fda1d64c7e645626eb05b885a1b1a00168f46db4534f328a6756240da1fbb82f2039
|
data/README.md
CHANGED
@@ -44,6 +44,9 @@ StringDiff::Diff.new(string_1, string_2).diff
|
|
44
44
|
# => "hello <span class='deletion'>world</span>"
|
45
45
|
```
|
46
46
|
|
47
|
+
## Known Bugs
|
48
|
+
|
49
|
+
At this point, the gem does not handle words that have simply changed position in the string but are otherwise unchanged.
|
47
50
|
|
48
51
|
## Development
|
49
52
|
|
data/lib/string_diff.rb
CHANGED
@@ -14,14 +14,47 @@ module StringDiff
|
|
14
14
|
a1 = PragmaticTokenizer::Tokenizer.new(downcase: false).tokenize(string1)
|
15
15
|
a2 = PragmaticTokenizer::Tokenizer.new(downcase: false).tokenize(string2)
|
16
16
|
|
17
|
-
construct_string(compare(a1, a2))
|
17
|
+
construct_string(compare(process_parens(a1), process_parens(a2)))
|
18
18
|
end
|
19
19
|
|
20
20
|
private
|
21
21
|
|
22
|
+
# Glues stand-alone parenthesis tokens onto their neighbouring tokens so that
# "(" and ")" are compared together with the word they wrap:
#
#   ["a", "(", "b", ")", "c"]  # => ["a", "(b)", "c"]
#
# An opening parenthesis is prefixed to the token that follows it; a closing
# parenthesis is appended to the token that precedes it. Any number of
# parenthesised groups is handled in a single left-to-right pass, so offsets
# never have to be corrected after a merge.
#
# The token list is only rewritten when it contains the same number of "("
# and ")" tokens (and at least one of each); otherwise it is returned
# untouched.
#
# @param array [Array<String>] token list; modified in place when rewritten
# @return [Array<String>] the same array object that was passed in
def process_parens(array)
  return array unless array.include?('(') && array.count('(') == array.count(')')

  merged = []
  prefix = '' # opening parens waiting for the token they belong to

  array.each do |token|
    case token
    when '('
      prefix += token
    when ')'
      if !prefix.empty?
        # "(" directly followed by ")": keep them together as one token.
        merged << (prefix + token)
        prefix = ''
      elsif merged.empty?
        # Nothing precedes this ")" to attach it to; keep it as-is.
        merged << token
      else
        # Build a new string rather than mutating the caller's token.
        merged[-1] = merged[-1] + token
      end
    else
      merged << (prefix + token)
      prefix = ''
    end
  end

  # A trailing "(" has no following token to attach to; keep it as a token.
  merged << prefix unless prefix.empty?

  array.replace(merged)
end
|
52
|
+
|
22
53
|
def compare(array1, array2)
|
23
54
|
deletions = array1 - array2
|
55
|
+
puts "deletions: #{deletions.to_s}"
|
24
56
|
insertions = array2 - array1
|
57
|
+
puts "insertions: #{insertions.to_s}"
|
25
58
|
|
26
59
|
process_duplicates(array1, array2)
|
27
60
|
annotate_deletions(deletions, array1)
|
@@ -46,12 +79,32 @@ module StringDiff
|
|
46
79
|
end
|
47
80
|
|
48
81
|
if index.nil?
|
49
|
-
|
82
|
+
# Check whether or not we're dealing with an annotated deletion/insertion, or plain token
|
83
|
+
contains_span = array1.last.include?("<span") ? true : false
|
84
|
+
contains_punct_in_span = !(array1.last.scan(/(?<='>).*(?=<\/)/)[0] =~ (/[[:punct:]]/)).nil? if contains_span
|
85
|
+
stand_alone_punct = array1.last =~ (/[[:punct:]]/) if !contains_span
|
86
|
+
|
87
|
+
# If there is punctuation after a deletion, we need to make sure the
|
88
|
+
# insertion is added before the punctuation.
|
89
|
+
if (contains_punct_in_span || stand_alone_punct) && array1[-2].include?("<span class='deletion'")
|
90
|
+
array1.insert(-2, "<span class='insertion'>#{v}</span>")
|
91
|
+
elsif array2.find_index(v) < (PragmaticTokenizer::Tokenizer.new(downcase: false).tokenize(string1).count)
|
92
|
+
# Count how many insertions up to the original position
|
93
|
+
insertions_count = 0
|
94
|
+
deletions_count = 0
|
95
|
+
for i in 0..(array2.find_index(v)+1) do
|
96
|
+
insertions_count += 1 if array1[i].include?("<span class='insertion'")
|
97
|
+
deletions_count += 1 if array1[i].include?("<span class='deletion'")
|
98
|
+
end
|
99
|
+
array1.insert(((find_correct_index(v, array1, array2) + insertions_count + deletions_count) - 1), "<span class='insertion'>#{v}</span>")
|
100
|
+
else
|
101
|
+
# Otherwise we put it on the end.
|
102
|
+
array1.insert(-1, "<span class='insertion'>#{v}</span>")
|
103
|
+
end
|
50
104
|
else
|
51
105
|
array1.insert(index + 1, "<span class='insertion'>#{v}</span>")
|
52
106
|
end
|
53
107
|
end
|
54
|
-
|
55
108
|
array1
|
56
109
|
end
|
57
110
|
|
@@ -60,6 +113,11 @@ module StringDiff
|
|
60
113
|
dup2 = array2.find_all { |e| array2.count(e) > 1 }
|
61
114
|
|
62
115
|
missing_words = (dup1 - dup2).uniq
|
116
|
+
additional_words = (dup2 - dup1).uniq
|
117
|
+
|
118
|
+
unless additional_words.empty?
|
119
|
+
set_additional_duplicates_indexes(array2, additional_words)
|
120
|
+
end
|
63
121
|
|
64
122
|
duplicate_indexs_of_array1 = []
|
65
123
|
duplicate_indexs_of_array2 = []
|
@@ -79,6 +137,28 @@ module StringDiff
|
|
79
137
|
end
|
80
138
|
end
|
81
139
|
|
140
|
+
# Records, in @additional_indexes, every position in +array+ whose element
# equals the first entry of +dup+. Only dup[0] is considered; any further
# entries in +dup+ are ignored.
#
# @param array [Array] list to scan
# @param dup [Array] candidate words; only the first one is used
# @return [Array<Integer>] the positions that were stored
def set_additional_duplicates_indexes(array, dup)
  first_word = dup[0]
  matches = array.each_with_index.select { |element, _position| element == first_word }
  @additional_indexes = matches.map(&:last)
end
|
143
|
+
|
144
|
+
# Picks the position in the second token list that +token+ should be
# anchored to when it is inserted into +array1+.
#
# When @additional_indexes is unset, this is simply the first position of
# +token+ in +array2+. Otherwise the number of times +token+ has already been
# inserted into +array1+ (as "<span class='insertion'>token</span>") selects
# which recorded position to use, so repeated insertions of the same word
# land on successive positions.
#
# If more insertions have been made than positions were recorded, the first
# position of +token+ in +array2+ is used instead of returning nil.
#
# @param token [String] the plain token being inserted
# @param array1 [Array<String>] annotated token list built so far
# @param array2 [Array<String>] token list of the second string
# @return [Integer, nil] chosen position; nil only if +token+ is absent from
#   +array2+ and no recorded position applies
def find_correct_index(token, array1, array2)
  return array2.find_index(token) if @additional_indexes.nil?

  # Match the complete span so that an inserted "then" is not counted as an
  # earlier insertion of "the".
  inserted_span = "<span class='insertion'>#{token}</span>"
  appeared_count = array1.count { |item| item.include?(inserted_span) }

  @additional_indexes[appeared_count] || array2.find_index(token)
end
|
161
|
+
|
82
162
|
def construct_string(array1)
|
83
163
|
string = ""
|
84
164
|
|
@@ -89,12 +169,40 @@ module StringDiff
|
|
89
169
|
if token.include?("<span")
|
90
170
|
if token.scan(/(?<='>).*(?=<\/)/)[0] !~ /[[:punct:]]/ || string1.include?(" #{token.scan(/(?<='>).*(?=<\/)/)[0]}")
|
91
171
|
string += " #{token}"
|
172
|
+
elsif !( token.scan(/(?<='>).*(?=<\/)/)[0] =~ (/[']/) ).nil?
|
173
|
+
if string.scan(/[']/).empty? || string.scan(/[(]/).empty?
|
174
|
+
string += " #{token}#{array1[i+1]}"
|
175
|
+
array1.slice!(i+1)
|
176
|
+
else
|
177
|
+
string += token
|
178
|
+
end
|
179
|
+
elsif !( token.scan(/(?<='>).*(?=<\/)/)[0] =~ (/[(]/) ).nil?
|
180
|
+
if string.scan(/[(]/).empty?
|
181
|
+
string += " #{token}#{array1[i+1]}"
|
182
|
+
array1.slice!(i+1)
|
183
|
+
else
|
184
|
+
string += token
|
185
|
+
end
|
92
186
|
else
|
93
187
|
string += token
|
94
188
|
end
|
95
189
|
else
|
96
|
-
if token !~ /[[:punct:]]/
|
190
|
+
if token !~ /[[:punct:]]/
|
97
191
|
string += " #{token}"
|
192
|
+
elsif !( token =~ (/[']/) ).nil?
|
193
|
+
if string.scan(/[']/).empty?
|
194
|
+
string += " #{token}#{array1[i+1]}"
|
195
|
+
array1.slice!(i+1)
|
196
|
+
else
|
197
|
+
string += token
|
198
|
+
end
|
199
|
+
elsif !( token =~ (/[(]/) ).nil?
|
200
|
+
if string.scan(/[(]/).empty?
|
201
|
+
string += " #{token}#{array1[i+1]}"
|
202
|
+
array1.slice!(i+1)
|
203
|
+
else
|
204
|
+
string += token
|
205
|
+
end
|
98
206
|
else
|
99
207
|
string += token
|
100
208
|
end
|
@@ -103,6 +211,5 @@ module StringDiff
|
|
103
211
|
end
|
104
212
|
string
|
105
213
|
end
|
106
|
-
|
107
214
|
end
|
108
|
-
end
|
215
|
+
end
|
data/lib/string_diff/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_diff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- natanio
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-
|
11
|
+
date: 2016-05-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|