string_diff 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -0
- data/lib/string_diff.rb +113 -6
- data/lib/string_diff/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8b8695203741c40b206db357d9df4c02f2bc5787
|
4
|
+
data.tar.gz: c8c5ef569e59e6aa86862e860e2fc3bd29cc0c2d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 27e8f9967dcd916a1b8296274a805965266502c2aff01b113fd4aa649f9ee7f1cf71d6d21d4856ffae8c3b64e69d45b286be3428f7f74e2c527e38ac6f3c5995
|
7
|
+
data.tar.gz: 3f650464f981076bbea98a11e799df2613db1fff523ecd79a31dcbd89644fda1d64c7e645626eb05b885a1b1a00168f46db4534f328a6756240da1fbb82f2039
|
data/README.md
CHANGED
@@ -44,6 +44,9 @@ StringDiff::Diff.new(string_1, string_2).diff
|
|
44
44
|
# => "hello <span class='deletion'>world</span>"
|
45
45
|
```
|
46
46
|
|
47
|
+
## Known Bugs
|
48
|
+
|
49
|
+
At this point in time, the gem does not handle words that have simply changed position in the string but are otherwise unchanged.
|
47
50
|
|
48
51
|
## Development
|
49
52
|
|
data/lib/string_diff.rb
CHANGED
@@ -14,14 +14,47 @@ module StringDiff
|
|
14
14
|
a1 = PragmaticTokenizer::Tokenizer.new(downcase: false).tokenize(string1)
|
15
15
|
a2 = PragmaticTokenizer::Tokenizer.new(downcase: false).tokenize(string2)
|
16
16
|
|
17
|
-
construct_string(compare(a1, a2))
|
17
|
+
construct_string(compare(process_parens(a1), process_parens(a2)))
|
18
18
|
end
|
19
19
|
|
20
20
|
private
|
21
21
|
|
22
|
+
# Glues parenthesis tokens onto their neighbouring tokens so that "(" and ")"
# are not treated as stand-alone tokens: every "(" is prefixed to the token
# that follows it and every ")" is appended to the token that precedes it,
# e.g. ["x", "(", "a", "b", ")", "y"] becomes ["x", "(a", "b)", "y"].
#
# The array is left untouched unless it contains at least one "(" and the
# number of "(" tokens equals the number of ")" tokens.
#
# array - Array of String tokens; rewritten in place when parens are merged.
#
# Returns the same array object that was passed in.
def process_parens(array)
  open_count = array.count('(')
  return array if open_count.zero? || open_count != array.count(')')

  # A single left-to-right pass keeps positions consistent no matter how many
  # parenthesised groups the token list contains.
  merged = []
  pending_open = ''
  array.each do |token|
    if token == '('
      # Hold the paren back until we see the token it belongs to.
      pending_open += token
    elsif token == ')' && pending_open.empty? && !merged.empty?
      # Build a new string rather than mutating the caller's token object.
      merged[-1] = merged.last + token
    else
      merged << pending_open + token
      pending_open = ''
    end
  end
  # A "(" with nothing after it stays as its own token.
  merged << pending_open unless pending_open.empty?

  array.replace(merged)
end
|
52
|
+
|
22
53
|
def compare(array1, array2)
|
23
54
|
deletions = array1 - array2
|
55
|
+
puts "deletions: #{deletions.to_s}"
|
24
56
|
insertions = array2 - array1
|
57
|
+
puts "insertions: #{insertions.to_s}"
|
25
58
|
|
26
59
|
process_duplicates(array1, array2)
|
27
60
|
annotate_deletions(deletions, array1)
|
@@ -46,12 +79,32 @@ module StringDiff
|
|
46
79
|
end
|
47
80
|
|
48
81
|
if index.nil?
|
49
|
-
|
82
|
+
# Check whether or not we're dealing with an annotated deletion/insertion, or plain token
|
83
|
+
contains_span = array1.last.include?("<span") ? true : false
|
84
|
+
contains_punct_in_span = !(array1.last.scan(/(?<='>).*(?=<\/)/)[0] =~ (/[[:punct:]]/)).nil? if contains_span
|
85
|
+
stand_alone_punct = array1.last =~ (/[[:punct:]]/) if !contains_span
|
86
|
+
|
87
|
+
# If there is punctuation after a deletion, we need to make sure the
|
88
|
+
# insertion is added before the punctuation.
|
89
|
+
if (contains_punct_in_span || stand_alone_punct) && array1[-2].include?("<span class='deletion'")
|
90
|
+
array1.insert(-2, "<span class='insertion'>#{v}</span>")
|
91
|
+
elsif array2.find_index(v) < (PragmaticTokenizer::Tokenizer.new(downcase: false).tokenize(string1).count)
|
92
|
+
# Count how many insertions up to the original position
|
93
|
+
insertions_count = 0
|
94
|
+
deletions_count = 0
|
95
|
+
for i in 0..(array2.find_index(v)+1) do
|
96
|
+
insertions_count += 1 if array1[i].include?("<span class='insertion'")
|
97
|
+
deletions_count += 1 if array1[i].include?("<span class='deletion'")
|
98
|
+
end
|
99
|
+
array1.insert(((find_correct_index(v, array1, array2) + insertions_count + deletions_count) - 1), "<span class='insertion'>#{v}</span>")
|
100
|
+
else
|
101
|
+
# Otherwise we put it on the end.
|
102
|
+
array1.insert(-1, "<span class='insertion'>#{v}</span>")
|
103
|
+
end
|
50
104
|
else
|
51
105
|
array1.insert(index + 1, "<span class='insertion'>#{v}</span>")
|
52
106
|
end
|
53
107
|
end
|
54
|
-
|
55
108
|
array1
|
56
109
|
end
|
57
110
|
|
@@ -60,6 +113,11 @@ module StringDiff
|
|
60
113
|
dup2 = array2.find_all { |e| array2.count(e) > 1 }
|
61
114
|
|
62
115
|
missing_words = (dup1 - dup2).uniq
|
116
|
+
additional_words = (dup2 - dup1).uniq
|
117
|
+
|
118
|
+
unless additional_words.empty?
|
119
|
+
set_additional_duplicates_indexes(array2, additional_words)
|
120
|
+
end
|
63
121
|
|
64
122
|
duplicate_indexs_of_array1 = []
|
65
123
|
duplicate_indexs_of_array2 = []
|
@@ -79,6 +137,28 @@ module StringDiff
|
|
79
137
|
end
|
80
138
|
end
|
81
139
|
|
140
|
+
# Records, in @additional_indexes, every position in array whose element
# equals the first entry of dup.
#
# array - Array to scan.
# dup   - Array whose first element is the value to look for.
#
# Returns the Array of matching Integer indexes (the value stored in
# @additional_indexes).
def set_additional_duplicates_indexes(array, dup)
  wanted = dup[0]
  positions = []
  array.each_with_index do |element, position|
    positions << position if element == wanted
  end
  @additional_indexes = positions
end
|
143
|
+
|
144
|
+
# Picks the index that an inserted token should be anchored to.
#
# When @additional_indexes is set (it is assigned by
# set_additional_duplicates_indexes), the result is the entry of
# @additional_indexes selected by how many insertion spans for exactly this
# token array1 already holds, so repeated insertions of the same word use
# successive entries. Otherwise it is the first index of token in array2.
#
# token  - String token being inserted.
# array1 - Array of plain tokens and annotated "<span ...>" strings.
# array2 - Array searched for token when no additional indexes are recorded.
#
# Returns an Integer index, or nil when there is no matching entry/occurrence.
def find_correct_index(token, array1, array2)
  return array2.find_index(token) if @additional_indexes.nil?

  # Compare against the whole span: a prefix check would also count insertions
  # of longer words that merely start with token (e.g. "then" for "the").
  inserted_span = "<span class='insertion'>#{token}</span>"
  @additional_indexes[array1.count(inserted_span)]
end
|
161
|
+
|
82
162
|
def construct_string(array1)
|
83
163
|
string = ""
|
84
164
|
|
@@ -89,12 +169,40 @@ module StringDiff
|
|
89
169
|
if token.include?("<span")
|
90
170
|
if token.scan(/(?<='>).*(?=<\/)/)[0] !~ /[[:punct:]]/ || string1.include?(" #{token.scan(/(?<='>).*(?=<\/)/)[0]}")
|
91
171
|
string += " #{token}"
|
172
|
+
elsif !( token.scan(/(?<='>).*(?=<\/)/)[0] =~ (/[']/) ).nil?
|
173
|
+
if string.scan(/[']/).empty? || string.scan(/[(]/).empty?
|
174
|
+
string += " #{token}#{array1[i+1]}"
|
175
|
+
array1.slice!(i+1)
|
176
|
+
else
|
177
|
+
string += token
|
178
|
+
end
|
179
|
+
elsif !( token.scan(/(?<='>).*(?=<\/)/)[0] =~ (/[(]/) ).nil?
|
180
|
+
if string.scan(/[(]/).empty?
|
181
|
+
string += " #{token}#{array1[i+1]}"
|
182
|
+
array1.slice!(i+1)
|
183
|
+
else
|
184
|
+
string += token
|
185
|
+
end
|
92
186
|
else
|
93
187
|
string += token
|
94
188
|
end
|
95
189
|
else
|
96
|
-
if token !~ /[[:punct:]]/
|
190
|
+
if token !~ /[[:punct:]]/
|
97
191
|
string += " #{token}"
|
192
|
+
elsif !( token =~ (/[']/) ).nil?
|
193
|
+
if string.scan(/[']/).empty?
|
194
|
+
string += " #{token}#{array1[i+1]}"
|
195
|
+
array1.slice!(i+1)
|
196
|
+
else
|
197
|
+
string += token
|
198
|
+
end
|
199
|
+
elsif !( token =~ (/[(]/) ).nil?
|
200
|
+
if string.scan(/[(]/).empty?
|
201
|
+
string += " #{token}#{array1[i+1]}"
|
202
|
+
array1.slice!(i+1)
|
203
|
+
else
|
204
|
+
string += token
|
205
|
+
end
|
98
206
|
else
|
99
207
|
string += token
|
100
208
|
end
|
@@ -103,6 +211,5 @@ module StringDiff
|
|
103
211
|
end
|
104
212
|
string
|
105
213
|
end
|
106
|
-
|
107
214
|
end
|
108
|
-
end
|
215
|
+
end
|
data/lib/string_diff/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_diff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- natanio
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-
|
11
|
+
date: 2016-05-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|