string_diff 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3f70c7948e72d79c0b55964ae736f1fc809fea94
4
- data.tar.gz: 386023934c17d1e9537ac938b0d194291fd38f01
3
+ metadata.gz: 8b8695203741c40b206db357d9df4c02f2bc5787
4
+ data.tar.gz: c8c5ef569e59e6aa86862e860e2fc3bd29cc0c2d
5
5
  SHA512:
6
- metadata.gz: 9270d80153e93f2cba4654aa2363f0a572d8d616526b313b5d342cb91aa66a150180b806a217498c0836c40830e3dd001e7980b29a62dbea76a3d2a0d868a143
7
- data.tar.gz: 1cf252fc68bbaa2aa3fc64cd00b5c22c019b25d5f5fd7a032c505d9198d19b2eb550899009a61916740df48068e68c11404a06fb4ab0b9b14334d955bc9cbfd2
6
+ metadata.gz: 27e8f9967dcd916a1b8296274a805965266502c2aff01b113fd4aa649f9ee7f1cf71d6d21d4856ffae8c3b64e69d45b286be3428f7f74e2c527e38ac6f3c5995
7
+ data.tar.gz: 3f650464f981076bbea98a11e799df2613db1fff523ecd79a31dcbd89644fda1d64c7e645626eb05b885a1b1a00168f46db4534f328a6756240da1fbb82f2039
data/README.md CHANGED
@@ -44,6 +44,9 @@ StringDiff::Diff.new(string_1, string_2).diff
44
44
  # => "hello <span class='deletion'>world</span>"
45
45
  ```
46
46
 
47
+ ## Known Bugs
48
+
49
+ The gem does not currently handle words that have simply changed position in the string but are otherwise unchanged.
47
50
 
48
51
  ## Development
49
52
 
@@ -14,14 +14,47 @@ module StringDiff
14
14
  a1 = PragmaticTokenizer::Tokenizer.new(downcase: false).tokenize(string1)
15
15
  a2 = PragmaticTokenizer::Tokenizer.new(downcase: false).tokenize(string2)
16
16
 
17
- construct_string(compare(a1, a2))
17
+ construct_string(compare(process_parens(a1), process_parens(a2)))
18
18
  end
19
19
 
20
20
  private
21
21
 
22
+ def process_parens(array)
23
+ if array.include?('(') && array.include?(')')
24
+ array_open_parens_indexes = array.each_index.select{|i| array[i] == "("}
25
+ array_closed_parens_indexes = array.each_index.select{|i| array[i] == ")"}
26
+
27
+ if array_open_parens_indexes.count == array_closed_parens_indexes.count
28
+ removed_count = 0
29
+ array_open_parens_indexes.each do |i|
30
+ combined_string = ""
31
+ combined_string += (array[i-removed_count] + array[i+1-removed_count])
32
+ array.delete_at(i-removed_count)
33
+ array.delete_at(i-removed_count)
34
+ array.insert(i-removed_count, combined_string)
35
+ removed_count += 1
36
+ end
37
+
38
+ array_closed_parens_indexes.each do |i|
39
+ combined_string = ""
40
+ combined_string += (array[i-(removed_count+1)] + array[i-removed_count])
41
+ array.delete_at(i-(removed_count+1))
42
+ array.delete_at(i-(removed_count+1))
43
+ array.insert(i-(removed_count+1), combined_string)
44
+ removed_count += 1
45
+ end
46
+ end
47
+ else
48
+ array
49
+ end
50
+ array
51
+ end
52
+
22
53
  def compare(array1, array2)
23
54
  deletions = array1 - array2
55
+ puts "deletions: #{deletions.to_s}"
24
56
  insertions = array2 - array1
57
+ puts "insertions: #{insertions.to_s}"
25
58
 
26
59
  process_duplicates(array1, array2)
27
60
  annotate_deletions(deletions, array1)
@@ -46,12 +79,32 @@ module StringDiff
46
79
  end
47
80
 
48
81
  if index.nil?
49
- array1.insert(-1, "<span class='insertion'>#{v}</span>")
82
+ # Check whether or not we're dealing with an annotated deletion/insertion, or plain token
83
+ contains_span = array1.last.include?("<span") ? true : false
84
+ contains_punct_in_span = !(array1.last.scan(/(?<='>).*(?=<\/)/)[0] =~ (/[[:punct:]]/)).nil? if contains_span
85
+ stand_alone_punct = array1.last =~ (/[[:punct:]]/) if !contains_span
86
+
87
+ # If there is punctuation after a deletion, we need to make sure the
88
+ # insertion is added before the punctuation.
89
+ if (contains_punct_in_span || stand_alone_punct) && array1[-2].include?("<span class='deletion'")
90
+ array1.insert(-2, "<span class='insertion'>#{v}</span>")
91
+ elsif array2.find_index(v) < (PragmaticTokenizer::Tokenizer.new(downcase: false).tokenize(string1).count)
92
+ # Count how many insertions up to the original position
93
+ insertions_count = 0
94
+ deletions_count = 0
95
+ for i in 0..(array2.find_index(v)+1) do
96
+ insertions_count += 1 if array1[i].include?("<span class='insertion'")
97
+ deletions_count += 1 if array1[i].include?("<span class='deletion'")
98
+ end
99
+ array1.insert(((find_correct_index(v, array1, array2) + insertions_count + deletions_count) - 1), "<span class='insertion'>#{v}</span>")
100
+ else
101
+ # Otherwise we put it on the end.
102
+ array1.insert(-1, "<span class='insertion'>#{v}</span>")
103
+ end
50
104
  else
51
105
  array1.insert(index + 1, "<span class='insertion'>#{v}</span>")
52
106
  end
53
107
  end
54
-
55
108
  array1
56
109
  end
57
110
 
@@ -60,6 +113,11 @@ module StringDiff
60
113
  dup2 = array2.find_all { |e| array2.count(e) > 1 }
61
114
 
62
115
  missing_words = (dup1 - dup2).uniq
116
+ additional_words = (dup2 - dup1).uniq
117
+
118
+ unless additional_words.empty?
119
+ set_additional_duplicates_indexes(array2, additional_words)
120
+ end
63
121
 
64
122
  duplicate_indexs_of_array1 = []
65
123
  duplicate_indexs_of_array2 = []
@@ -79,6 +137,28 @@ module StringDiff
79
137
  end
80
138
  end
81
139
 
140
+ def set_additional_duplicates_indexes(array, dup)
141
+ @additional_indexes = array.each_index.select{|i| array[i] == dup[0]}
142
+ end
143
+
144
+ def find_correct_index(token, array1, array2)
145
+ unless @additional_indexes.nil?
146
+ # We need to find if the word has already been added, if so, use a later index
147
+ appeared_count = 0
148
+ array1.each do |item|
149
+ appeared_count += 1 if item.include?("<span class='insertion'>#{token}")
150
+ end
151
+
152
+ if appeared_count == 0
153
+ @additional_indexes[0]
154
+ else
155
+ @additional_indexes[appeared_count]
156
+ end
157
+ else
158
+ array2.find_index(token)
159
+ end
160
+ end
161
+
82
162
  def construct_string(array1)
83
163
  string = ""
84
164
 
@@ -89,12 +169,40 @@ module StringDiff
89
169
  if token.include?("<span")
90
170
  if token.scan(/(?<='>).*(?=<\/)/)[0] !~ /[[:punct:]]/ || string1.include?(" #{token.scan(/(?<='>).*(?=<\/)/)[0]}")
91
171
  string += " #{token}"
172
+ elsif !( token.scan(/(?<='>).*(?=<\/)/)[0] =~ (/[']/) ).nil?
173
+ if string.scan(/[']/).empty? || string.scan(/[(]/).empty?
174
+ string += " #{token}#{array1[i+1]}"
175
+ array1.slice!(i+1)
176
+ else
177
+ string += token
178
+ end
179
+ elsif !( token.scan(/(?<='>).*(?=<\/)/)[0] =~ (/[(]/) ).nil?
180
+ if string.scan(/[(]/).empty?
181
+ string += " #{token}#{array1[i+1]}"
182
+ array1.slice!(i+1)
183
+ else
184
+ string += token
185
+ end
92
186
  else
93
187
  string += token
94
188
  end
95
189
  else
96
- if token !~ /[[:punct:]]/ || string1.include?(" #{token.scan(/(?<='>).*(?=<\/)/)[0]}")
190
+ if token !~ /[[:punct:]]/
97
191
  string += " #{token}"
192
+ elsif !( token =~ (/[']/) ).nil?
193
+ if string.scan(/[']/).empty?
194
+ string += " #{token}#{array1[i+1]}"
195
+ array1.slice!(i+1)
196
+ else
197
+ string += token
198
+ end
199
+ elsif !( token =~ (/[(]/) ).nil?
200
+ if string.scan(/[(]/).empty?
201
+ string += " #{token}#{array1[i+1]}"
202
+ array1.slice!(i+1)
203
+ else
204
+ string += token
205
+ end
98
206
  else
99
207
  string += token
100
208
  end
@@ -103,6 +211,5 @@ module StringDiff
103
211
  end
104
212
  string
105
213
  end
106
-
107
214
  end
108
- end
215
+ end
@@ -1,3 +1,3 @@
1
1
  module StringDiff
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string_diff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - natanio
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-05-19 00:00:00.000000000 Z
11
+ date: 2016-05-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler