string_diff 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3f70c7948e72d79c0b55964ae736f1fc809fea94
4
- data.tar.gz: 386023934c17d1e9537ac938b0d194291fd38f01
3
+ metadata.gz: 8b8695203741c40b206db357d9df4c02f2bc5787
4
+ data.tar.gz: c8c5ef569e59e6aa86862e860e2fc3bd29cc0c2d
5
5
  SHA512:
6
- metadata.gz: 9270d80153e93f2cba4654aa2363f0a572d8d616526b313b5d342cb91aa66a150180b806a217498c0836c40830e3dd001e7980b29a62dbea76a3d2a0d868a143
7
- data.tar.gz: 1cf252fc68bbaa2aa3fc64cd00b5c22c019b25d5f5fd7a032c505d9198d19b2eb550899009a61916740df48068e68c11404a06fb4ab0b9b14334d955bc9cbfd2
6
+ metadata.gz: 27e8f9967dcd916a1b8296274a805965266502c2aff01b113fd4aa649f9ee7f1cf71d6d21d4856ffae8c3b64e69d45b286be3428f7f74e2c527e38ac6f3c5995
7
+ data.tar.gz: 3f650464f981076bbea98a11e799df2613db1fff523ecd79a31dcbd89644fda1d64c7e645626eb05b885a1b1a00168f46db4534f328a6756240da1fbb82f2039
data/README.md CHANGED
@@ -44,6 +44,9 @@ StringDiff::Diff.new(string_1, string_2).diff
44
44
  # => "hello <span class='deletion'>world</span>"
45
45
  ```
46
46
 
47
+ ## Known Bugs
48
+
49
+ The gem does not currently handle words that have simply changed position in the string but are otherwise unchanged.
47
50
 
48
51
  ## Development
49
52
 
@@ -14,14 +14,47 @@ module StringDiff
14
14
  a1 = PragmaticTokenizer::Tokenizer.new(downcase: false).tokenize(string1)
15
15
  a2 = PragmaticTokenizer::Tokenizer.new(downcase: false).tokenize(string2)
16
16
 
17
- construct_string(compare(a1, a2))
17
+ construct_string(compare(process_parens(a1), process_parens(a2)))
18
18
  end
19
19
 
20
20
  private
21
21
 
22
+ def process_parens(array)
23
+ if array.include?('(') && array.include?(')')
24
+ array_open_parens_indexes = array.each_index.select{|i| array[i] == "("}
25
+ array_closed_parens_indexes = array.each_index.select{|i| array[i] == ")"}
26
+
27
+ if array_open_parens_indexes.count == array_closed_parens_indexes.count
28
+ removed_count = 0
29
+ array_open_parens_indexes.each do |i|
30
+ combined_string = ""
31
+ combined_string += (array[i-removed_count] + array[i+1-removed_count])
32
+ array.delete_at(i-removed_count)
33
+ array.delete_at(i-removed_count)
34
+ array.insert(i-removed_count, combined_string)
35
+ removed_count += 1
36
+ end
37
+
38
+ array_closed_parens_indexes.each do |i|
39
+ combined_string = ""
40
+ combined_string += (array[i-(removed_count+1)] + array[i-removed_count])
41
+ array.delete_at(i-(removed_count+1))
42
+ array.delete_at(i-(removed_count+1))
43
+ array.insert(i-(removed_count+1), combined_string)
44
+ removed_count += 1
45
+ end
46
+ end
47
+ else
48
+ array
49
+ end
50
+ array
51
+ end
52
+
22
53
  def compare(array1, array2)
23
54
  deletions = array1 - array2
55
+ puts "deletions: #{deletions.to_s}"
24
56
  insertions = array2 - array1
57
+ puts "insertions: #{insertions.to_s}"
25
58
 
26
59
  process_duplicates(array1, array2)
27
60
  annotate_deletions(deletions, array1)
@@ -46,12 +79,32 @@ module StringDiff
46
79
  end
47
80
 
48
81
  if index.nil?
49
- array1.insert(-1, "<span class='insertion'>#{v}</span>")
82
+ # Check whether or not we're dealing with an annotated deletion/insertion, or plain token
83
+ contains_span = array1.last.include?("<span") ? true : false
84
+ contains_punct_in_span = !(array1.last.scan(/(?<='>).*(?=<\/)/)[0] =~ (/[[:punct:]]/)).nil? if contains_span
85
+ stand_alone_punct = array1.last =~ (/[[:punct:]]/) if !contains_span
86
+
87
+ # If there is punctuation after a deletion, we need to make sure the
88
+ # insertion is added before the punctuation.
89
+ if (contains_punct_in_span || stand_alone_punct) && array1[-2].include?("<span class='deletion'")
90
+ array1.insert(-2, "<span class='insertion'>#{v}</span>")
91
+ elsif array2.find_index(v) < (PragmaticTokenizer::Tokenizer.new(downcase: false).tokenize(string1).count)
92
+ # Count how many insertions up to the original position
93
+ insertions_count = 0
94
+ deletions_count = 0
95
+ for i in 0..(array2.find_index(v)+1) do
96
+ insertions_count += 1 if array1[i].include?("<span class='insertion'")
97
+ deletions_count += 1 if array1[i].include?("<span class='deletion'")
98
+ end
99
+ array1.insert(((find_correct_index(v, array1, array2) + insertions_count + deletions_count) - 1), "<span class='insertion'>#{v}</span>")
100
+ else
101
+ # Otherwise we put it on the end.
102
+ array1.insert(-1, "<span class='insertion'>#{v}</span>")
103
+ end
50
104
  else
51
105
  array1.insert(index + 1, "<span class='insertion'>#{v}</span>")
52
106
  end
53
107
  end
54
-
55
108
  array1
56
109
  end
57
110
 
@@ -60,6 +113,11 @@ module StringDiff
60
113
  dup2 = array2.find_all { |e| array2.count(e) > 1 }
61
114
 
62
115
  missing_words = (dup1 - dup2).uniq
116
+ additional_words = (dup2 - dup1).uniq
117
+
118
+ unless additional_words.empty?
119
+ set_additional_duplicates_indexes(array2, additional_words)
120
+ end
63
121
 
64
122
  duplicate_indexs_of_array1 = []
65
123
  duplicate_indexs_of_array2 = []
@@ -79,6 +137,28 @@ module StringDiff
79
137
  end
80
138
  end
81
139
 
140
+ def set_additional_duplicates_indexes(array, dup)
141
+ @additional_indexes = array.each_index.select{|i| array[i] == dup[0]}
142
+ end
143
+
144
+ def find_correct_index(token, array1, array2)
145
+ unless @additional_indexes.nil?
146
+ # We need to find if the word has already been added, if so, use a later index
147
+ appeared_count = 0
148
+ array1.each do |item|
149
+ appeared_count += 1 if item.include?("<span class='insertion'>#{token}")
150
+ end
151
+
152
+ if appeared_count == 0
153
+ @additional_indexes[0]
154
+ else
155
+ @additional_indexes[appeared_count]
156
+ end
157
+ else
158
+ array2.find_index(token)
159
+ end
160
+ end
161
+
82
162
  def construct_string(array1)
83
163
  string = ""
84
164
 
@@ -89,12 +169,40 @@ module StringDiff
89
169
  if token.include?("<span")
90
170
  if token.scan(/(?<='>).*(?=<\/)/)[0] !~ /[[:punct:]]/ || string1.include?(" #{token.scan(/(?<='>).*(?=<\/)/)[0]}")
91
171
  string += " #{token}"
172
+ elsif !( token.scan(/(?<='>).*(?=<\/)/)[0] =~ (/[']/) ).nil?
173
+ if string.scan(/[']/).empty? || string.scan(/[(]/).empty?
174
+ string += " #{token}#{array1[i+1]}"
175
+ array1.slice!(i+1)
176
+ else
177
+ string += token
178
+ end
179
+ elsif !( token.scan(/(?<='>).*(?=<\/)/)[0] =~ (/[(]/) ).nil?
180
+ if string.scan(/[(]/).empty?
181
+ string += " #{token}#{array1[i+1]}"
182
+ array1.slice!(i+1)
183
+ else
184
+ string += token
185
+ end
92
186
  else
93
187
  string += token
94
188
  end
95
189
  else
96
- if token !~ /[[:punct:]]/ || string1.include?(" #{token.scan(/(?<='>).*(?=<\/)/)[0]}")
190
+ if token !~ /[[:punct:]]/
97
191
  string += " #{token}"
192
+ elsif !( token =~ (/[']/) ).nil?
193
+ if string.scan(/[']/).empty?
194
+ string += " #{token}#{array1[i+1]}"
195
+ array1.slice!(i+1)
196
+ else
197
+ string += token
198
+ end
199
+ elsif !( token =~ (/[(]/) ).nil?
200
+ if string.scan(/[(]/).empty?
201
+ string += " #{token}#{array1[i+1]}"
202
+ array1.slice!(i+1)
203
+ else
204
+ string += token
205
+ end
98
206
  else
99
207
  string += token
100
208
  end
@@ -103,6 +211,5 @@ module StringDiff
103
211
  end
104
212
  string
105
213
  end
106
-
107
214
  end
108
- end
215
+ end
@@ -1,3 +1,3 @@
1
1
  module StringDiff
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string_diff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - natanio
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-05-19 00:00:00.000000000 Z
11
+ date: 2016-05-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler