RubyGems - string_diff - Versions diffs - 0.1.1 → 0.1.2 - Mend

string_diff 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 3f70c7948e72d79c0b55964ae736f1fc809fea94
-  data.tar.gz: 386023934c17d1e9537ac938b0d194291fd38f01
+  metadata.gz: 8b8695203741c40b206db357d9df4c02f2bc5787
+  data.tar.gz: c8c5ef569e59e6aa86862e860e2fc3bd29cc0c2d
 SHA512:
-  metadata.gz: 9270d80153e93f2cba4654aa2363f0a572d8d616526b313b5d342cb91aa66a150180b806a217498c0836c40830e3dd001e7980b29a62dbea76a3d2a0d868a143
-  data.tar.gz: 1cf252fc68bbaa2aa3fc64cd00b5c22c019b25d5f5fd7a032c505d9198d19b2eb550899009a61916740df48068e68c11404a06fb4ab0b9b14334d955bc9cbfd2
+  metadata.gz: 27e8f9967dcd916a1b8296274a805965266502c2aff01b113fd4aa649f9ee7f1cf71d6d21d4856ffae8c3b64e69d45b286be3428f7f74e2c527e38ac6f3c5995
+  data.tar.gz: 3f650464f981076bbea98a11e799df2613db1fff523ecd79a31dcbd89644fda1d64c7e645626eb05b885a1b1a00168f46db4534f328a6756240da1fbb82f2039

data/README.md CHANGED

@@ -44,6 +44,9 @@ StringDiff::Diff.new(string_1, string_2).diff
 # => "hello <span class='deletion'>world</span>"
 ```
+## Known Bugs
+The gem does not currently handle words that have simply changed position in the string but are otherwise unchanged.
 ## Development

data/lib/string_diff.rb CHANGED

@@ -14,14 +14,47 @@ module StringDiff
       a1 = PragmaticTokenizer::Tokenizer.new(downcase: false).tokenize(string1)
       a2 = PragmaticTokenizer::Tokenizer.new(downcase: false).tokenize(string2)
-      construct_string(compare(a1, a2))
+      construct_string(compare(process_parens(a1), process_parens(a2)))
     end
     private
+    def process_parens(array)
       # Glues parenthesis tokens onto their neighbours: every "(" is merged
       # with the token after it and every ")" with the token before it.
       # `array` is modified in place and is also the return value. Nothing is
       # merged unless both "(" and ")" occur, and occur equally often.
+      if array.include?('(') && array.include?(')')
         # Positions of the parenthesis tokens as they stand before any merge.
+        array_open_parens_indexes = array.each_index.select{|i| array[i] == "("}
+        array_closed_parens_indexes = array.each_index.select{|i| array[i] == ")"}
+        if array_open_parens_indexes.count == array_closed_parens_indexes.count
           # Each merge swaps two elements for one, so the array shrinks by one;
           # removed_count is the running shift applied to the recorded indexes.
+          removed_count = 0
           # NOTE(review): array[i+1] is nil when "(" is the last token, which
           # would make the String concatenation below raise — confirm.
+          array_open_parens_indexes.each do |i|
+            combined_string = ""
             # "(" followed by the token right after it.
+            combined_string += (array[i-removed_count] + array[i+1-removed_count])
             # Drop both originals (same index twice) and put the merged token back.
+            array.delete_at(i-removed_count)
+            array.delete_at(i-removed_count)
+            array.insert(i-removed_count, combined_string)
+            removed_count += 1
+          end
           # NOTE(review): at this point removed_count already counts every "("
           # merge, including "(" tokens located after the ")" being handled.
           # With more than one pair (e.g. "( a ) ( b )") the computed index
           # looks like it can go negative and pick the wrong elements — confirm.
+          array_closed_parens_indexes.each do |i|
+            combined_string = ""
             # The token right before ")" followed by ")" itself.
+            combined_string += (array[i-(removed_count+1)] + array[i-removed_count])
+            array.delete_at(i-(removed_count+1))
+            array.delete_at(i-(removed_count+1))
+            array.insert(i-(removed_count+1), combined_string)
+            removed_count += 1
+          end
+        end
+      else
         # The value of this branch is unused; the method returns `array` below.
+        array
+      end
+      array
+    end
     def compare(array1, array2)
       deletions = array1 - array2
+      puts "deletions: #{deletions.to_s}"
       insertions = array2 - array1
+      puts "insertions: #{insertions.to_s}"
       process_duplicates(array1, array2)
       annotate_deletions(deletions, array1)
@@ -46,12 +79,32 @@ module StringDiff
         end
         if index.nil?
-          array1.insert(-1, "<span class='insertion'>#{v}</span>")
+          # Check whether or not we're dealing with an annotated deletion/insertion, or plain token
+          contains_span = array1.last.include?("<span") ? true : false
+          contains_punct_in_span = !(array1.last.scan(/(?<='>).*(?=<\/)/)[0] =~ (/[[:punct:]]/)).nil? if contains_span
+          stand_alone_punct = array1.last =~ (/[[:punct:]]/) if !contains_span
+          # If there is punctuation after a deletion, we need to make sure the
+          # insertion is added before the punctuation.
+          if (contains_punct_in_span || stand_alone_punct) && array1[-2].include?("<span class='deletion'")
+            array1.insert(-2, "<span class='insertion'>#{v}</span>")
+          elsif array2.find_index(v) < (PragmaticTokenizer::Tokenizer.new(downcase: false).tokenize(string1).count)
+            # Count how many insertions up to the original position
+            insertions_count = 0
+            deletions_count = 0
+            for i in 0..(array2.find_index(v)+1) do
+              insertions_count += 1 if array1[i].include?("<span class='insertion'")
+              deletions_count += 1 if array1[i].include?("<span class='deletion'")
+            end
+            array1.insert(((find_correct_index(v, array1, array2) + insertions_count + deletions_count) - 1), "<span class='insertion'>#{v}</span>")
+          else
+            # Otherwise we put it on the end.
+             array1.insert(-1, "<span class='insertion'>#{v}</span>")
+          end
         else
           array1.insert(index + 1, "<span class='insertion'>#{v}</span>")
         end
       end
       array1
     end
@@ -60,6 +113,11 @@ module StringDiff
       dup2 = array2.find_all { |e| array2.count(e) > 1 }
       missing_words = (dup1 - dup2).uniq
+      additional_words = (dup2 - dup1).uniq
+      unless additional_words.empty?
+        set_additional_duplicates_indexes(array2, additional_words)
+      end
       duplicate_indexs_of_array1 = []
       duplicate_indexs_of_array2 = []
@@ -79,6 +137,28 @@ module StringDiff
       end
     end
+    def set_additional_duplicates_indexes(array, dup)
       # Records, in @additional_indexes, every index of `array` whose element
       # equals dup[0]. Only the first entry of `dup` is looked at; any further
       # entries are ignored.
+      @additional_indexes = array.each_index.select{|i| array[i] == dup[0]}
+    end
+    def find_correct_index(token, array1, array2)
       # Returns an index for `token`. When @additional_indexes has been set,
       # the result is taken from that list, chosen by how many entries of
       # array1 already contain an insertion span starting with `token`.
       # Otherwise it is the first position of `token` in array2.
       # NOTE(review): @additional_indexes is recorded for one specific word
       # (dup[0] in set_additional_duplicates_indexes) but is consulted here for
       # any `token` — confirm that is intended.
+      unless @additional_indexes.nil?
+        # We need to find if the word has already been added, if so, use a later index
+        appeared_count = 0
+        array1.each do |item|
           # Substring match without the closing tag, so an inserted word that
           # merely begins with `token` is counted as well.
+          appeared_count += 1 if item.include?("<span class='insertion'>#{token}")
+        end
         # Both branches are equivalent to @additional_indexes[appeared_count];
         # the result is nil if appeared_count is past the end of the list.
+        if appeared_count == 0
+          @additional_indexes[0]
+        else
+          @additional_indexes[appeared_count]
+        end
+      else
+        array2.find_index(token)
+      end
+    end
     def construct_string(array1)
       string = ""
@@ -89,12 +169,40 @@ module StringDiff
           if token.include?("<span")
             if token.scan(/(?<='>).*(?=<\/)/)[0] !~ /[[:punct:]]/ || string1.include?(" #{token.scan(/(?<='>).*(?=<\/)/)[0]}")
               string += " #{token}"
+            elsif !( token.scan(/(?<='>).*(?=<\/)/)[0] =~ (/[']/) ).nil?
+              if string.scan(/[']/).empty? || string.scan(/[(]/).empty?
+                string += " #{token}#{array1[i+1]}"
+                array1.slice!(i+1)
+              else
+                string += token
+              end
+            elsif !( token.scan(/(?<='>).*(?=<\/)/)[0] =~ (/[(]/) ).nil?
+              if string.scan(/[(]/).empty?
+                string += " #{token}#{array1[i+1]}"
+                array1.slice!(i+1)
+              else
+                string += token
+              end
             else
               string += token
             end
           else
-            if token !~ /[[:punct:]]/ || string1.include?(" #{token.scan(/(?<='>).*(?=<\/)/)[0]}")
+            if token !~ /[[:punct:]]/
               string += " #{token}"
+            elsif !( token =~ (/[']/) ).nil?
+              if string.scan(/[']/).empty?
+                string += " #{token}#{array1[i+1]}"
+                array1.slice!(i+1)
+              else
+                string += token
+              end
+            elsif !( token =~ (/[(]/) ).nil?
+              if string.scan(/[(]/).empty?
+                string += " #{token}#{array1[i+1]}"
+                array1.slice!(i+1)
+              else
+                string += token
+              end
             else
               string += token
             end
@@ -103,6 +211,5 @@ module StringDiff
       end
       string
     end
   end
-end
+end

data/lib/string_diff/version.rb CHANGED

@@ -1,3 +1,3 @@
 module StringDiff
-  VERSION = "0.1.1"
+  VERSION = "0.1.2"
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: string_diff
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - natanio
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-05-19 00:00:00.000000000 Z
+date: 2016-05-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler