html-dom-diff 0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/html-dom-diff/delta_tree_builder.rb +40 -7
- data/lib/html-dom-diff/differ.rb +25 -21
- data/lib/html-dom-diff/node.rb +4 -2
- data/lib/html-dom-diff/version.rb +1 -1
- metadata +2 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 0d731a32d076bd3fc64436c74dff88fa653349a1dc35fd7dfe1598bb7210b001
         | 
| 4 | 
            +
              data.tar.gz: 79e7466f13f022fd357dd4997d5f168aa6a68b56e9e4f78f494b645658d1fbb5
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: ca5d8dcc3952fff8d1b4f03cad348b178f23b1b7c27de085117fce57eba4596fd8e05875f7fc84d9608cbb0c4d4c0ad7665d7d62bfdf1c513d336b0bfa7cdb2d
         | 
| 7 | 
            +
              data.tar.gz: 1590f4c2fc8325396d07c2483d2f59f82348c4485132653ca966662a3e62456e9da93ebd687b65858365479b34313d26341dcb4e5edb2de325302e0a910afc3b
         | 
| @@ -1,22 +1,55 @@ | |
| 1 1 | 
             
            module HTMLDOMDiff
         | 
| 2 2 | 
             
              class DeltaTreeBuilder
         | 
| 3 3 | 
             
                attr_reader :ldoc, :rdoc
         | 
| 4 | 
            -
                def initialize(ldoc, rdoc | 
| 4 | 
            +
                def initialize(ldoc, rdoc)
         | 
| 5 5 | 
             
                  @ldoc     = ldoc
         | 
| 6 6 | 
             
                  @rdoc     = rdoc
         | 
| 7 | 
            -
                  @weights  =  | 
| 8 | 
            -
                  @forward  =  | 
| 9 | 
            -
                  @backward =  | 
| 7 | 
            +
                  @weights  = {}
         | 
| 8 | 
            +
                  @forward  = {}
         | 
| 9 | 
            +
                  @backward = {}
         | 
| 10 10 | 
             
                end
         | 
| 11 11 |  | 
| 12 | 
            -
                def  | 
| 12 | 
            +
                def root
         | 
| 13 13 | 
             
                  wrap @rdoc
         | 
| 14 14 | 
             
                end
         | 
| 15 15 |  | 
| 16 | 
            +
                def total_weight
         | 
| 17 | 
            +
                  @weights[ldoc].to_f + @weights[rdoc].to_f
         | 
| 18 | 
            +
                end
         | 
| 19 | 
            +
             | 
| 20 | 
            +
                def add_weight(element, weight)
         | 
| 21 | 
            +
                  @weights[element] = weight
         | 
| 22 | 
            +
                end
         | 
| 23 | 
            +
             | 
| 24 | 
            +
                def weight(element)
         | 
| 25 | 
            +
                  @weights[element]
         | 
| 26 | 
            +
                end
         | 
| 27 | 
            +
             | 
| 28 | 
            +
                def match(left, right)
         | 
| 29 | 
            +
                  @forward[left]   = right
         | 
| 30 | 
            +
                  @backward[right] = left
         | 
| 31 | 
            +
                end
         | 
| 32 | 
            +
             | 
| 33 | 
            +
                def left_matches?(lnode, rnode)
         | 
| 34 | 
            +
                  @forward[lnode] == rnode
         | 
| 35 | 
            +
                end
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                def left_match(lnode)
         | 
| 38 | 
            +
                  @forward[lnode]
         | 
| 39 | 
            +
                end
         | 
| 40 | 
            +
             | 
| 41 | 
            +
                def left_matched?(lnode)
         | 
| 42 | 
            +
                  @forward.has_key?(lnode)
         | 
| 43 | 
            +
                end
         | 
| 44 | 
            +
             | 
| 45 | 
            +
                def right_matched?(rnode)
         | 
| 46 | 
            +
                  @backward.has_key?(rnode)
         | 
| 47 | 
            +
                end
         | 
| 48 | 
            +
             | 
| 16 49 | 
             
                private
         | 
| 17 50 |  | 
| 18 51 | 
             
                def wrap(rnode, parent=nil)
         | 
| 19 | 
            -
                  result = Node.new rnode, @backward[rnode], parent
         | 
| 52 | 
            +
                  result = Node.new rnode, @backward[rnode], @weights[rnode], parent
         | 
| 20 53 | 
             
                  rnode.children.each do |child|
         | 
| 21 54 | 
             
                    wrap child, result
         | 
| 22 55 | 
             
                  end
         | 
| @@ -33,7 +66,7 @@ module HTMLDOMDiff | |
| 33 66 |  | 
| 34 67 | 
             
                def reverse_wrap(lnode, parent)
         | 
| 35 68 | 
             
                  return if @forward[lnode]
         | 
| 36 | 
            -
                  result = Node.new nil, lnode
         | 
| 69 | 
            +
                  result = Node.new nil, lnode, @weights[lnode]
         | 
| 37 70 | 
             
                  lnode.children.each { |c| reverse_wrap c, result }
         | 
| 38 71 | 
             
                  parent.add_child result
         | 
| 39 72 | 
             
                end
         | 
    
        data/lib/html-dom-diff/differ.rb
    CHANGED
    
    | @@ -13,7 +13,7 @@ module HTMLDOMDiff | |
| 13 13 | 
             
                end
         | 
| 14 14 |  | 
| 15 15 | 
             
                def diff(ldoc, rdoc)
         | 
| 16 | 
            -
                  reset
         | 
| 16 | 
            +
                  reset ldoc, rdoc
         | 
| 17 17 |  | 
| 18 18 | 
             
                  match_by_ids ldoc, rdoc
         | 
| 19 19 | 
             
                  prep_with @lsignatures, ldoc
         | 
| @@ -27,11 +27,17 @@ module HTMLDOMDiff | |
| 27 27 | 
             
                  match_bottom_up ldoc
         | 
| 28 28 | 
             
                  match_top_down  ldoc
         | 
| 29 29 |  | 
| 30 | 
            -
                   | 
| 30 | 
            +
                  @builder
         | 
| 31 31 | 
             
                end
         | 
| 32 32 |  | 
| 33 33 | 
             
                private
         | 
| 34 34 |  | 
| 35 | 
            +
                [:left_matches?, :left_match, :left_matched?, :right_matched?].each do |m|
         | 
| 36 | 
            +
                  define_method m do |*args|
         | 
| 37 | 
            +
                    @builder.send m, *args
         | 
| 38 | 
            +
                  end
         | 
| 39 | 
            +
                end
         | 
| 40 | 
            +
             | 
| 35 41 | 
             
                def parse(string)
         | 
| 36 42 | 
             
                  Nokogiri::HTML(string, nil, nil, (Nokogiri::XML::ParseOptions::DEFAULT_HTML & Nokogiri::XML::ParseOptions::NOBLANKS))
         | 
| 37 43 | 
             
                end
         | 
| @@ -40,14 +46,12 @@ module HTMLDOMDiff | |
| 40 46 | 
             
                  Nokogiri::HTML::DocumentFragment.parse(string)
         | 
| 41 47 | 
             
                end
         | 
| 42 48 |  | 
| 43 | 
            -
                def reset
         | 
| 44 | 
            -
                  @ | 
| 45 | 
            -
                  @backward    = {}
         | 
| 46 | 
            -
                  @weights     = {}
         | 
| 49 | 
            +
                def reset(ldoc, rdoc)
         | 
| 50 | 
            +
                  @builder     = DeltaTreeBuilder.new(ldoc, rdoc)
         | 
| 47 51 | 
             
                  @depths      = {}
         | 
| 48 52 | 
             
                  @lsignatures = {}
         | 
| 49 53 | 
             
                  @rsignatures = {}
         | 
| 50 | 
            -
                  @matchqueue  = PQueue.new() { |a, b| @ | 
| 54 | 
            +
                  @matchqueue  = PQueue.new() { |a, b| @builder.weight(a) > @builder.weight(b) }
         | 
| 51 55 | 
             
                end
         | 
| 52 56 |  | 
| 53 57 | 
             
                def match_by_ids(ldoc, rdoc)
         | 
| @@ -70,11 +74,11 @@ module HTMLDOMDiff | |
| 70 74 | 
             
                    signatures << signature
         | 
| 71 75 | 
             
                  end
         | 
| 72 76 |  | 
| 73 | 
            -
                  @ | 
| 77 | 
            +
                  @builder.add_weight(element, weights)
         | 
| 74 78 | 
             
                  sig_hash[element] = hash_for(signatures)
         | 
| 75 79 | 
             
                  @depths[element]  = level
         | 
| 76 80 |  | 
| 77 | 
            -
                  [  | 
| 81 | 
            +
                  [ weights, sig_hash[element] ]
         | 
| 78 82 | 
             
                end
         | 
| 79 83 |  | 
| 80 84 | 
             
                def weight_for(element)
         | 
| @@ -98,8 +102,7 @@ module HTMLDOMDiff | |
| 98 102 | 
             
                end
         | 
| 99 103 |  | 
| 100 104 | 
             
                def record_matching(left, right)
         | 
| 101 | 
            -
                  @ | 
| 102 | 
            -
                  @backward[right] = left
         | 
| 105 | 
            +
                  @builder.match(left, right)
         | 
| 103 106 | 
             
                end
         | 
| 104 107 |  | 
| 105 108 | 
             
                def perform_initial_top_down_matching(lnodes, rnodes)
         | 
| @@ -119,7 +122,7 @@ module HTMLDOMDiff | |
| 119 122 | 
             
                def perform_initial_matching
         | 
| 120 123 | 
             
                  while @matchqueue.size > 0
         | 
| 121 124 | 
             
                    element = @matchqueue.pop
         | 
| 122 | 
            -
                    if  | 
| 125 | 
            +
                    if !right_matched?(element) && (match = find_best_match(element))
         | 
| 123 126 | 
             
                      match_all_children match, element
         | 
| 124 127 | 
             
                      match_parents match, element
         | 
| 125 128 | 
             
                    else
         | 
| @@ -131,7 +134,7 @@ module HTMLDOMDiff | |
| 131 134 | 
             
                def find_best_match(element)
         | 
| 132 135 | 
             
                  candidates = []
         | 
| 133 136 | 
             
                  @lsignatures.each do |left, sig|
         | 
| 134 | 
            -
                    if  | 
| 137 | 
            +
                    if !left_matched?(left) && sig == @rsignatures[element]
         | 
| 135 138 | 
             
                      candidates << left
         | 
| 136 139 | 
             
                    end
         | 
| 137 140 | 
             
                  end
         | 
| @@ -142,7 +145,7 @@ module HTMLDOMDiff | |
| 142 145 | 
             
                    return candidates.first
         | 
| 143 146 | 
             
                  else
         | 
| 144 147 | 
             
                    matching_parents = candidates.select do |left|
         | 
| 145 | 
            -
                       | 
| 148 | 
            +
                      left_matches?(left.parent, element.parent)
         | 
| 146 149 | 
             
                    end
         | 
| 147 150 |  | 
| 148 151 | 
             
                    if matching_parents.size == 1
         | 
| @@ -162,9 +165,9 @@ module HTMLDOMDiff | |
| 162 165 |  | 
| 163 166 | 
             
                def match_parents(left, right)
         | 
| 164 167 | 
             
                  # TODO implement multi-ancestor matching
         | 
| 165 | 
            -
                  return if  | 
| 168 | 
            +
                  return if left_matched?(left.parent) || right_matched?(right.parent)
         | 
| 166 169 | 
             
                  if left.parent.name == right.parent.name
         | 
| 167 | 
            -
                    record_matching left, right
         | 
| 170 | 
            +
                    record_matching left.parent, right.parent
         | 
| 168 171 | 
             
                  end
         | 
| 169 172 | 
             
                end
         | 
| 170 173 |  | 
| @@ -173,16 +176,17 @@ module HTMLDOMDiff | |
| 173 176 | 
             
                    match_bottom_up child
         | 
| 174 177 | 
             
                  end
         | 
| 175 178 |  | 
| 176 | 
            -
                  if element.respond_to?(:parent) &&  | 
| 177 | 
            -
                     | 
| 179 | 
            +
                  if !left_matched?(element) && element.respond_to?(:parent) && left_matched?(element.parent)
         | 
| 180 | 
            +
                    children = left_match(element.parent).children.reject { |c| right_matched?(c) }
         | 
| 181 | 
            +
                    match    = children.find { |c| c.name == element.name }
         | 
| 178 182 | 
             
                    record_matching(element, match) if match
         | 
| 179 183 | 
             
                  end
         | 
| 180 184 | 
             
                end
         | 
| 181 185 |  | 
| 182 186 | 
             
                def match_top_down(element)
         | 
| 183 | 
            -
                   | 
| 184 | 
            -
                    childmatches = element.children. | 
| 185 | 
            -
                    childmatches.reject! { |e|  | 
| 187 | 
            +
                  unless left_matched?(element)
         | 
| 188 | 
            +
                    childmatches = element.children.select { |c| left_matched?(c) }.map { |c| left_match(c).parent }.uniq
         | 
| 189 | 
            +
                    childmatches.reject! { |e| right_matched?(e) }
         | 
| 186 190 | 
             
                    if childmatches.size == 1 && childmatches.first.name == element.name
         | 
| 187 191 | 
             
                      record_matching(element, childmatches.first)
         | 
| 188 192 | 
             
                    end
         | 
    
        data/lib/html-dom-diff/node.rb
    CHANGED
    
    | @@ -1,12 +1,13 @@ | |
| 1 1 | 
             
            module HTMLDOMDiff
         | 
| 2 2 | 
             
              class Node
         | 
| 3 | 
            -
                attr_reader :parent, :children
         | 
| 3 | 
            +
                attr_reader :parent, :children, :weight
         | 
| 4 4 |  | 
| 5 5 | 
             
                attr_reader :rnode
         | 
| 6 6 |  | 
| 7 | 
            -
                def initialize(rnode, lnode, parent=nil)
         | 
| 7 | 
            +
                def initialize(rnode, lnode, weight, parent=nil)
         | 
| 8 8 | 
             
                  @rnode    = rnode
         | 
| 9 9 | 
             
                  @lnode    = lnode
         | 
| 10 | 
            +
                  @weight   = weight
         | 
| 10 11 | 
             
                  @parent   = parent
         | 
| 11 12 | 
             
                  @children = []
         | 
| 12 13 | 
             
                end
         | 
| @@ -71,6 +72,7 @@ module HTMLDOMDiff | |
| 71 72 |  | 
| 72 73 | 
             
                # states
         | 
| 73 74 | 
             
                def changed?
         | 
| 75 | 
            +
                  return false unless @rnode && @lnode
         | 
| 74 76 | 
             
                  if @rnode.text?
         | 
| 75 77 | 
             
                    text != original_text
         | 
| 76 78 | 
             
                  else
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: html-dom-diff
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version:  | 
| 4 | 
            +
              version: 0.1.1
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Frederik Fix
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2018-04- | 
| 11 | 
            +
            date: 2018-04-08 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: nokogiri
         |