tenderlove-tree_diff 1.0.0.20090329202825

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,23 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'autotest/restart'
4
+
5
+ # Autotest.add_hook :initialize do |at|
6
+ # at.extra_files << "../some/external/dependency.rb"
7
+ #
8
+ # at.libs << ":../some/external"
9
+ #
10
+ # at.add_exception 'vendor'
11
+ #
12
+ # at.add_mapping(/dependency.rb/) do |f, _|
13
+ # at.files_matching(/test_.*rb$/)
14
+ # end
15
+ #
16
+ # %w(TestA TestB).each do |klass|
17
+ # at.extra_class_map[klass] = "test/test_misc.rb"
18
+ # end
19
+ # end
20
+
21
+ # Autotest.add_hook :run_command do |at|
22
+ # system "rake build"
23
+ # end
@@ -0,0 +1,6 @@
1
+ === 1.0.0 / 2009-03-29
2
+
3
+ * 1 major enhancement
4
+
5
+ * Birthday!
6
+
@@ -0,0 +1,18 @@
1
+ .autotest
2
+ CHANGELOG.rdoc
3
+ Manifest.txt
4
+ README.rdoc
5
+ Rakefile
6
+ bin/tree_diff
7
+ lib/tree_diff.rb
8
+ lib/tree_diff/dot_visitor.rb
9
+ lib/tree_diff/hpricot.rb
10
+ lib/tree_diff/narf.rb
11
+ lib/tree_diff/node.rb
12
+ lib/tree_diff/nokogiri.rb
13
+ lib/tree_diff/rexml.rb
14
+ lib/tree_diff/unify.rb
15
+ test/test_dots.rb
16
+ test/test_merge.rb
17
+ test/test_tree_diff.rb
18
+ tree_diff.gemspec
@@ -0,0 +1,47 @@
1
+ = tree_diff
2
+
3
+ * http://github.com/tenderlove/tree_diff
4
+
5
+ == DESCRIPTION:
6
+
7
+ Parse HTML using different parsers, then show the differences between
8
+ the generated trees.
9
+
10
+ == SYNOPSIS:
11
+
12
+ $ tree_diff --graph /path/to/file.html
13
+
14
+ == REQUIREMENTS:
15
+
16
+ * hpricot
17
+ * nokogiri
18
+ * narf
19
+
20
+ == INSTALL:
21
+
22
+ * sudo gem install tenderlove-tree_diff
23
+
24
+ == LICENSE:
25
+
26
+ (The MIT License)
27
+
28
+ Copyright (c) 2009 Aaron Patterson
29
+
30
+ Permission is hereby granted, free of charge, to any person obtaining
31
+ a copy of this software and associated documentation files (the
32
+ 'Software'), to deal in the Software without restriction, including
33
+ without limitation the rights to use, copy, modify, merge, publish,
34
+ distribute, sublicense, and/or sell copies of the Software, and to
35
+ permit persons to whom the Software is furnished to do so, subject to
36
+ the following conditions:
37
+
38
+ The above copyright notice and this permission notice shall be
39
+ included in all copies or substantial portions of the Software.
40
+
41
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
42
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
43
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
44
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
45
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
46
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
47
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,31 @@
# -*- ruby -*-

require 'rubygems'
require 'hoe'

# Put lib/ on the load path so TreeDiff::VERSION can be read from the
# working copy.
$: << File.expand_path(File.join(File.dirname(__FILE__), 'lib'))
require 'tree_diff'

# Hoe project definition. Kept in a constant so the gem:spec task below can
# reach the name, version and specification.
HOE = Hoe.new('tree_diff', TreeDiff::VERSION) do |p|
  p.developer('Aaron Patterson', 'aaronp@rubyforge.org')
  p.extra_deps = [
    ['nokogiri', '>= 1.2.3'],
    ['hpricot'],
    ['narf'],
  ]
  p.readme_file      = 'README.rdoc'
  p.history_file     = 'CHANGELOG.rdoc'
  p.extra_rdoc_files = FileList['*.rdoc']
end

namespace :gem do
  desc 'Generate a gem spec'
  task :spec do
    # Stamp the version and render the spec BEFORE opening the output file,
    # so a failure here cannot leave a truncated gemspec behind.
    timestamp = Time.now.strftime('%Y%m%d%H%M%S')
    HOE.spec.version = "#{HOE.version}.#{timestamp}"
    spec_source = HOE.spec.to_ruby

    File.open("#{HOE.name}.gemspec", 'w') { |f| f.write(spec_source) }
  end
end

# vim: syntax=Ruby
@@ -0,0 +1,35 @@
#!/usr/bin/env ruby -w

require 'rubygems'
require 'tree_diff'

# Command line front end.
#
#   tree_diff --list DIRECTORY   print every *.html file below DIRECTORY
#                                (smallest first) whose two trees differ
#   tree_diff --graph FILE       print the merged tree as Graphviz dot
#   tree_diff FILE               print "Same" or "Different"

# Parses +html+ with Nokogiri and with Hpricot, unifies both trees and
# returns the merged TreeDiff node.
def merged_tree html
  ndoc = TreeDiff::Unify.new('nokogiri').accept(Nokogiri::HTML(html).root)
  hdoc = TreeDiff::Unify.new('hpricot').accept(
    Hpricot(html, :xhtml_struct => true).root
  )
  ndoc.merge(hdoc)
end

# Returns +path+, or aborts with a usage message when it was not supplied.
def required_path path
  abort 'usage: tree_diff [--list DIRECTORY | --graph FILE | FILE]' unless path
  path
end

case ARGV[0]
when /--list/
  pattern = File.join(required_path(ARGV[1]), '**', '*.html')
  Dir[pattern].sort_by { |file| File.stat(file).size }.each do |file|
    html = File.read(file)
    begin
      next if merged_tree(html).all_equal?
      puts "#{file} Differs"
    rescue StandardError => e
      # Best effort: one unparsable file must not stop the scan, but report
      # it on stderr instead of hiding the failure.
      warn "#{file}: skipped (#{e.class}: #{e.message})"
    end
  end
when /--graph/
  puts merged_tree(File.read(required_path(ARGV[1]))).to_dot
else
  html = File.read(required_path(ARGV[0]))
  puts(merged_tree(html).all_equal? ? 'Same' : 'Different')
end
@@ -0,0 +1,10 @@
1
+ require 'tree_diff/dot_visitor'
2
+ require 'tree_diff/nokogiri'
3
+ require 'tree_diff/hpricot'
4
+ require 'tree_diff/narf'
5
+ require 'tree_diff/node'
6
+ require 'tree_diff/unify'
7
+
# Top-level namespace of the tree_diff library.
module TreeDiff
  # Library version. Frozen so the shared string cannot be modified in place.
  VERSION = '1.0.0'.freeze
end
@@ -0,0 +1,60 @@
module TreeDiff
  # Visitor that renders a tree as a Graphviz "dot" digraph.
  #
  # Visited nodes must respond to +name+, +source+, +children+ and +accept+.
  # A node whose +source+ equals the root's +source+ becomes a record with a
  # single name field. Any other node becomes a record with the name field
  # followed by one field per source, and carries <tt>color = blue</tt>.
  class DotVisitor
    # NOTE(review): not referenced anywhere in this class; kept because it is
    # part of the public constant surface.
    COLORS = {
      1 => 'blue',
      2 => 'red',
      3 => 'green',
    }

    # root:: the node whose +source+ every visited node is compared against.
    def initialize root
      @root  = root
      @nodes = []
      @edges = []
    end

    # Starts (or continues) the traversal at +target+.
    def accept target
      target.accept(self)
    end

    # Records the dot statement for +node+ and one edge per child, then
    # descends into the children. Every edge points at the child's +f0+
    # (name) port and gets a sequential id.
    def visit node
      @nodes << node_statement(node)

      node.children.each do |child|
        @edges << <<-eoedge
          "#{node.object_id}" -> "#{child.object_id}":f0 [
            id = #{@edges.length}
          ];
        eoedge
      end

      node.children.each { |c| c.accept(self) }
    end

    # Returns the complete digraph source for everything visited so far.
    def to_s
      <<-eograph
        digraph g {
          node [
            fontsize = "16"
            shape = "record"
            style = filled
          ];
          #{@nodes.join + @edges.join}
        }
      eograph
    end

    private

    # Builds the dot node statement for +node+.
    def node_statement node
      if @root.source == node.source
        <<-eonode
          "#{node.object_id}" [
            label = "<f0> (#{node.name})"
          ];
        eonode
      else
        # Port f0 is the name field, so source fields are numbered from f1.
        # (Previously every source field was emitted as f0.)
        port   = 0
        fields = node.source.map { |src| "<f#{port += 1}> #{src}" }
        <<-eonode
          "#{node.object_id}" [
            label = "{<f0> (#{node.name}) | #{fields.join(" | ")}}"
            color = blue
          ];
        eonode
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require 'hpricot'
2
+
# Visitor support and a widened notion of "text" for Hpricot elements.
class Hpricot::Elem
  # Hands this element to +visitor+ and returns whatever +visit+ returns.
  def accept visitor
    visitor.visit(self)
  end

  alias :old_text? :text?

  # True when the original +text?+ is truthy or this element is named
  # "script"; false otherwise.
  def text?
    old_text? ? true : %w{ script }.include?(name)
  end
end
@@ -0,0 +1,19 @@
1
+ require 'web/htmltools/xmltree'
2
+
# Visitor support and +text?+ predicates for the REXML classes.
class REXML::Element
  # Hands this element to +visitor+ and returns whatever +visit+ returns.
  def accept visitor
    visitor.visit(self)
  end

  # True only for an element that has no children yet reports text.
  def text?
    children.empty? && has_text?
  end
end

# Comments always count as text.
class REXML::Comment
  def text?
    true
  end
end

# Text nodes always count as text.
class REXML::Text
  def text?
    true
  end
end
@@ -0,0 +1,58 @@
module TreeDiff
  # A tree node with four members:
  #
  # name::     the node's name
  # edge::     a value that, together with +name+, defines node equality
  # children:: array of child nodes
  # source::   array of labels; +merge+ concatenates the labels of both sides
  class Node < Struct.new(:name, :edge, :children, :source)
    # Two nodes are equal when +name+ and +edge+ match; +children+ and
    # +source+ are ignored. Objects that do not expose +name+ and +edge+
    # (e.g. nil) are simply not equal instead of raising NoMethodError.
    def == other
      other.respond_to?(:name) && other.respond_to?(:edge) &&
        name == other.name && edge == other.edge
    end
    alias :eql? :==

    # Hash over the same members as #==, as Array#-, Array#& and Hash need.
    def hash
      [name, edge].hash
    end

    # Merges +other+ into a new tree and returns its root.
    #
    # Children present on both sides (by #==) are merged recursively.
    # Children present on only one side are carried over unchanged and are
    # placed before the merged ones. The result's +source+ is this node's
    # +source+ followed by +other+'s.
    #
    # Raises RuntimeError when the two nodes have different names.
    def merge other
      unless other.name == name
        raise "cannot merge node #{other.name.inspect} into #{name.inspect}"
      end

      one_sided = (children - other.children) + (other.children - children)

      merged = (children & other.children).map do |node|
        node.merge(other.children.find { |n| n == node })
      end

      Node.new(name, edge, one_sided + merged, source + other.source)
    end

    # Hands this node to +visitor+ and returns whatever +visit+ returns.
    def accept visitor
      visitor.visit(self)
    end

    # Returns a DotVisitor that has already walked this tree; call +to_s+ on
    # it (or +puts+ it) to obtain the dot source.
    def to_dot
      dv = DotVisitor.new(self)
      dv.accept(self)
      dv
    end

    # True when every node in this tree has the same +source+ as this node.
    def all_equal?
      # Explicit stack instead of recursion: stops at the first mismatch and
      # does not depend on tree depth.
      pending = [self]
      until pending.empty?
        node = pending.pop
        return false unless node.source == source
        pending.concat(node.children)
      end
      true
    end
  end
end
@@ -0,0 +1,15 @@
1
+ require 'nokogiri'
2
+
# Visitor support and a widened notion of "text" for Nokogiri nodes.
class Nokogiri::XML::Node
  # Hands this node to +visitor+ and returns whatever +visit+ returns.
  def accept visitor
    visitor.visit(self)
  end

  alias :old_text? :text?

  # True when the original +text?+ is truthy, or when the node is named
  # "comment", "script" or "#cdata-section"; false otherwise.
  def text?
    if old_text?
      true
    else
      ['comment', 'script', '#cdata-section'].include?(name)
    end
  end
end
@@ -0,0 +1 @@
1
+ require 'rexml/document'
@@ -0,0 +1,28 @@
module TreeDiff
  # Visitor that converts a parser-specific tree into TreeDiff::Node objects.
  #
  # Visited nodes must respond to +name+, +children+, +parent+, +text?+ and
  # +accept+. Children that are +text?+ or do not respond to +accept+ are
  # skipped.
  class Unify
    # source:: label stored (wrapped in an array) as the +source+ of every
    #          Node this visitor creates.
    def initialize source
      @source = source
    end

    # Starts the conversion at +target+ and returns the resulting Node.
    def accept target
      target.accept(self)
    end

    # Builds the Node for +node+. Its +edge+ is the node's index among its
    # parent's non-text, visitable children.
    def visit node
      parent = node.parent
      # A node without a parent has no siblings to be ranked against; treat
      # it as the first one instead of raising NoMethodError on nil.
      edge = parent ? visitable_children(parent).index(node) : 0

      Node.new(
        node.name,
        edge,
        visitable_children(node).map { |child| child.accept(self) },
        [@source]
      )
    end

    private

    # Children of +node+ that are not text and can be visited. A nil
    # +children+ is treated as "no children".
    def visitable_children node
      (node.children || []).find_all { |c|
        !c.text? && c.respond_to?(:accept)
      }
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require "test/unit"
2
+ require 'rubygems'
3
+ require "tree_diff"
4
+
# Checks that a unified tree can be rendered as Graphviz dot.
#
# Named TestDots (not TestMerge) so it no longer reopens the class defined in
# test/test_merge.rb when both files are loaded into one test run.
class TestDots < Test::Unit::TestCase
  def setup
    @html = <<-eohtml
    <html>
    <body>
    <table>
    <td>
    </table>
    </body>
    </html>
    eohtml
    @ndoc = TreeDiff::Unify.new('nokogiri').accept(Nokogiri::HTML(@html).root)
  end

  def test_to_dot
    dot = @ndoc.to_dot
    assert dot
    # to_dot returns a visitor object, which is always truthy; also check
    # that it actually renders a digraph.
    assert_match(/digraph/, dot.to_s)
  end
end
@@ -0,0 +1,58 @@
1
+ require "test/unit"
2
+ require 'rubygems'
3
+ require "tree_diff"
4
+
# Merges the trees produced by Nokogiri, Hpricot and NARF for the same HTML
# and checks the merged result.
class TestMerge < Test::Unit::TestCase
  def setup
    @html = <<-eohtml
    <html>
    <body>
    <table>
    <td>
    </table>
    </body>
    </html>
    eohtml
    @ndoc = TreeDiff::Unify.new('nokogiri').accept(Nokogiri::HTML(@html).root)
    @hdoc = TreeDiff::Unify.new('hpricot').accept(Hpricot(@html).root)
    @narf = HTMLTree::XMLParser.new
    @narf.feed(@html)
    @narf = TreeDiff::Unify.new('narf').accept(@narf.root)
  end

  def test_all_equal?
    merged = @ndoc.merge(@narf).merge(@hdoc)
    assert merged.all_equal?
  end

  def test_two_way_merge
    sources = sources_in(@ndoc.merge(@narf))

    assert sources.include?('nokogiri')
    assert sources.include?('narf')
  end

  def test_three_way_merge
    sources = sources_in(@ndoc.merge(@narf).merge(@hdoc))

    assert sources.include?('nokogiri')
    assert sources.include?('narf')
    assert sources.include?('hpricot')
  end

  private

  # Walks +tree+ and returns the concatenated +source+ arrays of every node.
  def sources_in tree
    collector = Class.new(Struct.new(:sources)) {
      def visit node
        self.sources += node.source
        node.children.each { |c| c.accept(self) }
      end
    }.new([])

    tree.accept(collector)
    collector.sources
  end
end
@@ -0,0 +1,37 @@
1
+ require "test/unit"
2
+ require 'rubygems'
3
+ require "tree_diff"
4
+
# Checks that each supported parser's root can be unified into a
# TreeDiff::Node.
class TestTreeDiff < Test::Unit::TestCase
  def setup
    @html = <<-eohtml
    <html>
    <body>
    <table>
    <td>
    <td>
    </table>
    </body>
    </html>
    eohtml
    @ndoc = Nokogiri::HTML(@html)
    @hdoc = Hpricot(@html)
    @narf = HTMLTree::XMLParser.new
    @narf.feed(@html)
  end

  def test_unify_nokogiri
    assert_unifies 'nokogiri', @ndoc.root
  end

  def test_unify_hpricot
    assert_unifies 'hpricot', @hdoc.root
  end

  def test_unify_narf
    assert_unifies 'narf', @narf.root
  end

  private

  # Asserts that unifying +root+ under +label+ yields a TreeDiff::Node.
  def assert_unifies label, root
    unified = TreeDiff::Unify.new(label).accept(root)
    assert_instance_of TreeDiff::Node, unified
  end
end
@@ -0,0 +1,46 @@
1
+ # -*- encoding: utf-8 -*-
2
+
# NOTE(review): the Rakefile's gem:spec task writes tree_diff.gemspec from
# HOE.spec.to_ruby, so hand edits here are lost when that task is re-run.
Gem::Specification.new do |s|
  s.name = %q{tree_diff}
  s.version = "1.0.0.20090329202825"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Aaron Patterson"]
  s.date = %q{2009-03-29}
  s.default_executable = %q{tree_diff}
  s.description = %q{Parse HTML using different parsers, then show the differences between the generated trees.}
  s.email = ["aaronp@rubyforge.org"]
  s.executables = ["tree_diff"]
  s.extra_rdoc_files = ["Manifest.txt", "CHANGELOG.rdoc", "README.rdoc"]
  s.files = [".autotest", "CHANGELOG.rdoc", "Manifest.txt", "README.rdoc", "Rakefile", "bin/tree_diff", "lib/tree_diff.rb", "lib/tree_diff/dot_visitor.rb", "lib/tree_diff/hpricot.rb", "lib/tree_diff/narf.rb", "lib/tree_diff/node.rb", "lib/tree_diff/nokogiri.rb", "lib/tree_diff/rexml.rb", "lib/tree_diff/unify.rb", "test/test_dots.rb", "test/test_merge.rb", "test/test_tree_diff.rb", "tree_diff.gemspec"]
  s.has_rdoc = true
  s.homepage = %q{http://github.com/tenderlove/tree_diff}
  s.rdoc_options = ["--main", "README.rdoc"]
  s.require_paths = ["lib"]
  s.rubyforge_project = %q{tree_diff}
  s.rubygems_version = %q{1.3.1}
  s.summary = %q{Parse HTML using different parsers, then show the differences between the generated trees.}
  s.test_files = ["test/test_dots.rb", "test/test_merge.rb", "test/test_tree_diff.rb"]

  # Each dependency is listed once; how it is registered depends on what the
  # running RubyGems supports.
  runtime_deps = [
    [%q<nokogiri>, ">= 1.2.3"],
    [%q<hpricot>, ">= 0"],
    [%q<narf>, ">= 0"],
  ]
  development_deps = [
    [%q<hoe>, ">= 1.11.0"],
  ]

  s.specification_version = 2 if s.respond_to? :specification_version

  # Gem::RubyGemsVersion is not defined by current RubyGems; prefer
  # Gem::VERSION and fall back to the old constant only where it is missing.
  rubygems_version = defined?(Gem::VERSION) ? Gem::VERSION : Gem::RubyGemsVersion
  typed_dependencies = s.respond_to?(:specification_version) &&
    Gem::Version.new(rubygems_version) >= Gem::Version.new('1.2.0')

  if typed_dependencies
    runtime_deps.each { |dep, req| s.add_runtime_dependency(dep, [req]) }
    development_deps.each { |dep, req| s.add_development_dependency(dep, [req]) }
  else
    (runtime_deps + development_deps).each { |dep, req| s.add_dependency(dep, [req]) }
  end
end
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tenderlove-tree_diff
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0.20090329202825
5
+ platform: ruby
6
+ authors:
7
+ - Aaron Patterson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-03-29 00:00:00 -07:00
13
+ default_executable: tree_diff
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: nokogiri
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.3
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: hpricot
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: narf
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: "0"
44
+ version:
45
+ - !ruby/object:Gem::Dependency
46
+ name: hoe
47
+ type: :development
48
+ version_requirement:
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: 1.11.0
54
+ version:
55
+ description: Parse HTML using different parsers, then show the differences between the generated trees.
56
+ email:
57
+ - aaronp@rubyforge.org
58
+ executables:
59
+ - tree_diff
60
+ extensions: []
61
+
62
+ extra_rdoc_files:
63
+ - Manifest.txt
64
+ - CHANGELOG.rdoc
65
+ - README.rdoc
66
+ files:
67
+ - .autotest
68
+ - CHANGELOG.rdoc
69
+ - Manifest.txt
70
+ - README.rdoc
71
+ - Rakefile
72
+ - bin/tree_diff
73
+ - lib/tree_diff.rb
74
+ - lib/tree_diff/dot_visitor.rb
75
+ - lib/tree_diff/hpricot.rb
76
+ - lib/tree_diff/narf.rb
77
+ - lib/tree_diff/node.rb
78
+ - lib/tree_diff/nokogiri.rb
79
+ - lib/tree_diff/rexml.rb
80
+ - lib/tree_diff/unify.rb
81
+ - test/test_dots.rb
82
+ - test/test_merge.rb
83
+ - test/test_tree_diff.rb
84
+ - tree_diff.gemspec
85
+ has_rdoc: true
86
+ homepage: http://github.com/tenderlove/tree_diff
87
+ post_install_message:
88
+ rdoc_options:
89
+ - --main
90
+ - README.rdoc
91
+ require_paths:
92
+ - lib
93
+ required_ruby_version: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: "0"
98
+ version:
99
+ required_rubygems_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: "0"
104
+ version:
105
+ requirements: []
106
+
107
+ rubyforge_project: tree_diff
108
+ rubygems_version: 1.2.0
109
+ signing_key:
110
+ specification_version: 2
111
+ summary: Parse HTML using different parsers, then show the differences between the generated trees.
112
+ test_files:
113
+ - test/test_dots.rb
114
+ - test/test_merge.rb
115
+ - test/test_tree_diff.rb