sitediff 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,92 @@
1
+ require 'sitediff/sanitize'
2
+ require 'nokogiri'
3
+
4
class SiteDiff
  class Sanitizer

    # A DOM-level sanitization step driven by a rule hash.
    #
    # Rules are read with *string* keys ('type', 'selector', 'class').
    # Currently supported transforms:
    #
    #  * { 'type' => 'unwrap_root' }
    #  * { 'type' => 'unwrap', 'selector' => 'div.field-item' }
    #  * { 'type' => 'remove', 'selector' => 'div.extra-stuff' }
    #  * { 'type' => 'remove_class', 'selector' => 'div.item', 'class' => 'class1' }
    #
    # 'selector' and 'class' accept either a single value or an array of values.
    class DomTransform

      # Registry mapping a rule's 'type' string to the subclass implementing it.
      # Filled in by the `register` calls in each subclass body.
      Transforms = {}

      # rule - the Hash describing this transform (see the class comment).
      def initialize(rule)
        @rule = rule
      end

      # Often an array or scalar are both ok values. Turn either into an array.
      def to_array(v)
        [v].flatten
      end

      # Yield every node below +node+ that matches one of the rule's selectors.
      # Selectors are evaluated one after another, in the order given.
      def targets(node)
        to_array(@rule['selector']).each do |sel|
          node.css(sel).each { |n| yield n }
        end
      end

      # Run this transform on every matching node below +node+.
      # Subclasses implement `process(node)` for a single match.
      def apply(node)
        targets(node) { |t| process(t) }
      end

      # Make the calling subclass available under the given 'type' name.
      def self.register(name)
        Transforms[name] = self
      end

      # Build the transform named by rule['type'].
      #
      # Raises InvalidSanitization if the rule has no type, or if no
      # transform has been registered under that type.
      def self.create(rule)
        type = rule['type']
        raise InvalidSanitization, "DOM transform needs a type" unless type

        transform = Transforms[type]
        raise InvalidSanitization, "No DOM transform named #{type}" unless transform

        transform.new(rule)
      end

      # Remove elements matching 'selector'
      class Remove < DomTransform
        register "remove"

        def process(node)
          node.remove
        end
      end

      # Unwrap elements matching 'selector'
      class Unwrap < DomTransform
        register "unwrap"

        def process(node)
          # Re-insert the children right after the wrapper, then drop the
          # (now empty) wrapper itself.
          node.add_next_sibling(node.children)
          node.remove
        end
      end

      # Remove classes from elements matching selector
      class RemoveClass < DomTransform
        register "remove_class"

        def process(node)
          # Must call remove_class on a NodeSet!
          ns = Nokogiri::XML::NodeSet.new(node.document, [node])
          to_array(@rule['class']).each do |class_name|
            ns.remove_class(class_name)
          end
        end
      end

      # Unwrap the root element
      class UnwrapRoot < DomTransform
        register "unwrap_root"

        # Replace the children of +node+ with the children of its only child.
        # No selector is involved, so `apply` is overridden directly.
        #
        # Raises InvalidSanitization unless +node+ has exactly one child.
        def apply(node)
          roots = node.children
          raise InvalidSanitization, "No root element in unwrap_root" if roots.empty?
          raise InvalidSanitization, "Multiple root elements in unwrap_root" if roots.size > 1

          node.children = roots[0].children
        end
      end

    end
  end
end
@@ -0,0 +1,56 @@
1
class SiteDiff
  class Sanitizer
    # Regular-expression sanitization rule.
    #
    # The rule hash is read with string keys:
    #  * 'pattern'    - source of the regular expression
    #  * 'substitute' - replacement text (defaults to the empty string)
    #  * 'selector'   - optional CSS selector, or array of selectors; when
    #                   present the rule only touches matching elements
    #                   (see WithSelector).
    class Regexp
      def initialize(rule)
        @rule = rule
      end

      # Does this rule operate on selected DOM nodes rather than the raw HTML?
      def selector?
        false
      end

      # Would applying this rule change +html+? +node+ is unused here; it is
      # part of the signature so WithSelector can be called the same way.
      def applies?(html, node)
        applies_to_string?(html)
      end

      # Substitute in place in +html+ and return it.
      def apply(html)
        gsub!(html)
      end

      # Pick the implementation matching the rule: WithSelector when the rule
      # has a 'selector', the plain string version otherwise.
      def self.create(rule)
        rule['selector'] ? WithSelector.new(rule) : new(rule)
      end

      # Variant that restricts the substitution to elements matching
      # the rule's 'selector'.
      class WithSelector < Regexp
        def selector?
          true
        end

        # Yield each element below +node+ matching one of the selectors.
        # A scalar or an array of selectors is accepted, as in
        # DomTransform#targets.
        def contexts(node)
          [@rule['selector']].flatten.each do |sel|
            node.css(sel).each { |e| yield(e) }
          end
        end

        # True if the substitution would change at least one selected element.
        def applies?(html, node)
          enum_for(:contexts, node).any? { |e| applies_to_string?(e.to_html) }
        end

        # Replace every selected element with its substituted serialization.
        def apply(node)
          contexts(node) { |e| e.replace(gsub!(e.to_html)) }
        end
      end

      protected

      # Run the substitution on +str+ in place and return +str+
      # (String#gsub! alone would return nil when nothing matched).
      def gsub!(str)
        str.gsub!(pattern, @rule['substitute'] || '')
        str
      end

      # True if the substitution would alter +str+; works on a copy.
      def applies_to_string?(str)
        gsub!(str.dup) != str
      end

      # Compiled form of the rule's 'pattern', built once per rule instead of
      # once per substitution.
      def pattern
        @pattern ||= ::Regexp.new(@rule['pattern'])
      end
    end
  end
end
@@ -1,19 +1,31 @@
1
+ require 'sitediff/exception'
1
2
  require 'typhoeus'
3
+ require 'addressable/uri'
2
4
 
3
5
  class SiteDiff
4
- class SiteDiffReadFailure < Exception; end
6
+ class SiteDiffReadFailure < SiteDiffException; end
5
7
 
6
8
  class UriWrapper
7
9
  # This lets us treat errors or content as one object
8
- class ReadResult < Struct.new(:content, :error)
9
- def initialize(cont, err = nil)
10
- super(cont, err)
10
+ class ReadResult
11
+ attr_accessor :content, :error_code, :error
12
+
13
+ def initialize(content = nil)
14
+ @content = content
15
+ @error = nil
16
+ @error_code = nil
17
+ end
18
+
19
+ def self.error(err, code = nil)
20
+ res = new
21
+ res.error_code = code
22
+ res.error = err
23
+ return res
11
24
  end
12
- def self.error(err); new(nil, err); end
13
25
  end
14
26
 
15
27
  def initialize(uri)
16
- @uri = uri.respond_to?(:scheme) ? uri : URI.parse(uri)
28
+ @uri = uri.respond_to?(:scheme) ? uri : Addressable::URI.parse(uri)
17
29
  # remove trailing '/'s from local URIs
18
30
  @uri.path.gsub!(/\/*$/, '') if local?
19
31
  end
@@ -95,7 +107,7 @@ class SiteDiff
95
107
  req.on_failure do |resp|
96
108
  msg = 'Unknown Error'
97
109
  msg = resp.status_message if resp and resp.status_message
98
- yield ReadResult.error("HTTP error #{@uri}: #{msg}")
110
+ yield ReadResult.error("HTTP error #{@uri}: #{msg}", resp.response_code)
99
111
  end
100
112
 
101
113
  req
@@ -0,0 +1,82 @@
1
+ require 'webrick'
2
+
3
class SiteDiff
  # Simple webserver for testing purposes. Starts one server per directory,
  # each in its own thread, on consecutive ports.
  class Webserver
    DEFAULT_PORT = 13080

    attr_accessor :ports

    # Serve a list of directories.
    #
    # start_port - first port to use (DEFAULT_PORT when nil); directory i is
    #              served on start_port + i.
    # dirs       - directories to serve.
    # opts       - :quiet => true silences WEBrick's logging.
    #
    # With a block, yields the running webserver and kills the server threads
    # when the block finishes, including when it raises.
    def initialize(start_port, dirs, opts = {})
      start_port ||= DEFAULT_PORT
      @ports = (start_port...(start_port + dirs.size)).to_a
      @dirs = dirs
      @opts = opts

      setup
      start_servers

      return unless block_given?

      begin
        yield self
      ensure
        # Without the ensure, an exception in the block would leave the
        # server threads running.
        kill
      end
    end

    # Kill all server threads.
    def kill
      @threads.each { |t| t.kill }
    end

    # Block until all server threads have finished.
    def wait
      @threads.each { |t| t.join }
    end

    # Base URI of each server, in the same order as the directories.
    def uris
      ports.map { |p| "http://localhost:#{p}" }
    end

    protected

    # Prepare the options shared by every server.
    def setup
      @server_opts = {}
      if @opts[:quiet]
        @server_opts[:Logger] = WEBrick::Log.new(IO::NULL)
        @server_opts[:AccessLog] = []
      end
    end

    # Build one server from its options. Subclasses may override this to
    # customise the server (mount servlets, etc.).
    def server(opts)
      WEBrick::HTTPServer.new(opts)
    end

    # Create a server per directory and start each in its own thread.
    def start_servers
      @threads = []
      @dirs.each_with_index do |dir, idx|
        # Each server gets its own copy of the options, so a `server`
        # override that modifies its argument cannot affect the shared
        # options or the other servers.
        srv = server(@server_opts.merge(:Port => @ports[idx], :DocumentRoot => dir))
        @threads << Thread.new { srv.start }
      end
    end

    # Serves the bundled ruby-doc.org fixtures, one server per fixture
    # directory; the first is the "before" site and the last the "after" site.
    class FixtureServer < Webserver
      PORT = DEFAULT_PORT + 1
      BASE = 'spec/fixtures/ruby-doc.org'
      NAMES = %w[core-1.9.3 core-2.0]

      def initialize(port = PORT, base = BASE, names = NAMES)
        dirs = names.map { |n| File.join(base, n) }
        super(port, dirs, :quiet => true)
      end

      # URI of the "before" site.
      def before
        uris.first
      end

      # URI of the "after" site.
      def after
        uris.last
      end
    end

  end
end
@@ -0,0 +1,98 @@
1
+ require 'sitediff'
2
+ require 'sitediff/webserver'
3
+ require 'erb'
4
+
5
class SiteDiff
  class Webserver
    # Webserver for browsing the results in a directory. It mounts the files
    # in that directory, the cached pages, and a side-by-side view.
    class ResultServer < Webserver
      # Display a page from the cache
      class CacheServlet < WEBrick::HTTPServlet::AbstractServlet
        def initialize(server, cache)
          # Let AbstractServlet set up its own state.
          super(server)
          @cache = cache
        end

        # Serve the cached page for a request path of the form /<tag>/<path>.
        #
        # Raises NotFound when the path has no tag or the cache has no entry.
        # A cached failure is re-raised as the matching HTTP status, or as
        # InternalServerError when there is no matching status.
        def do_GET(req, res)
          path = req.path_info
          # \A and \z anchor the whole path; ^ and $ would also match at an
          # embedded newline.
          md = %r[\A/([^/]+)(/.*)\z].match(path)
          raise WEBrick::HTTPStatus::NotFound unless md

          tag, path = *md.captures
          # NOTE(review): tag comes from the request URL and is converted to
          # a Symbol; confirm that is acceptable on the Ruby versions in use.
          r = @cache.get(tag.to_sym, path)
          raise WEBrick::HTTPStatus::NotFound unless r

          if r.error_code
            # HTTPStatus[] returns nil for codes WEBrick does not know, and
            # `raise nil` is a TypeError, so only raise a status that exists.
            status = WEBrick::HTTPStatus[r.error_code]
            raise status if status
          end
          if r.error || r.error_code
            raise WEBrick::HTTPStatus::InternalServerError,
                  (r.error || "Unknown error code #{r.error_code}").to_s
          end

          res['content-type'] = 'text/html'
          res.body = r.content
        end
      end

      # Display two pages side by side
      class SideBySideServlet < WEBrick::HTTPServlet::AbstractServlet
        def initialize(server, cache, settings)
          # Let AbstractServlet set up its own state.
          super(server)
          @cache = cache
          @settings = settings
        end

        # The [before, after] URLs for +path+. A tag listed under the
        # 'cached' setting is served from /cache/<tag>; any other tag uses
        # the base URL stored in the settings under that tag.
        def urls(path)
          cached = Array(@settings['cached'])
          %w[before after].map do |tag|
            base = cached.include?(tag) ? "/cache/#{tag}" : @settings[tag]
            base + path
          end
        end

        # Render the side-by-side template. `before`, `after` and `path` are
        # passed to the template through the binding.
        def do_GET(req, res)
          path = req.path_info
          before, after = *urls(path)

          res['content-type'] = 'text/html'
          erb = File.join(SiteDiff::FILES_DIR, 'sidebyside.html.erb')
          res.body = ERB.new(File.read(erb)).result(binding)
        end
      end

      # port - port to serve on (Webserver's default when nil).
      # dir  - directory holding the report and its settings file.
      # opts - :cache is handed to the servlets; :browse opens the report in
      #        a browser; the remaining options are used by Webserver.
      def initialize(port, dir, opts = {})
        @settings = YAML.load_file(File.join(dir, SiteDiff::SETTINGS_FILE))
        @cache = opts[:cache]
        super(port, [dir], opts)
      end

      # Build the server: '/' redirects to the report, '/files' serves the
      # directory, '/cache' and '/sidebyside' are handled by the servlets.
      def server(opts)
        # The directory is mounted under /files below, so it is not passed
        # on as the server's DocumentRoot.
        dir = opts.delete(:DocumentRoot)
        srv = super(opts)
        srv.mount_proc('/') do |req, res|
          res.set_redirect(WEBrick::HTTPStatus::Found,
                           "/files/#{SiteDiff::REPORT_FILE}")
        end

        srv.mount('/files', WEBrick::HTTPServlet::FileHandler, dir, true)
        srv.mount('/cache', CacheServlet, @cache)
        srv.mount('/sidebyside', SideBySideServlet, @cache, @settings)
        srv
      end

      # Announce the address and optionally open it in a browser.
      def setup
        super
        root = uris.first
        puts "Serving at #{root}"
        # NOTE(review): Webserver#initialize calls setup before
        # start_servers, so the browser is launched before the server
        # exists; confirm this ordering is intended.
        open_in_browser(root) if @opts[:browse]
      end

      # Open +url+ with the first available opener command.
      # Returns the command used, or nil if none was found.
      def open_in_browser(url)
        cmd = %w[xdg-open open].find { |c| which(c) }
        system(cmd, url) if cmd
        cmd
      end

      # Full path of the executable +cmd+ found on PATH, or nil.
      def which(cmd)
        ENV.fetch('PATH', '').split(File::PATH_SEPARATOR).each do |path|
          file = File.join(path, cmd)
          # Directories are skipped: File.executable? is also true for a
          # searchable directory.
          return file if File.executable?(file) && !File.directory?(file)
        end
        nil
      end
    end
  end
end
metadata CHANGED
@@ -1,7 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitediff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
+ prerelease:
5
6
  platform: ruby
6
7
  authors:
7
8
  - Alex Dergachev
@@ -10,122 +11,166 @@ authors:
10
11
  autorequire:
11
12
  bindir: bin
12
13
  cert_chain: []
13
- date: 2015-04-21 00:00:00.000000000 Z
14
+ date: 2015-05-05 00:00:00.000000000 Z
14
15
  dependencies:
15
16
  - !ruby/object:Gem::Dependency
16
17
  name: thor
17
18
  requirement: !ruby/object:Gem::Requirement
19
+ none: false
18
20
  requirements:
19
- - - '>='
21
+ - - ! '>='
20
22
  - !ruby/object:Gem::Version
21
23
  version: '0'
22
24
  type: :runtime
23
25
  prerelease: false
24
26
  version_requirements: !ruby/object:Gem::Requirement
27
+ none: false
25
28
  requirements:
26
- - - '>='
29
+ - - ! '>='
27
30
  - !ruby/object:Gem::Version
28
31
  version: '0'
29
32
  - !ruby/object:Gem::Dependency
30
33
  name: nokogiri
31
34
  requirement: !ruby/object:Gem::Requirement
35
+ none: false
32
36
  requirements:
33
- - - '>='
37
+ - - ! '>='
34
38
  - !ruby/object:Gem::Version
35
39
  version: '0'
36
40
  type: :runtime
37
41
  prerelease: false
38
42
  version_requirements: !ruby/object:Gem::Requirement
43
+ none: false
39
44
  requirements:
40
- - - '>='
45
+ - - ! '>='
41
46
  - !ruby/object:Gem::Version
42
47
  version: '0'
43
48
  - !ruby/object:Gem::Dependency
44
49
  name: diffy
45
50
  requirement: !ruby/object:Gem::Requirement
51
+ none: false
46
52
  requirements:
47
- - - '>='
53
+ - - ! '>='
48
54
  - !ruby/object:Gem::Version
49
55
  version: '0'
50
56
  type: :runtime
51
57
  prerelease: false
52
58
  version_requirements: !ruby/object:Gem::Requirement
59
+ none: false
53
60
  requirements:
54
- - - '>='
61
+ - - ! '>='
55
62
  - !ruby/object:Gem::Version
56
63
  version: '0'
57
64
  - !ruby/object:Gem::Dependency
58
65
  name: typhoeus
59
66
  requirement: !ruby/object:Gem::Requirement
67
+ none: false
60
68
  requirements:
61
- - - '>='
69
+ - - ! '>='
62
70
  - !ruby/object:Gem::Version
63
71
  version: '0'
64
72
  type: :runtime
65
73
  prerelease: false
66
74
  version_requirements: !ruby/object:Gem::Requirement
75
+ none: false
67
76
  requirements:
68
- - - '>='
77
+ - - ! '>='
69
78
  - !ruby/object:Gem::Version
70
79
  version: '0'
71
80
  - !ruby/object:Gem::Dependency
72
81
  name: rainbow
73
82
  requirement: !ruby/object:Gem::Requirement
83
+ none: false
74
84
  requirements:
75
- - - '>='
85
+ - - ! '>='
76
86
  - !ruby/object:Gem::Version
77
87
  version: '0'
78
88
  type: :runtime
79
89
  prerelease: false
80
90
  version_requirements: !ruby/object:Gem::Requirement
91
+ none: false
81
92
  requirements:
82
- - - '>='
93
+ - - ! '>='
83
94
  - !ruby/object:Gem::Version
84
95
  version: '0'
85
- description: |
86
- SiteDiff makes it easy to see differences between two versions of a website. It accepts a set of paths to compare two versions of the site together with potential normalization/sanitization rules. From the provided paths and configuration SiteDiff generates an HTML report of all the status of HTML comparison between the given paths together with a readable diff-like HTML for each specified path containing the differences between the two versions of the site. It is useful tool for QAing re-deployments, site upgrades, etc.
96
+ - !ruby/object:Gem::Dependency
97
+ name: addressable
98
+ requirement: !ruby/object:Gem::Requirement
99
+ none: false
100
+ requirements:
101
+ - - ! '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ none: false
108
+ requirements:
109
+ - - ! '>='
110
+ - !ruby/object:Gem::Version
111
+ version: '0'
112
+ description: ! 'SiteDiff makes it easy to see differences between two versions of
113
+ a website. It accepts a set of paths to compare two versions of the site together
114
+ with potential normalization/sanitization rules. From the provided paths and configuration
115
+ SiteDiff generates an HTML report of all the status of HTML comparison between the
116
+ given paths together with a readable diff-like HTML for each specified path containing
117
+ the differences between the two versions of the site. It is a useful tool for QAing
118
+ re-deployments, site upgrades, etc.
119
+
120
+ '
87
121
  email: alex@evolvingweb.ca
88
122
  executables:
89
123
  - sitediff
90
124
  extensions: []
91
125
  extra_rdoc_files: []
92
126
  files:
127
+ - lib/sitediff.rb
128
+ - lib/sitediff/crawler.rb
129
+ - lib/sitediff/sanitize/dom_transform.rb
130
+ - lib/sitediff/sanitize/regexp.rb
131
+ - lib/sitediff/config/creator.rb
93
132
  - lib/sitediff/cli.rb
133
+ - lib/sitediff/fetch.rb
134
+ - lib/sitediff/exception.rb
135
+ - lib/sitediff/cache.rb
94
136
  - lib/sitediff/config.rb
95
- - lib/sitediff/diff.rb
137
+ - lib/sitediff/uriwrapper.rb
96
138
  - lib/sitediff/result.rb
97
139
  - lib/sitediff/sanitize.rb
98
- - lib/sitediff/uriwrapper.rb
99
- - lib/sitediff/util/cache.rb
100
- - lib/sitediff/util/webserver.rb
101
- - lib/sitediff.rb
140
+ - lib/sitediff/webserver/resultserver.rb
141
+ - lib/sitediff/rules.rb
142
+ - lib/sitediff/webserver.rb
143
+ - lib/sitediff/diff.rb
102
144
  - lib/sitediff/files/diff.html.erb
145
+ - lib/sitediff/files/sidebyside.html.erb
146
+ - lib/sitediff/files/sitediff.css
103
147
  - lib/sitediff/files/html_report.html.erb
104
148
  - lib/sitediff/files/pretty_print.xsl
105
- - lib/sitediff/files/sitediff.css
149
+ - lib/sitediff/files/rules/drupal.yaml
106
150
  - bin/sitediff
107
151
  homepage: https://github.com/evolvingweb/sitediff/
108
152
  licenses:
109
153
  - GPL-2
110
- metadata: {}
111
154
  post_install_message:
112
155
  rdoc_options: []
113
156
  require_paths:
114
157
  - lib
115
158
  required_ruby_version: !ruby/object:Gem::Requirement
159
+ none: false
116
160
  requirements:
117
- - - '>='
161
+ - - ! '>='
118
162
  - !ruby/object:Gem::Version
119
163
  version: 1.9.3
120
164
  required_rubygems_version: !ruby/object:Gem::Requirement
165
+ none: false
121
166
  requirements:
122
- - - '>='
167
+ - - ! '>='
123
168
  - !ruby/object:Gem::Version
124
169
  version: '0'
125
170
  requirements: []
126
171
  rubyforge_project:
127
- rubygems_version: 2.0.14
172
+ rubygems_version: 1.8.23
128
173
  signing_key:
129
- specification_version: 4
174
+ specification_version: 3
130
175
  summary: Compare two versions of a site with ease!
131
176
  test_files: []