sitediff 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,92 @@
1
+ require 'sitediff/sanitize'
2
+ require 'nokogiri'
3
+
4
class SiteDiff
  class Sanitizer

    # A DOM-level sanitization rule, applied to a parsed document node.
    #
    # Rules are hashes with *string* keys (every lookup in this class reads
    # string keys). Currently supported transforms:
    #
    #  * { 'type' => 'unwrap_root' }
    #  * { 'type' => 'unwrap', 'selector' => 'div.field-item' }
    #  * { 'type' => 'remove', 'selector' => 'div.extra-stuff' }
    #  * { 'type' => 'remove_class', 'selector' => 'div', 'class' => 'class1' }
    #
    # 'selector' and 'class' may each be a single value or an array of values.
    class DomTransform

      # Registry mapping a rule's 'type' string to the implementing subclass.
      # Filled in by the `register` calls in the subclasses below.
      Transforms = {}

      # rule - the rule hash described in the class comment.
      def initialize(rule)
        @rule = rule
      end

      # Often an array or scalar are both ok values. Turn either into an array.
      def to_array(v)
        [v].flatten
      end

      # Yield every node below +node+ that matches one of the rule's
      # selectors, one selector at a time. Returns an enumerator when no
      # block is given.
      def targets(node)
        return enum_for(:targets, node) unless block_given?

        to_array(@rule['selector']).each do |sel|
          node.css(sel).each { |n| yield n }
        end
      end

      # Run this transform's `process` on every target below +node+.
      # Subclasses that do not work per-target (UnwrapRoot) override this.
      def apply(node)
        targets(node) { |t| process(t) }
      end

      # Called in a subclass body to make it available under +name+.
      def self.register(name)
        Transforms[name] = self
      end

      # Build the transform registered for rule['type'].
      #
      # Raises InvalidSanitization when the rule has no type, or when no
      # transform is registered under that type.
      def self.create(rule)
        type = rule['type']
        raise InvalidSanitization, "DOM transform needs a type" unless type

        transform = Transforms[type]
        raise InvalidSanitization, "No DOM transform named #{type}" unless transform

        transform.new(rule)
      end

      # Remove elements matching 'selector'
      class Remove < DomTransform
        register "remove"

        def process(node)
          node.remove
        end
      end

      # Unwrap elements matching 'selector'
      class Unwrap < DomTransform
        register "unwrap"

        def process(node)
          # Move the children out next to the wrapper, then drop the wrapper.
          node.add_next_sibling(node.children)
          node.remove
        end
      end

      # Remove classes from elements matching selector
      class RemoveClass < DomTransform
        register "remove_class"

        def process(node)
          classes = to_array(@rule['class'])

          # Must call remove_class on a NodeSet!
          ns = Nokogiri::XML::NodeSet.new(node.document, [node])
          classes.each do |class_name|
            ns.remove_class(class_name)
          end
        end
      end

      # Unwrap the root element
      class UnwrapRoot < DomTransform
        register "unwrap_root"

        # Replace +node+'s children with the children of its single child.
        #
        # Raises InvalidSanitization unless +node+ has exactly one child; the
        # message distinguishes "none" from "more than one".
        def apply(node)
          roots = node.children
          case roots.size
          when 1
            node.children = roots[0].children
          when 0
            raise InvalidSanitization, "No root element in unwrap_root"
          else
            raise InvalidSanitization, "Multiple root elements in unwrap_root"
          end
        end
      end

    end
  end
end
@@ -0,0 +1,56 @@
1
class SiteDiff
  class Sanitizer
    # A regular-expression sanitization rule.
    #
    # The rule hash uses string keys:
    #  * 'pattern'    - source of the regular expression (required)
    #  * 'substitute' - replacement text; defaults to '' (i.e. delete matches)
    #  * 'selector'   - optional CSS selector; when present the rule is only
    #                   applied inside matching elements (see WithSelector)
    #
    # Use Regexp.create to get the right variant for a rule.
    class Regexp
      def initialize(rule)
        @rule = rule
      end

      # Whether this rule works on DOM nodes (true) or on raw HTML (false).
      def selector?
        false
      end

      # True when applying the rule to +html+ would change it. +node+ is
      # ignored here; it exists so both variants share one signature.
      def applies?(html, node)
        applies_to_string?(html)
      end

      # Apply the substitution to +html+ IN PLACE and return that same string.
      def apply(html)
        gsub!(html)
      end

      # Build a plain or selector-scoped rule depending on rule['selector'].
      def self.create(rule)
        rule['selector'] ? WithSelector.new(rule) : new(rule)
      end

      # Variant that restricts the substitution to elements matching
      # rule['selector']. Its apply/applies? operate on a DOM node.
      class WithSelector < Regexp
        def selector?
          true
        end

        # Yield each element below +node+ that matches the selector.
        def contexts(node)
          sels = @rule['selector']
          node.css(sels).each { |e| yield(e) }
        end

        # True when the rule would change at least one matching element.
        # +html+ is ignored.
        def applies?(html, node)
          enum_for(:contexts, node).any? { |e| applies_to_string?(e.to_html) }
        end

        # Replace every matching element with its substituted serialization.
        def apply(node)
          contexts(node) { |e| e.replace(gsub!(e.to_html)) }
        end
      end

      protected

      # Substitute in place and always return +str+ (String#gsub! alone
      # returns nil when nothing matched).
      def gsub!(str)
        str.gsub!(compiled_pattern, @rule['substitute'] || '')
        str
      end

      # Probe on a copy so the caller's string is left untouched.
      def applies_to_string?(str)
        gsub!(str.dup) != str
      end

      private

      # Compile rule['pattern'] once per rule object instead of on every
      # substitution (apply/applies? may run it once per matched element).
      def compiled_pattern
        @compiled_pattern ||= ::Regexp.new(@rule['pattern'])
      end
    end
  end
end
@@ -1,19 +1,31 @@
1
+ require 'sitediff/exception'
1
2
  require 'typhoeus'
3
+ require 'addressable/uri'
2
4
 
3
5
  class SiteDiff
4
- class SiteDiffReadFailure < Exception; end
6
+ class SiteDiffReadFailure < SiteDiffException; end
5
7
 
6
8
  class UriWrapper
7
9
  # This lets us treat errors or content as one object
8
- class ReadResult < Struct.new(:content, :error)
9
- def initialize(cont, err = nil)
10
- super(cont, err)
10
+ class ReadResult
11
+ attr_accessor :content, :error_code, :error
12
+
13
+ def initialize(content = nil)
14
+ @content = content
15
+ @error = nil
16
+ @error_code = nil
17
+ end
18
+
19
+ def self.error(err, code = nil)
20
+ res = new
21
+ res.error_code = code
22
+ res.error = err
23
+ return res
11
24
  end
12
- def self.error(err); new(nil, err); end
13
25
  end
14
26
 
15
27
  def initialize(uri)
16
- @uri = uri.respond_to?(:scheme) ? uri : URI.parse(uri)
28
+ @uri = uri.respond_to?(:scheme) ? uri : Addressable::URI.parse(uri)
17
29
  # remove trailing '/'s from local URIs
18
30
  @uri.path.gsub!(/\/*$/, '') if local?
19
31
  end
@@ -95,7 +107,7 @@ class SiteDiff
95
107
  req.on_failure do |resp|
96
108
  msg = 'Unknown Error'
97
109
  msg = resp.status_message if resp and resp.status_message
98
- yield ReadResult.error("HTTP error #{@uri}: #{msg}")
110
+ yield ReadResult.error("HTTP error #{@uri}: #{msg}", resp.response_code)
99
111
  end
100
112
 
101
113
  req
@@ -0,0 +1,82 @@
1
+ require 'webrick'
2
+
3
class SiteDiff
  # Simple webserver for testing purposes.
  #
  # Starts one WEBrick server per directory, each in its own thread, on
  # consecutive ports beginning at the given start port.
  class Webserver
    DEFAULT_PORT = 13080

    attr_accessor :ports

    # Serve a list of directories.
    #
    # start_port - first port to use; nil means DEFAULT_PORT.
    # dirs       - directories to serve; dirs[i] is served on start_port + i.
    # opts       - options; :quiet => true silences WEBrick logging.
    #
    # With a block, yields the running Webserver and kills the server threads
    # when the block finishes, even if it raises. Without a block the servers
    # keep running until `kill` is called (see also `wait`).
    def initialize(start_port, dirs, opts = {})
      start_port ||= DEFAULT_PORT
      @ports = (start_port...(start_port + dirs.size)).to_a
      @dirs = dirs
      @opts = opts

      setup
      start_servers

      return unless block_given?

      begin
        yield self
      ensure
        # Don't leak server threads when the block raises.
        kill
      end
    end

    # Kill all server threads.
    def kill
      @threads.each { |t| t.kill }
    end

    # Block until all server threads have finished.
    def wait
      @threads.each { |t| t.join }
    end

    # Base URI of each server, in the same order as the directories.
    def uris
      ports.map { |p| "http://localhost:#{p}" }
    end

    # Test server for the bundled ruby-doc.org fixtures: serves one directory
    # per entry of NAMES; `before`/`after` are the first and last URIs.
    class FixtureServer < Webserver
      PORT = DEFAULT_PORT + 1
      BASE = 'spec/fixtures/ruby-doc.org'
      NAMES = %w[core-1.9.3 core-2.0]

      def initialize(port = PORT, base = BASE, names = NAMES)
        dirs = names.map { |n| File.join(base, n) }
        super(port, dirs, :quiet => true)
      end

      def before
        uris.first
      end

      def after
        uris.last
      end
    end

    protected

    # Prepare the options shared by every server. Runs before start_servers.
    def setup
      @server_opts = {}
      if @opts[:quiet]
        @server_opts[:Logger] = WEBrick::Log.new(IO::NULL)
        @server_opts[:AccessLog] = []
      end
    end

    # Hook: build one server from its options (shared options plus :Port and
    # :DocumentRoot). Subclasses override this to customise the server.
    def server(opts)
      WEBrick::HTTPServer.new(opts)
    end

    # Create one server per directory and start each in its own thread.
    def start_servers
      @threads = []
      @dirs.each_with_index do |dir, idx|
        # Hand each server its own hash, so a `server` override that edits
        # its argument cannot affect the shared options or later servers.
        srv = server(@server_opts.merge(:Port => @ports[idx], :DocumentRoot => dir))
        @threads << Thread.new { srv.start }
      end
    end
  end
end
@@ -0,0 +1,98 @@
1
+ require 'sitediff'
2
+ require 'sitediff/webserver'
3
+ require 'erb'
4
+
5
class SiteDiff
  class Webserver
    # Serves a report directory on a single port and mounts:
    #
    #  * /            - redirects to /files/<SiteDiff::REPORT_FILE>
    #  * /files       - the report directory itself
    #  * /cache       - cached pages (CacheServlet)
    #  * /sidebyside  - before/after comparison page (SideBySideServlet)
    class ResultServer < Webserver
      # Display a page from the cache
      class CacheServlet < WEBrick::HTTPServlet::AbstractServlet
        def initialize(server, cache)
          # Let AbstractServlet set up its own state (server, config, logger).
          super(server)
          @cache = cache
        end

        # Serves /<tag>/<path> from the cache.
        #
        # Responds 404 when the URL has no tag/path or the cache has no entry.
        # For a cached failure it re-raises the recorded HTTP status, or a 500
        # carrying the recorded error message.
        # NOTE(review): assumes @cache.get returns an object exposing
        # error_code, error and content - confirm against the cache class.
        def do_GET(req, res)
          path = req.path_info
          md = %r[\A/([^/]+)(/.*)\z].match(path)
          raise WEBrick::HTTPStatus::NotFound unless md
          tag, path = *md.captures
          r = @cache.get(tag.to_sym, path)
          raise WEBrick::HTTPStatus::NotFound unless r

          # HTTPStatus[] is nil for codes that are not real HTTP statuses
          # (e.g. 0 for a failed connection); `raise nil` would be a TypeError,
          # so fall through to the generic error below in that case.
          status = r.error_code && WEBrick::HTTPStatus[r.error_code]
          raise status if status
          raise WEBrick::HTTPStatus::InternalServerError, r.error if r.error

          res['content-type'] = 'text/html'
          res.body = r.content
        end
      end

      # Display two pages side by side
      class SideBySideServlet < WEBrick::HTTPServlet::AbstractServlet
        def initialize(server, cache, settings)
          # Let AbstractServlet set up its own state (server, config, logger).
          super(server)
          @cache = cache
          @settings = settings
        end

        # Returns [before_url, after_url] for +path+. A side listed in
        # settings['cached'] is served from /cache/<tag>; otherwise its base
        # URL comes from settings[<tag>].
        def urls(path)
          cached = @settings['cached'] || []
          %w[before after].map do |tag|
            base = cached.include?(tag) ? "/cache/#{tag}" : @settings[tag]
            base + path
          end
        end

        # Renders sidebyside.html.erb. The template is evaluated with this
        # method's binding, so the local names below (req, res, path, before,
        # after) are visible to it - do not rename them.
        def do_GET(req, res)
          path = req.path_info
          before, after = *urls(path)

          res['content-type'] = 'text/html'
          erb = File.join(SiteDiff::FILES_DIR, 'sidebyside.html.erb')
          res.body = ERB.new(File.read(erb)).result(binding)
        end
      end

      # port - port to serve on (nil falls back to Webserver's default).
      # dir  - report directory; must contain SiteDiff::SETTINGS_FILE.
      # opts - Webserver options, plus :cache (handed to the servlets) and
      #        :browse (open the report in a browser).
      def initialize(port, dir, opts = {})
        @settings = YAML.load_file(File.join(dir, SiteDiff::SETTINGS_FILE))
        @cache = opts[:cache]
        super(port, [dir], opts)
      end

      # Build the server with custom mounts instead of a plain document root.
      def server(opts)
        # The directory is mounted under /files below, not served at '/'.
        dir = opts.delete(:DocumentRoot)
        srv = super(opts)
        srv.mount_proc('/') do |req, res|
          res.set_redirect(WEBrick::HTTPStatus::Found,
            "/files/#{SiteDiff::REPORT_FILE}")
        end

        srv.mount('/files', WEBrick::HTTPServlet::FileHandler, dir, true)
        srv.mount('/cache', CacheServlet, @cache)
        srv.mount('/sidebyside', SideBySideServlet, @cache, @settings)
        srv
      end

      # Announce the URL and optionally open it in a browser.
      # NOTE(review): Webserver#initialize calls setup before start_servers,
      # so the browser may be launched before the port is listening.
      def setup
        super
        root = uris.first
        puts "Serving at #{root}"
        open_in_browser(root) if @opts[:browse]
      end

      # Open +url+ with the first available opener command (xdg-open, open).
      # Returns the command's path, or nil when none was found.
      def open_in_browser(url)
        commands = %w[xdg-open open]
        cmd = commands.find { |c| which(c) }
        system(cmd, url) if cmd
        cmd
      end

      # Full path of executable +cmd+ found on PATH, or nil.
      def which(cmd)
        # PATH may be unset; treat that as an empty search path.
        ENV.fetch('PATH', '').split(File::PATH_SEPARATOR).each do |path|
          file = File.join(path, cmd)
          return file if File.executable?(file)
        end
        nil
      end
    end
  end
end
metadata CHANGED
@@ -1,7 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitediff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
+ prerelease:
5
6
  platform: ruby
6
7
  authors:
7
8
  - Alex Dergachev
@@ -10,122 +11,166 @@ authors:
10
11
  autorequire:
11
12
  bindir: bin
12
13
  cert_chain: []
13
- date: 2015-04-21 00:00:00.000000000 Z
14
+ date: 2015-05-05 00:00:00.000000000 Z
14
15
  dependencies:
15
16
  - !ruby/object:Gem::Dependency
16
17
  name: thor
17
18
  requirement: !ruby/object:Gem::Requirement
19
+ none: false
18
20
  requirements:
19
- - - '>='
21
+ - - ! '>='
20
22
  - !ruby/object:Gem::Version
21
23
  version: '0'
22
24
  type: :runtime
23
25
  prerelease: false
24
26
  version_requirements: !ruby/object:Gem::Requirement
27
+ none: false
25
28
  requirements:
26
- - - '>='
29
+ - - ! '>='
27
30
  - !ruby/object:Gem::Version
28
31
  version: '0'
29
32
  - !ruby/object:Gem::Dependency
30
33
  name: nokogiri
31
34
  requirement: !ruby/object:Gem::Requirement
35
+ none: false
32
36
  requirements:
33
- - - '>='
37
+ - - ! '>='
34
38
  - !ruby/object:Gem::Version
35
39
  version: '0'
36
40
  type: :runtime
37
41
  prerelease: false
38
42
  version_requirements: !ruby/object:Gem::Requirement
43
+ none: false
39
44
  requirements:
40
- - - '>='
45
+ - - ! '>='
41
46
  - !ruby/object:Gem::Version
42
47
  version: '0'
43
48
  - !ruby/object:Gem::Dependency
44
49
  name: diffy
45
50
  requirement: !ruby/object:Gem::Requirement
51
+ none: false
46
52
  requirements:
47
- - - '>='
53
+ - - ! '>='
48
54
  - !ruby/object:Gem::Version
49
55
  version: '0'
50
56
  type: :runtime
51
57
  prerelease: false
52
58
  version_requirements: !ruby/object:Gem::Requirement
59
+ none: false
53
60
  requirements:
54
- - - '>='
61
+ - - ! '>='
55
62
  - !ruby/object:Gem::Version
56
63
  version: '0'
57
64
  - !ruby/object:Gem::Dependency
58
65
  name: typhoeus
59
66
  requirement: !ruby/object:Gem::Requirement
67
+ none: false
60
68
  requirements:
61
- - - '>='
69
+ - - ! '>='
62
70
  - !ruby/object:Gem::Version
63
71
  version: '0'
64
72
  type: :runtime
65
73
  prerelease: false
66
74
  version_requirements: !ruby/object:Gem::Requirement
75
+ none: false
67
76
  requirements:
68
- - - '>='
77
+ - - ! '>='
69
78
  - !ruby/object:Gem::Version
70
79
  version: '0'
71
80
  - !ruby/object:Gem::Dependency
72
81
  name: rainbow
73
82
  requirement: !ruby/object:Gem::Requirement
83
+ none: false
74
84
  requirements:
75
- - - '>='
85
+ - - ! '>='
76
86
  - !ruby/object:Gem::Version
77
87
  version: '0'
78
88
  type: :runtime
79
89
  prerelease: false
80
90
  version_requirements: !ruby/object:Gem::Requirement
91
+ none: false
81
92
  requirements:
82
- - - '>='
93
+ - - ! '>='
83
94
  - !ruby/object:Gem::Version
84
95
  version: '0'
85
- description: |
86
- SiteDiff makes it easy to see differences between two versions of a website. It accepts a set of paths to compare two versions of the site together with potential normalization/sanitization rules. From the provided paths and configuration SiteDiff generates an HTML report of all the status of HTML comparison between the given paths together with a readable diff-like HTML for each specified path containing the differences between the two versions of the site. It is useful tool for QAing re-deployments, site upgrades, etc.
96
+ - !ruby/object:Gem::Dependency
97
+ name: addressable
98
+ requirement: !ruby/object:Gem::Requirement
99
+ none: false
100
+ requirements:
101
+ - - ! '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ none: false
108
+ requirements:
109
+ - - ! '>='
110
+ - !ruby/object:Gem::Version
111
+ version: '0'
112
+ description: ! 'SiteDiff makes it easy to see differences between two versions of
113
+ a website. It accepts a set of paths to compare two versions of the site together
114
+ with potential normalization/sanitization rules. From the provided paths and configuration
115
+ SiteDiff generates an HTML report of all the status of HTML comparison between the
116
+ given paths together with a readable diff-like HTML for each specified path containing
117
+ the differences between the two versions of the site. It is useful tool for QAing
118
+ re-deployments, site upgrades, etc.
119
+
120
+ '
87
121
  email: alex@evolvingweb.ca
88
122
  executables:
89
123
  - sitediff
90
124
  extensions: []
91
125
  extra_rdoc_files: []
92
126
  files:
127
+ - lib/sitediff.rb
128
+ - lib/sitediff/crawler.rb
129
+ - lib/sitediff/sanitize/dom_transform.rb
130
+ - lib/sitediff/sanitize/regexp.rb
131
+ - lib/sitediff/config/creator.rb
93
132
  - lib/sitediff/cli.rb
133
+ - lib/sitediff/fetch.rb
134
+ - lib/sitediff/exception.rb
135
+ - lib/sitediff/cache.rb
94
136
  - lib/sitediff/config.rb
95
- - lib/sitediff/diff.rb
137
+ - lib/sitediff/uriwrapper.rb
96
138
  - lib/sitediff/result.rb
97
139
  - lib/sitediff/sanitize.rb
98
- - lib/sitediff/uriwrapper.rb
99
- - lib/sitediff/util/cache.rb
100
- - lib/sitediff/util/webserver.rb
101
- - lib/sitediff.rb
140
+ - lib/sitediff/webserver/resultserver.rb
141
+ - lib/sitediff/rules.rb
142
+ - lib/sitediff/webserver.rb
143
+ - lib/sitediff/diff.rb
102
144
  - lib/sitediff/files/diff.html.erb
145
+ - lib/sitediff/files/sidebyside.html.erb
146
+ - lib/sitediff/files/sitediff.css
103
147
  - lib/sitediff/files/html_report.html.erb
104
148
  - lib/sitediff/files/pretty_print.xsl
105
- - lib/sitediff/files/sitediff.css
149
+ - lib/sitediff/files/rules/drupal.yaml
106
150
  - bin/sitediff
107
151
  homepage: https://github.com/evolvingweb/sitediff/
108
152
  licenses:
109
153
  - GPL-2
110
- metadata: {}
111
154
  post_install_message:
112
155
  rdoc_options: []
113
156
  require_paths:
114
157
  - lib
115
158
  required_ruby_version: !ruby/object:Gem::Requirement
159
+ none: false
116
160
  requirements:
117
- - - '>='
161
+ - - ! '>='
118
162
  - !ruby/object:Gem::Version
119
163
  version: 1.9.3
120
164
  required_rubygems_version: !ruby/object:Gem::Requirement
165
+ none: false
121
166
  requirements:
122
- - - '>='
167
+ - - ! '>='
123
168
  - !ruby/object:Gem::Version
124
169
  version: '0'
125
170
  requirements: []
126
171
  rubyforge_project:
127
- rubygems_version: 2.0.14
172
+ rubygems_version: 1.8.23
128
173
  signing_key:
129
- specification_version: 4
174
+ specification_version: 3
130
175
  summary: Compare two versions of a site with ease!
131
176
  test_files: []