sitediff 0.0.6 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,41 +2,62 @@
2
2
 
3
3
  class SiteDiff
4
4
  class Sanitizer
5
+ # Regular Expression Object.
5
6
  class Regexp
7
+ ##
8
+ # Creates a RegExp object.
6
9
  def initialize(rule)
7
10
  @rule = rule
8
11
  end
9
12
 
13
+ ##
14
+ # Whether the RegExp has a selector.
10
15
  def selector?
11
16
  false
12
17
  end
13
18
 
19
+ ##
20
+ # Whether the RegExp applies to the given markup.
14
21
  def applies?(html, _node)
15
22
  applies_to_string?(html)
16
23
  end
17
24
 
25
+ ##
26
+ # Applies the RegExp to the markup.
18
27
  def apply(html)
19
28
  gsub!(html)
20
29
  end
21
30
 
31
+ ##
32
+ # Creates a RegExp object as per rule.
22
33
  def self.create(rule)
23
34
  rule['selector'] ? WithSelector.new(rule) : new(rule)
24
35
  end
25
36
 
37
+ ##
38
+ # A RegExp with selector.
26
39
  class WithSelector < Regexp
40
+ ##
41
+ # Whether the RegExp has a selector.
27
42
  def selector?
28
43
  true
29
44
  end
30
45
 
46
+ ##
47
+ # TODO: Document what this method does.
31
48
  def contexts(node)
32
- sels = @rule['selector']
33
- node.css(sels).each { |e| yield(e) }
49
+ selectors = @rule['selector']
50
+ node.css(selectors).each { |e| yield(e) }
34
51
  end
35
52
 
53
+ ##
54
+ # Whether the RegExp applies to the given markup.
36
55
  def applies?(_html, node)
37
56
  enum_for(:contexts, node).any? { |e| applies_to_string?(e.to_html) }
38
57
  end
39
58
 
59
+ ##
60
+ # Applies the RegExp to the markup.
40
61
  def apply(node)
41
62
  contexts(node) { |e| e.replace(gsub!(e.to_html)) }
42
63
  end
@@ -7,10 +7,14 @@ require 'addressable/uri'
7
7
  class SiteDiff
8
8
  class SiteDiffReadFailure < SiteDiffException; end
9
9
 
10
+ # SiteDiff URI Wrapper.
10
11
  class UriWrapper
12
+ # TODO: Move these CURL OPTS to Config.DEFAULT_CONFIG.
11
13
  DEFAULT_CURL_OPTS = {
12
- connecttimeout: 3, # Don't hang on servers that don't exist
13
- followlocation: true, # Follow HTTP redirects (code 301 and 302)
14
+ # Don't hang on servers that don't exist.
15
+ connecttimeout: 3,
16
+ # Follow HTTP redirects (code 301 and 302).
17
+ followlocation: true,
14
18
  headers: {
15
19
  'User-Agent' => 'Sitediff - https://github.com/evolvingweb/sitediff'
16
20
  }
@@ -20,6 +24,8 @@ class SiteDiff
20
24
  class ReadResult
21
25
  attr_accessor :encoding, :content, :error_code, :error
22
26
 
27
+ ##
28
+ # Creates a ReadResult.
23
29
  def initialize(content = nil, encoding = 'utf-8')
24
30
  @content = content
25
31
  @encoding = encoding
@@ -27,14 +33,18 @@ class SiteDiff
27
33
  @error_code = nil
28
34
  end
29
35
 
30
- def self.error(err, code = nil)
36
+ ##
37
+ # Creates a ReadResult with an error.
38
+ def self.error(message, code = nil)
31
39
  res = new
32
40
  res.error_code = code
33
- res.error = err
41
+ res.error = message
34
42
  res
35
43
  end
36
44
  end
37
45
 
46
+ ##
47
+ # Creates a UriWrapper.
38
48
  def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug = true)
39
49
  @uri = uri.respond_to?(:scheme) ? uri : Addressable::URI.parse(uri)
40
50
  # remove trailing '/'s from local URIs
@@ -43,14 +53,20 @@ class SiteDiff
43
53
  @debug = debug
44
54
  end
45
55
 
56
+ ##
57
+ # Returns the "user" part of the URI.
46
58
  def user
47
59
  @uri.user
48
60
  end
49
61
 
62
+ ##
63
+ # Returns the "password" part of the URI.
50
64
  def password
51
65
  @uri.password
52
66
  end
53
67
 
68
+ ##
69
+ # Converts the URI to a string.
54
70
  def to_s
55
71
  uri = @uri.dup
56
72
  uri.user = nil
@@ -58,19 +74,22 @@ class SiteDiff
58
74
  uri.to_s
59
75
  end
60
76
 
77
+ ##
61
78
  # Is this a local filesystem path?
62
79
  def local?
63
80
  @uri.scheme.nil?
64
81
  end
65
82
 
83
+ ## What does this one do?
66
84
  # FIXME: this is not used anymore
67
- def +(path)
85
+ def +(other)
68
86
  # 'path' for SiteDiff includes (parts of) path, query, and fragment.
69
87
  sep = ''
70
88
  sep = '/' if local? || @uri.path.empty?
71
- self.class.new(@uri.to_s + sep + path)
89
+ self.class.new(@uri.to_s + sep + other)
72
90
  end
73
91
 
92
+ ##
74
93
  # Reads a file and yields to the completion handler, see .queue()
75
94
  def read_file
76
95
  File.open(@uri.to_s, 'r:UTF-8') { |f| yield ReadResult.new(f.read) }
@@ -114,25 +133,35 @@ class SiteDiff
114
133
  rescue ArgumentError => e
115
134
  raise if @debug
116
135
 
117
- yield ReadResult.error("Parsing error for #{@uri}: #{e.message}")
118
- rescue => e
136
+ yield ReadResult.error(
137
+ "Parsing error for #{@uri}: #{e.message}"
138
+ )
139
+ rescue StandardError => e
119
140
  raise if @debug
120
141
 
121
- yield ReadResult.error("Unknown parsing error for #{@uri}: #{e.message}")
142
+ yield ReadResult.error(
143
+ "Unknown parsing error for #{@uri}: #{e.message}"
144
+ )
122
145
  end
123
146
  end
124
147
 
125
148
  req.on_failure do |resp|
126
149
  if resp&.status_message
127
150
  msg = resp.status_message
128
- yield ReadResult.error("HTTP error when loading #{@uri}: #{msg}",
129
- resp.response_code)
151
+ yield ReadResult.error(
152
+ "HTTP error when loading #{@uri}: #{msg}",
153
+ resp.response_code
154
+ )
130
155
  elsif (msg = resp.options[:return_code])
131
- yield ReadResult.error("Connection error when loading #{@uri}: #{msg}",
132
- resp.response_code)
156
+ yield ReadResult.error(
157
+ "Connection error when loading #{@uri}: #{msg}",
158
+ resp.response_code
159
+ )
133
160
  else
134
- yield ReadResult.error("Unknown error when loading #{@uri}: #{msg}",
135
- resp.response_code)
161
+ yield ReadResult.error(
162
+ "Unknown error when loading #{@uri}: #{msg}",
163
+ resp.response_code
164
+ )
136
165
  end
137
166
  end
138
167
 
@@ -152,5 +181,17 @@ class SiteDiff
152
181
  hydra.queue(typhoeus_request(&handler))
153
182
  end
154
183
  end
184
+
185
+ ##
186
+ # Canonicalize a path.
187
+ #
188
+ # @param [String] path
189
+ # A base relative path. Example: /foo/bar
190
+ def self.canonicalize(path)
191
+ # Ignore trailing slashes for all paths except "/" (front page).
192
+ path = path.chomp('/') unless path == '/'
193
+ # If the path is empty, assume that it's the front page.
194
+ path.empty? ? '/' : path
195
+ end
155
196
  end
156
197
  end
@@ -3,13 +3,15 @@
3
3
  require 'webrick'
4
4
 
5
5
  class SiteDiff
6
+ # SiteDiff Web Server.
6
7
  class Webserver
7
- # Simple webserver for testing purposes
8
+ # Simple web server for testing purposes.
8
9
  DEFAULT_PORT = 13_080
9
10
 
10
11
  attr_accessor :ports
11
12
 
12
- # Serve a list of directories
13
+ ##
14
+ # Serve a list of directories.
13
15
  def initialize(start_port, dirs, opts = {})
14
16
  start_port ||= DEFAULT_PORT
15
17
  @ports = (start_port...(start_port + dirs.size)).to_a
@@ -25,14 +27,20 @@ class SiteDiff
25
27
  end
26
28
  end
27
29
 
30
+ ##
31
+ # Kills the server.
28
32
  def kill
29
33
  @threads.each(&:kill)
30
34
  end
31
35
 
36
+ ##
37
+ # Waits for the server.
32
38
  def wait
33
39
  @threads.each(&:join)
34
40
  end
35
41
 
42
+ ##
43
+ # Maps URIs to defined ports and returns a list of URIs.
36
44
  def uris
37
45
  ports.map { |p| "http://localhost:#{p}" }
38
46
  end
@@ -63,9 +71,10 @@ class SiteDiff
63
71
 
64
72
  public
65
73
 
74
+ # SiteDiff Fixture Server.
66
75
  class FixtureServer < Webserver
67
76
  PORT = DEFAULT_PORT + 1
68
- BASE = 'spec/fixtures/ruby-doc.org'
77
+ BASE = 'spec/sites/ruby-doc.org'
69
78
  NAMES = %w[core-1.9.3 core-2.0].freeze
70
79
 
71
80
  def initialize(port = PORT, base = BASE, names = NAMES)
@@ -1,18 +1,24 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'sitediff'
4
+ require 'sitediff/report'
4
5
  require 'sitediff/webserver'
5
6
  require 'erb'
6
7
 
7
8
  class SiteDiff
8
9
  class Webserver
10
+ # SiteDiff Result Server.
9
11
  class ResultServer < Webserver
10
12
  # Display a page from the cache
11
13
  class CacheServlet < WEBrick::HTTPServlet::AbstractServlet
14
+ ##
15
+ # Creates a Cache Servlet.
12
16
  def initialize(_server, cache)
13
17
  @cache = cache
14
18
  end
15
19
 
20
+ ##
21
+ # Performs a GET request.
16
22
  def do_GET(req, res)
17
23
  path = req.path_info
18
24
  (md = %r{^/([^/]+)(/.*)$}.match(path)) ||
@@ -29,13 +35,18 @@ class SiteDiff
29
35
  end
30
36
  end
31
37
 
32
- # Display two pages side by side
38
+ ##
39
+ # Display two pages side by side.
33
40
  class SideBySideServlet < WEBrick::HTTPServlet::AbstractServlet
41
+ ##
42
+ # Creates a Side By Side Servlet.
34
43
  def initialize(_server, cache, settings)
35
44
  @cache = cache
36
45
  @settings = settings
37
46
  end
38
47
 
48
+ ##
49
+ # Generates URLs for a given path.
39
50
  def urls(path)
40
51
  %w[before after].map do |tag|
41
52
  base = @settings[tag]
@@ -44,6 +55,8 @@ class SiteDiff
44
55
  end
45
56
  end
46
57
 
58
+ ##
59
+ # Performs a GET request.
47
60
  def do_GET(req, res)
48
61
  path = req.path_info
49
62
  before, after = *urls(path)
@@ -54,52 +67,29 @@ class SiteDiff
54
67
  end
55
68
  end
56
69
 
57
- # Run sitediff command from browser. Probably dangerous in general.
58
- class RunServlet < WEBrick::HTTPServlet::AbstractServlet
59
- def initialize(_server, dir)
60
- @dir = dir
61
- end
62
-
63
- def do_GET(req, res)
64
- path = req.path_info
65
- if path != '/diff'
66
- res['content-type'] = 'text/plain'
67
- res.body = 'ERROR: Only /run/diff is supported by the /run API at the moment'
68
- return
69
- end
70
- # Thor assumes only one command is called and some values like
71
- # `options` are share across all SiteDiff::Cli instances so
72
- # we can't just call SiteDiff::Cli.new().diff
73
- # This is likely to go very wrong depending on how `sitediff serve`
74
- # was actually called
75
- cmd = "#{$PROGRAM_NAME} diff -C #{@dir} --cached=all"
76
- system(cmd)
77
-
78
- # Could also add a message to indicate success/failure
79
- # But for the moment, all our files are static
80
- res.set_redirect(WEBrick::HTTPStatus::Found,
81
- "/files/#{SiteDiff::REPORT_FILE}")
82
- end
83
- end
84
-
70
+ ##
71
+ # Creates a Result Server.
85
72
  def initialize(port, dir, opts = {})
86
- unless File.exist?(File.join(dir, SiteDiff::SETTINGS_FILE))
73
+ unless File.exist?(File.join(dir, Report::SETTINGS_FILE))
87
74
  raise SiteDiffException,
88
75
  "Please run 'sitediff diff' before running 'sitediff serve'"
89
76
  end
90
77
 
91
- @settings = YAML.load_file(File.join(dir, SiteDiff::SETTINGS_FILE))
78
+ @settings = YAML.load_file(File.join(dir, Report::SETTINGS_FILE))
79
+ puts @settings
92
80
  @cache = opts[:cache]
93
81
  super(port, [dir], opts)
94
82
  end
95
83
 
84
+ ##
85
+ # TODO: Document what this method does.
96
86
  def server(opts)
97
87
  dir = opts.delete(:DocumentRoot)
98
88
  srv = super(opts)
99
89
  srv.mount_proc('/') do |req, res|
100
90
  if req.path == '/'
101
91
  res.set_redirect(WEBrick::HTTPStatus::Found,
102
- "/files/#{SiteDiff::REPORT_FILE}")
92
+ "/files/#{Report::REPORT_FILE_HTML}")
103
93
  else
104
94
  res.set_redirect(WEBrick::HTTPStatus::TemporaryRedirect,
105
95
  "#{@settings['after']}#{req.path}")
@@ -109,10 +99,11 @@ class SiteDiff
109
99
  srv.mount('/files', WEBrick::HTTPServlet::FileHandler, dir, true)
110
100
  srv.mount('/cache', CacheServlet, @cache)
111
101
  srv.mount('/sidebyside', SideBySideServlet, @cache, @settings)
112
- srv.mount('/run', RunServlet, dir)
113
102
  srv
114
103
  end
115
104
 
105
+ ##
106
+ # Sets up the server.
116
107
  def setup
117
108
  super
118
109
  root = uris.first
@@ -120,6 +111,8 @@ class SiteDiff
120
111
  open_in_browser(root) if @opts[:browse]
121
112
  end
122
113
 
114
+ ##
115
+ # Opens a URL in a browser.
123
116
  def open_in_browser(url)
124
117
  commands = %w[xdg-open open]
125
118
  cmd = commands.find { |c| which(c) }
@@ -127,6 +120,8 @@ class SiteDiff
127
120
  cmd
128
121
  end
129
122
 
123
+ ##
124
+ # TODO: Document what this method does.
130
125
  def which(cmd)
131
126
  ENV['PATH'].split(File::PATH_SEPARATOR).each do |path|
132
127
  file = File.join(path, cmd)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitediff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex Dergachev
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2019-04-02 00:00:00.000000000 Z
13
+ date: 2020-06-08 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: pkg-config
@@ -26,6 +26,20 @@ dependencies:
26
26
  - - "~>"
27
27
  - !ruby/object:Gem::Version
28
28
  version: '1.1'
29
+ - !ruby/object:Gem::Dependency
30
+ name: minitar
31
+ requirement: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - "~>"
34
+ - !ruby/object:Gem::Version
35
+ version: '0.9'
36
+ type: :runtime
37
+ prerelease: false
38
+ version_requirements: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - "~>"
41
+ - !ruby/object:Gem::Version
42
+ version: '0.9'
29
43
  - !ruby/object:Gem::Dependency
30
44
  name: thor
31
45
  requirement: !ruby/object:Gem::Requirement
@@ -72,16 +86,16 @@ dependencies:
72
86
  name: nokogiri
73
87
  requirement: !ruby/object:Gem::Requirement
74
88
  requirements:
75
- - - "~>"
89
+ - - ">="
76
90
  - !ruby/object:Gem::Version
77
- version: 1.8.2
91
+ version: 1.10.4
78
92
  type: :runtime
79
93
  prerelease: false
80
94
  version_requirements: !ruby/object:Gem::Requirement
81
95
  requirements:
82
- - - "~>"
96
+ - - ">="
83
97
  - !ruby/object:Gem::Version
84
- version: 1.8.2
98
+ version: 1.10.4
85
99
  - !ruby/object:Gem::Dependency
86
100
  name: addressable
87
101
  requirement: !ruby/object:Gem::Requirement
@@ -102,14 +116,14 @@ dependencies:
102
116
  requirements:
103
117
  - - "~>"
104
118
  - !ruby/object:Gem::Version
105
- version: 3.2.0
119
+ version: 3.3.0
106
120
  type: :runtime
107
121
  prerelease: false
108
122
  version_requirements: !ruby/object:Gem::Requirement
109
123
  requirements:
110
124
  - - "~>"
111
125
  - !ruby/object:Gem::Version
112
- version: 3.2.0
126
+ version: 3.3.0
113
127
  description: " SiteDiff makes it easy to see differences between two versions of
114
128
  a website. It accepts a set of paths to compare two versions of the site together
115
129
  with potential normalization/sanitization rules. From the provided paths and configuration
@@ -129,28 +143,32 @@ files:
129
143
  - lib/sitediff/cli.rb
130
144
  - lib/sitediff/config.rb
131
145
  - lib/sitediff/config/creator.rb
146
+ - lib/sitediff/config/preset.rb
132
147
  - lib/sitediff/crawler.rb
133
148
  - lib/sitediff/diff.rb
134
149
  - lib/sitediff/exception.rb
135
150
  - lib/sitediff/fetch.rb
136
151
  - lib/sitediff/files/diff.html.erb
137
- - lib/sitediff/files/html_report.html.erb
152
+ - lib/sitediff/files/jquery.min.js
153
+ - lib/sitediff/files/normalize.css
138
154
  - lib/sitediff/files/pretty_print.xsl
139
- - lib/sitediff/files/rules/drupal.yaml
155
+ - lib/sitediff/files/report.html.erb
140
156
  - lib/sitediff/files/sidebyside.html.erb
141
157
  - lib/sitediff/files/sitediff.css
158
+ - lib/sitediff/files/sitediff.js
159
+ - lib/sitediff/report.rb
142
160
  - lib/sitediff/result.rb
143
- - lib/sitediff/rules.rb
144
161
  - lib/sitediff/sanitize.rb
145
162
  - lib/sitediff/sanitize/dom_transform.rb
146
163
  - lib/sitediff/sanitize/regexp.rb
147
164
  - lib/sitediff/uriwrapper.rb
148
165
  - lib/sitediff/webserver.rb
149
166
  - lib/sitediff/webserver/resultserver.rb
150
- homepage: https://github.com/evolvingweb/sitediff/
167
+ homepage: https://sitediff.io/
151
168
  licenses:
152
169
  - GPL-2.0
153
- metadata: {}
170
+ metadata:
171
+ source_code_uri: https://github.com/evolvingweb/sitediff
154
172
  post_install_message:
155
173
  rdoc_options: []
156
174
  require_paths:
@@ -159,15 +177,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
159
177
  requirements:
160
178
  - - ">="
161
179
  - !ruby/object:Gem::Version
162
- version: '2.3'
180
+ version: '2.4'
163
181
  required_rubygems_version: !ruby/object:Gem::Requirement
164
182
  requirements:
165
183
  - - ">="
166
184
  - !ruby/object:Gem::Version
167
185
  version: '0'
168
186
  requirements: []
169
- rubyforge_project:
170
- rubygems_version: 2.5.2.3
187
+ rubygems_version: 3.1.2
171
188
  signing_key:
172
189
  specification_version: 4
173
190
  summary: Compare two versions of a site with ease!