sitediff 0.0.6 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,41 +2,62 @@
2
2
 
3
3
  class SiteDiff
4
4
  class Sanitizer
5
+ # Regular Expression Object.
5
6
  class Regexp
7
+ ##
8
+ # Creates a RegExp object.
6
9
  def initialize(rule)
7
10
  @rule = rule
8
11
  end
9
12
 
13
+ ##
14
+ # Whether the RegExp has a selector.
10
15
  def selector?
11
16
  false
12
17
  end
13
18
 
19
+ ##
20
+ # Whether the RegExp applies to the given markup.
14
21
  def applies?(html, _node)
15
22
  applies_to_string?(html)
16
23
  end
17
24
 
25
+ ##
26
+ # Applies the RegExp to the markup.
18
27
  def apply(html)
19
28
  gsub!(html)
20
29
  end
21
30
 
31
+ ##
32
+ # Creates a RegExp object as per rule.
22
33
  def self.create(rule)
23
34
  rule['selector'] ? WithSelector.new(rule) : new(rule)
24
35
  end
25
36
 
37
+ ##
38
+ # A RegExp with selector.
26
39
  class WithSelector < Regexp
40
+ ##
41
+ # Whether the RegExp has a selector.
27
42
  def selector?
28
43
  true
29
44
  end
30
45
 
46
+ ##
47
+ # Yields each element under the node that matches the rule's selector.
31
48
  def contexts(node)
32
- sels = @rule['selector']
33
- node.css(sels).each { |e| yield(e) }
49
+ selectors = @rule['selector']
50
+ node.css(selectors).each { |e| yield(e) }
34
51
  end
35
52
 
53
+ ##
54
+ # Whether the RegExp applies to the given markup.
36
55
  def applies?(_html, node)
37
56
  enum_for(:contexts, node).any? { |e| applies_to_string?(e.to_html) }
38
57
  end
39
58
 
59
+ ##
60
+ # Applies the RegExp to the markup.
40
61
  def apply(node)
41
62
  contexts(node) { |e| e.replace(gsub!(e.to_html)) }
42
63
  end
@@ -7,10 +7,14 @@ require 'addressable/uri'
7
7
  class SiteDiff
8
8
  class SiteDiffReadFailure < SiteDiffException; end
9
9
 
10
+ # SiteDiff URI Wrapper.
10
11
  class UriWrapper
12
+ # TODO: Move these CURL OPTS to Config.DEFAULT_CONFIG.
11
13
  DEFAULT_CURL_OPTS = {
12
- connecttimeout: 3, # Don't hang on servers that don't exist
13
- followlocation: true, # Follow HTTP redirects (code 301 and 302)
14
+ # Don't hang on servers that don't exist.
15
+ connecttimeout: 3,
16
+ # Follow HTTP redirects (code 301 and 302).
17
+ followlocation: true,
14
18
  headers: {
15
19
  'User-Agent' => 'Sitediff - https://github.com/evolvingweb/sitediff'
16
20
  }
@@ -20,6 +24,8 @@ class SiteDiff
20
24
  class ReadResult
21
25
  attr_accessor :encoding, :content, :error_code, :error
22
26
 
27
+ ##
28
+ # Creates a ReadResult.
23
29
  def initialize(content = nil, encoding = 'utf-8')
24
30
  @content = content
25
31
  @encoding = encoding
@@ -27,14 +33,18 @@ class SiteDiff
27
33
  @error_code = nil
28
34
  end
29
35
 
30
- def self.error(err, code = nil)
36
+ ##
37
+ # Creates a ReadResult with an error.
38
+ def self.error(message, code = nil)
31
39
  res = new
32
40
  res.error_code = code
33
- res.error = err
41
+ res.error = message
34
42
  res
35
43
  end
36
44
  end
37
45
 
46
+ ##
47
+ # Creates a UriWrapper.
38
48
  def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug = true)
39
49
  @uri = uri.respond_to?(:scheme) ? uri : Addressable::URI.parse(uri)
40
50
  # remove trailing '/'s from local URIs
@@ -43,14 +53,20 @@ class SiteDiff
43
53
  @debug = debug
44
54
  end
45
55
 
56
+ ##
57
+ # Returns the "user" part of the URI.
46
58
  def user
47
59
  @uri.user
48
60
  end
49
61
 
62
+ ##
63
+ # Returns the "password" part of the URI.
50
64
  def password
51
65
  @uri.password
52
66
  end
53
67
 
68
+ ##
69
+ # Converts the URI to a string.
54
70
  def to_s
55
71
  uri = @uri.dup
56
72
  uri.user = nil
@@ -58,19 +74,22 @@ class SiteDiff
58
74
  uri.to_s
59
75
  end
60
76
 
77
+ ##
61
78
  # Is this a local filesystem path?
62
79
  def local?
63
80
  @uri.scheme.nil?
64
81
  end
65
82
 
83
+ ## Returns a new UriWrapper for this URI with the given path appended.
66
84
  # FIXME: this is not used anymore
67
- def +(path)
85
+ def +(other)
68
86
  # 'path' for SiteDiff includes (parts of) path, query, and fragment.
69
87
  sep = ''
70
88
  sep = '/' if local? || @uri.path.empty?
71
- self.class.new(@uri.to_s + sep + path)
89
+ self.class.new(@uri.to_s + sep + other)
72
90
  end
73
91
 
92
+ ##
74
93
  # Reads a file and yields to the completion handler, see .queue()
75
94
  def read_file
76
95
  File.open(@uri.to_s, 'r:UTF-8') { |f| yield ReadResult.new(f.read) }
@@ -114,25 +133,35 @@ class SiteDiff
114
133
  rescue ArgumentError => e
115
134
  raise if @debug
116
135
 
117
- yield ReadResult.error("Parsing error for #{@uri}: #{e.message}")
118
- rescue => e
136
+ yield ReadResult.error(
137
+ "Parsing error for #{@uri}: #{e.message}"
138
+ )
139
+ rescue StandardError => e
119
140
  raise if @debug
120
141
 
121
- yield ReadResult.error("Unknown parsing error for #{@uri}: #{e.message}")
142
+ yield ReadResult.error(
143
+ "Unknown parsing error for #{@uri}: #{e.message}"
144
+ )
122
145
  end
123
146
  end
124
147
 
125
148
  req.on_failure do |resp|
126
149
  if resp&.status_message
127
150
  msg = resp.status_message
128
- yield ReadResult.error("HTTP error when loading #{@uri}: #{msg}",
129
- resp.response_code)
151
+ yield ReadResult.error(
152
+ "HTTP error when loading #{@uri}: #{msg}",
153
+ resp.response_code
154
+ )
130
155
  elsif (msg = resp.options[:return_code])
131
- yield ReadResult.error("Connection error when loading #{@uri}: #{msg}",
132
- resp.response_code)
156
+ yield ReadResult.error(
157
+ "Connection error when loading #{@uri}: #{msg}",
158
+ resp.response_code
159
+ )
133
160
  else
134
- yield ReadResult.error("Unknown error when loading #{@uri}: #{msg}",
135
- resp.response_code)
161
+ yield ReadResult.error(
162
+ "Unknown error when loading #{@uri}: #{msg}",
163
+ resp.response_code
164
+ )
136
165
  end
137
166
  end
138
167
 
@@ -152,5 +181,17 @@ class SiteDiff
152
181
  hydra.queue(typhoeus_request(&handler))
153
182
  end
154
183
  end
184
+
185
+ ##
186
+ # Canonicalize a path.
187
+ #
188
+ # @param [String] path
189
+ # A base relative path. Example: /foo/bar
190
+ def self.canonicalize(path)
191
+ # Ignore trailing slashes for all paths except "/" (front page).
192
+ path = path.chomp('/') unless path == '/'
193
+ # If the path is empty, assume that it's the front page.
194
+ path.empty? ? '/' : path
195
+ end
155
196
  end
156
197
  end
@@ -3,13 +3,15 @@
3
3
  require 'webrick'
4
4
 
5
5
  class SiteDiff
6
+ # SiteDiff Web Server.
6
7
  class Webserver
7
- # Simple webserver for testing purposes
8
+ # Simple web server for testing purposes.
8
9
  DEFAULT_PORT = 13_080
9
10
 
10
11
  attr_accessor :ports
11
12
 
12
- # Serve a list of directories
13
+ ##
14
+ # Serve a list of directories.
13
15
  def initialize(start_port, dirs, opts = {})
14
16
  start_port ||= DEFAULT_PORT
15
17
  @ports = (start_port...(start_port + dirs.size)).to_a
@@ -25,14 +27,20 @@ class SiteDiff
25
27
  end
26
28
  end
27
29
 
30
+ ##
31
+ # Kills the server.
28
32
  def kill
29
33
  @threads.each(&:kill)
30
34
  end
31
35
 
36
+ ##
37
+ # Waits for the server.
32
38
  def wait
33
39
  @threads.each(&:join)
34
40
  end
35
41
 
42
+ ##
43
+ # Returns a list of localhost URIs, one for each of the server's ports.
36
44
  def uris
37
45
  ports.map { |p| "http://localhost:#{p}" }
38
46
  end
@@ -63,9 +71,10 @@ class SiteDiff
63
71
 
64
72
  public
65
73
 
74
+ # SiteDiff Fixture Server.
66
75
  class FixtureServer < Webserver
67
76
  PORT = DEFAULT_PORT + 1
68
- BASE = 'spec/fixtures/ruby-doc.org'
77
+ BASE = 'spec/sites/ruby-doc.org'
69
78
  NAMES = %w[core-1.9.3 core-2.0].freeze
70
79
 
71
80
  def initialize(port = PORT, base = BASE, names = NAMES)
@@ -1,18 +1,24 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'sitediff'
4
+ require 'sitediff/report'
4
5
  require 'sitediff/webserver'
5
6
  require 'erb'
6
7
 
7
8
  class SiteDiff
8
9
  class Webserver
10
+ # SiteDiff Result Server.
9
11
  class ResultServer < Webserver
10
12
  # Display a page from the cache
11
13
  class CacheServlet < WEBrick::HTTPServlet::AbstractServlet
14
+ ##
15
+ # Creates a Cache Servlet.
12
16
  def initialize(_server, cache)
13
17
  @cache = cache
14
18
  end
15
19
 
20
+ ##
21
+ # Performs a GET request.
16
22
  def do_GET(req, res)
17
23
  path = req.path_info
18
24
  (md = %r{^/([^/]+)(/.*)$}.match(path)) ||
@@ -29,13 +35,18 @@ class SiteDiff
29
35
  end
30
36
  end
31
37
 
32
- # Display two pages side by side
38
+ ##
39
+ # Display two pages side by side.
33
40
  class SideBySideServlet < WEBrick::HTTPServlet::AbstractServlet
41
+ ##
42
+ # Creates a Side By Side Servlet.
34
43
  def initialize(_server, cache, settings)
35
44
  @cache = cache
36
45
  @settings = settings
37
46
  end
38
47
 
48
+ ##
49
+ # Generates URLs for a given path.
39
50
  def urls(path)
40
51
  %w[before after].map do |tag|
41
52
  base = @settings[tag]
@@ -44,6 +55,8 @@ class SiteDiff
44
55
  end
45
56
  end
46
57
 
58
+ ##
59
+ # Performs a GET request.
47
60
  def do_GET(req, res)
48
61
  path = req.path_info
49
62
  before, after = *urls(path)
@@ -54,52 +67,29 @@ class SiteDiff
54
67
  end
55
68
  end
56
69
 
57
- # Run sitediff command from browser. Probably dangerous in general.
58
- class RunServlet < WEBrick::HTTPServlet::AbstractServlet
59
- def initialize(_server, dir)
60
- @dir = dir
61
- end
62
-
63
- def do_GET(req, res)
64
- path = req.path_info
65
- if path != '/diff'
66
- res['content-type'] = 'text/plain'
67
- res.body = 'ERROR: Only /run/diff is supported by the /run API at the moment'
68
- return
69
- end
70
- # Thor assumes only one command is called and some values like
71
- # `options` are share across all SiteDiff::Cli instances so
72
- # we can't just call SiteDiff::Cli.new().diff
73
- # This is likely to go very wrong depending on how `sitediff serve`
74
- # was actually called
75
- cmd = "#{$PROGRAM_NAME} diff -C #{@dir} --cached=all"
76
- system(cmd)
77
-
78
- # Could also add a message to indicate success/failure
79
- # But for the moment, all our files are static
80
- res.set_redirect(WEBrick::HTTPStatus::Found,
81
- "/files/#{SiteDiff::REPORT_FILE}")
82
- end
83
- end
84
-
70
+ ##
71
+ # Creates a Result Server.
85
72
  def initialize(port, dir, opts = {})
86
- unless File.exist?(File.join(dir, SiteDiff::SETTINGS_FILE))
73
+ unless File.exist?(File.join(dir, Report::SETTINGS_FILE))
87
74
  raise SiteDiffException,
88
75
  "Please run 'sitediff diff' before running 'sitediff serve'"
89
76
  end
90
77
 
91
- @settings = YAML.load_file(File.join(dir, SiteDiff::SETTINGS_FILE))
78
+ @settings = YAML.load_file(File.join(dir, Report::SETTINGS_FILE))
79
+ puts @settings
92
80
  @cache = opts[:cache]
93
81
  super(port, [dir], opts)
94
82
  end
95
83
 
84
+ ##
85
+ # Builds the server and mounts the redirect, /files, /cache and /sidebyside handlers.
96
86
  def server(opts)
97
87
  dir = opts.delete(:DocumentRoot)
98
88
  srv = super(opts)
99
89
  srv.mount_proc('/') do |req, res|
100
90
  if req.path == '/'
101
91
  res.set_redirect(WEBrick::HTTPStatus::Found,
102
- "/files/#{SiteDiff::REPORT_FILE}")
92
+ "/files/#{Report::REPORT_FILE_HTML}")
103
93
  else
104
94
  res.set_redirect(WEBrick::HTTPStatus::TemporaryRedirect,
105
95
  "#{@settings['after']}#{req.path}")
@@ -109,10 +99,11 @@ class SiteDiff
109
99
  srv.mount('/files', WEBrick::HTTPServlet::FileHandler, dir, true)
110
100
  srv.mount('/cache', CacheServlet, @cache)
111
101
  srv.mount('/sidebyside', SideBySideServlet, @cache, @settings)
112
- srv.mount('/run', RunServlet, dir)
113
102
  srv
114
103
  end
115
104
 
105
+ ##
106
+ # Sets up the server.
116
107
  def setup
117
108
  super
118
109
  root = uris.first
@@ -120,6 +111,8 @@ class SiteDiff
120
111
  open_in_browser(root) if @opts[:browse]
121
112
  end
122
113
 
114
+ ##
115
+ # Opens a URL in a browser.
123
116
  def open_in_browser(url)
124
117
  commands = %w[xdg-open open]
125
118
  cmd = commands.find { |c| which(c) }
@@ -127,6 +120,8 @@ class SiteDiff
127
120
  cmd
128
121
  end
129
122
 
123
+ ##
124
+ # Looks for the given command in the directories listed in PATH.
130
125
  def which(cmd)
131
126
  ENV['PATH'].split(File::PATH_SEPARATOR).each do |path|
132
127
  file = File.join(path, cmd)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitediff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex Dergachev
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2019-04-02 00:00:00.000000000 Z
13
+ date: 2020-06-08 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: pkg-config
@@ -26,6 +26,20 @@ dependencies:
26
26
  - - "~>"
27
27
  - !ruby/object:Gem::Version
28
28
  version: '1.1'
29
+ - !ruby/object:Gem::Dependency
30
+ name: minitar
31
+ requirement: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - "~>"
34
+ - !ruby/object:Gem::Version
35
+ version: '0.9'
36
+ type: :runtime
37
+ prerelease: false
38
+ version_requirements: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - "~>"
41
+ - !ruby/object:Gem::Version
42
+ version: '0.9'
29
43
  - !ruby/object:Gem::Dependency
30
44
  name: thor
31
45
  requirement: !ruby/object:Gem::Requirement
@@ -72,16 +86,16 @@ dependencies:
72
86
  name: nokogiri
73
87
  requirement: !ruby/object:Gem::Requirement
74
88
  requirements:
75
- - - "~>"
89
+ - - ">="
76
90
  - !ruby/object:Gem::Version
77
- version: 1.8.2
91
+ version: 1.10.4
78
92
  type: :runtime
79
93
  prerelease: false
80
94
  version_requirements: !ruby/object:Gem::Requirement
81
95
  requirements:
82
- - - "~>"
96
+ - - ">="
83
97
  - !ruby/object:Gem::Version
84
- version: 1.8.2
98
+ version: 1.10.4
85
99
  - !ruby/object:Gem::Dependency
86
100
  name: addressable
87
101
  requirement: !ruby/object:Gem::Requirement
@@ -102,14 +116,14 @@ dependencies:
102
116
  requirements:
103
117
  - - "~>"
104
118
  - !ruby/object:Gem::Version
105
- version: 3.2.0
119
+ version: 3.3.0
106
120
  type: :runtime
107
121
  prerelease: false
108
122
  version_requirements: !ruby/object:Gem::Requirement
109
123
  requirements:
110
124
  - - "~>"
111
125
  - !ruby/object:Gem::Version
112
- version: 3.2.0
126
+ version: 3.3.0
113
127
  description: " SiteDiff makes it easy to see differences between two versions of
114
128
  a website. It accepts a set of paths to compare two versions of the site together
115
129
  with potential normalization/sanitization rules. From the provided paths and configuration
@@ -129,28 +143,32 @@ files:
129
143
  - lib/sitediff/cli.rb
130
144
  - lib/sitediff/config.rb
131
145
  - lib/sitediff/config/creator.rb
146
+ - lib/sitediff/config/preset.rb
132
147
  - lib/sitediff/crawler.rb
133
148
  - lib/sitediff/diff.rb
134
149
  - lib/sitediff/exception.rb
135
150
  - lib/sitediff/fetch.rb
136
151
  - lib/sitediff/files/diff.html.erb
137
- - lib/sitediff/files/html_report.html.erb
152
+ - lib/sitediff/files/jquery.min.js
153
+ - lib/sitediff/files/normalize.css
138
154
  - lib/sitediff/files/pretty_print.xsl
139
- - lib/sitediff/files/rules/drupal.yaml
155
+ - lib/sitediff/files/report.html.erb
140
156
  - lib/sitediff/files/sidebyside.html.erb
141
157
  - lib/sitediff/files/sitediff.css
158
+ - lib/sitediff/files/sitediff.js
159
+ - lib/sitediff/report.rb
142
160
  - lib/sitediff/result.rb
143
- - lib/sitediff/rules.rb
144
161
  - lib/sitediff/sanitize.rb
145
162
  - lib/sitediff/sanitize/dom_transform.rb
146
163
  - lib/sitediff/sanitize/regexp.rb
147
164
  - lib/sitediff/uriwrapper.rb
148
165
  - lib/sitediff/webserver.rb
149
166
  - lib/sitediff/webserver/resultserver.rb
150
- homepage: https://github.com/evolvingweb/sitediff/
167
+ homepage: https://sitediff.io/
151
168
  licenses:
152
169
  - GPL-2.0
153
- metadata: {}
170
+ metadata:
171
+ source_code_uri: https://github.com/evolvingweb/sitediff
154
172
  post_install_message:
155
173
  rdoc_options: []
156
174
  require_paths:
@@ -159,15 +177,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
159
177
  requirements:
160
178
  - - ">="
161
179
  - !ruby/object:Gem::Version
162
- version: '2.3'
180
+ version: '2.4'
163
181
  required_rubygems_version: !ruby/object:Gem::Requirement
164
182
  requirements:
165
183
  - - ">="
166
184
  - !ruby/object:Gem::Version
167
185
  version: '0'
168
186
  requirements: []
169
- rubyforge_project:
170
- rubygems_version: 2.5.2.3
187
+ rubygems_version: 3.1.2
171
188
  signing_key:
172
189
  specification_version: 4
173
190
  summary: Compare two versions of a site with ease!