sitediff 1.1.1 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.eslintignore +1 -0
- data/.eslintrc.json +28 -0
- data/.project +11 -0
- data/.rubocop.yml +179 -0
- data/.rubocop_todo.yml +51 -0
- data/CHANGELOG.md +28 -0
- data/Dockerfile +33 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +85 -0
- data/INSTALLATION.md +146 -0
- data/LICENSE +339 -0
- data/README.md +810 -0
- data/Rakefile +12 -0
- data/Thorfile +135 -0
- data/config/.gitkeep +0 -0
- data/config/sanitize_domains.example.yaml +8 -0
- data/config/sitediff.example.yaml +81 -0
- data/docker-compose.test.yml +3 -0
- data/lib/sitediff/api.rb +17 -6
- data/lib/sitediff/cache.rb +5 -3
- data/lib/sitediff/cli.rb +4 -3
- data/lib/sitediff/config/creator.rb +13 -13
- data/lib/sitediff/config/preset.rb +6 -6
- data/lib/sitediff/config.rb +9 -9
- data/lib/sitediff/crawler.rb +12 -2
- data/lib/sitediff/diff.rb +1 -1
- data/lib/sitediff/fetch.rb +2 -2
- data/lib/sitediff/files/report.html.erb +1 -1
- data/lib/sitediff/presets/drupal.yaml +63 -0
- data/lib/sitediff/report.rb +6 -6
- data/lib/sitediff/result.rb +5 -5
- data/lib/sitediff/sanitize/dom_transform.rb +2 -2
- data/lib/sitediff/sanitize/regexp.rb +2 -2
- data/lib/sitediff/sanitize.rb +5 -5
- data/lib/sitediff/uriwrapper.rb +8 -10
- data/lib/sitediff/webserver/resultserver.rb +2 -0
- data/lib/sitediff/webserver.rb +3 -0
- data/lib/sitediff.rb +9 -9
- data/misc/sitediff - overview report.png +0 -0
- data/misc/sitediff - page report.png +0 -0
- data/package-lock.json +878 -0
- data/package.json +25 -0
- data/sitediff.gemspec +51 -0
- metadata +62 -18
data/Rakefile
ADDED
data/Thorfile
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
# TODO: Determine the utility of this file.
|
5
|
+
|
6
|
+
LIB_DIR = File.join(File.dirname(__FILE__), 'lib')
|
7
|
+
$LOAD_PATH << LIB_DIR
|
8
|
+
require 'sitediff/webserver'
|
9
|
+
require 'sitediff/webserver/resultserver'
|
10
|
+
|
11
|
+
# Thor Base class.
|
12
|
+
class Base < Thor
  # Adds the option to all Base subclasses.
  # method_options() takes different arguments than option().
  method_options local: true
  def initialize(*args)
    super
    # Remember whether commands should be run from the local checkout.
    @local = options['local']
  end

  # Provides run() for executing shell commands.
  include Thor::Actions

  # Thor, by default, exits with 0 no matter what; opt in to failing exits.
  def self.exit_on_failure?
    true
  end

  protected

  # Builds the command prefix used to invoke an executable.
  #
  # In local mode the command is wrapped in "bundle exec", and "sitediff"
  # is replaced with the checkout's ./bin/sitediff script. Otherwise the
  # name is returned as given, preceded by a single space.
  def executable(gem)
    return " #{gem}" unless @local

    target = gem == 'sitediff' ? './bin/sitediff' : gem
    "bundle exec #{target}"
  end
end
|
36
|
+
|
37
|
+
# Thor for Docker.
|
38
|
+
class Docker < Base
  # Name of the docker image that is built and run by these commands.
  IMAGE = 'evolvingweb/sitediff'

  desc 'build', 'Build a docker image for sitediff'
  # Builds the docker image from the current directory.
  def build
    run "docker build -t #{IMAGE} . "
  end

  desc 'run', 'Run a rake task (or a login shell if none given) inside docker'
  # NOTE: run() is reserved by Thor and cannot be overridden. Thor only needs
  # the first N characters to match a command with a method, so "run" still
  # resolves to run_. Cf. Thor::normalize_command_name()
  def run_(task = 'bash')
    mount = "-v #{File.dirname(__FILE__)}:/sitediff"
    finish_exec(task, ['-t', mount])
  end

  desc 'compose', 'Run a task inside docker without volume mounting (not supported with compose)'
  # Runs a task inside docker without mounting this directory as a volume.
  def compose(task = 'bash')
    finish_exec(task, ['-t'])
  end

  no_commands do
    # Assembles the final "docker run" command line, echoes it, and runs it.
    #
    # For the 'bash' task, '-i' is appended to docker_opts (the array is
    # modified in place) and a shell is started. Any other task is run
    # through thor with the --local / --no-local flag passed down.
    def finish_exec(task, docker_opts)
      cmd =
        if task == 'bash'
          docker_opts << '-i'
          'bash'
        else
          local_flag = @local ? '--local' : '--no-local'
          "#{executable('thor')} #{task} #{local_flag}"
        end

      command = "docker run #{docker_opts.join(' ')} #{IMAGE} #{cmd}"
      puts command
      run command
    end
  end
end
|
78
|
+
|
79
|
+
# Thor for Spec.
|
80
|
+
class Spec < Base
  desc 'unit', 'run RSpec unit tests'
  # Run RSpec unit tests.
  def unit
    run_rspec('spec/unit')
  end

  desc 'fixture', 'run RSpec integration tests'
  # Run RSpec integration tests.
  def fixture
    # The echoed command previously said "spec/unit" while spec/fixtures
    # was what actually ran; both now come from the same string.
    run_rspec('spec/fixtures')
  end

  desc 'all', 'runs both unit and fixture tests', hide: true
  # Hidden task to lump together multiple tasks.
  def all
    unit
    fixture
  end
  default_task :all

  private

  # Echoes and then runs rspec against the given spec directory.
  def run_rspec(path)
    command = "#{executable('rspec')} #{path}"
    puts command
    run command
  end
end
|
103
|
+
|
104
|
+
# Thor for fixtures.
|
105
|
+
class Fixture < Base
  desc 'local', 'Run a sitediff test case'
  # Runs the sitediff diff command against the CLI fixture configuration.
  def local
    sitediff = executable('sitediff')
    run "#{sitediff} diff --cached=none spec/fixtures/cli/config.yaml"
  end

  desc 'http', 'Run a sitediff test case, using web servers'
  # Runs the same diff as 'local', but against a fixture web server, which
  # is killed once the diff command has been run.
  def http
    command = "#{executable('sitediff')} diff --cached=none spec/fixtures/cli/config.yaml"
    server = http_fixtures(command)
    server.kill
  end

  desc 'serve', 'Serve the result of the fixture test'
  # Runs a diff against a fixture web server, then serves the results and
  # waits on the result server.
  def serve
    command = "#{executable('sitediff')} diff --cached=none --paths-file=spec/sites/ruby-doc.org/paths.txt spec/unit/cli/config.yaml"
    http_fixtures(command)
    result_server = SiteDiff::Webserver::ResultServer.new(nil, 'sitediff', quiet: true)
    result_server.wait
  end

  private

  # Starts a fixture server, runs cmd with the server's before/after values
  # appended as --before / --after, and returns the server object.
  def http_fixtures(cmd)
    SiteDiff::Webserver::FixtureServer.new.tap do |server|
      run "#{cmd} --before #{server.before} --after #{server.after}"
    end
  end
end
|
data/config/.gitkeep
ADDED
File without changes
|
@@ -0,0 +1,81 @@
|
|
1
|
+
# Include other configuration files, merging them with this file.
|
2
|
+
includes:
|
3
|
+
- extra-rules.yaml
|
4
|
+
|
5
|
+
# Settings.
|
6
|
+
#
|
7
|
+
# If you use "sitediff init" with the right parameters, it will generate
|
8
|
+
# this section for you.
|
9
|
+
settings:
|
10
|
+
# Crawl 2 levels deep.
|
11
|
+
depth: 2
|
12
|
+
# Wait for 250ms between requests.
|
13
|
+
interval: 250
|
14
|
+
# Make only 1 request at a time - no simultaneous requests.
|
15
|
+
# Concurrency has to be one when an interval is set.
|
16
|
+
concurrency: 1
|
17
|
+
# Don't follow links to PDF files.
|
18
|
+
exclude: '.*\.pdf'
|
19
|
+
# Curl options, if any.
|
20
|
+
curl_opts:
|
21
|
+
max_recv_speed_large: 10000
|
22
|
+
|
23
|
+
# Rules under this element apply only to the 'before' site.
|
24
|
+
before:
|
25
|
+
# URL of the 'before' version of the site.
|
26
|
+
url: http://localhost/old
|
27
|
+
|
28
|
+
# Sanitizations and DOM transformations, just like the general ones
|
29
|
+
# demonstrated below, but applied only to the 'before' site.
|
30
|
+
dom_transform:
|
31
|
+
- title: Example
|
32
|
+
type: remove
|
33
|
+
selector: div.updates-required
|
34
|
+
|
35
|
+
# Rules under this element apply only to the 'after' site.
|
36
|
+
after:
|
37
|
+
# URL of the 'after' version of the site.
|
38
|
+
url: http://localhost/new
|
39
|
+
|
40
|
+
# The root element to compare.
|
41
|
+
#
|
42
|
+
# Usually, sitediff compares the HTML of the entire page. If you'd rather
|
43
|
+
# check just a subset of the page, specify a selector here. For example, the
|
44
|
+
# line below causes only the body to be compared, ignoring the HTML head.
|
45
|
+
selector: 'body'
|
46
|
+
|
47
|
+
# General regular expression rules, applied to both versions of the site.
|
48
|
+
sanitization:
|
49
|
+
# Normalize input tags containing random tokens.
|
50
|
+
- title: Remove form-build-id
|
51
|
+
pattern: '<input type="hidden" name="form_build_id" value="form-[a-zA-Z0-9_-]+" *\/?>'
|
52
|
+
substitute: '<input type="hidden" name="form_build_id" value="__form_build_id__">'
|
53
|
+
|
54
|
+
# Replace meta property="twitter:*" with meta name="twitter:*".
|
55
|
+
- title: Meta 'property' changed to 'name'
|
56
|
+
pattern: 'property="twitter:'
|
57
|
+
substitute: 'name="twitter:'
|
58
|
+
# 'selector' limits this rule to only within the selected elements.
|
59
|
+
selector: meta
|
60
|
+
# 'path' limits this rule to only certain pages.
|
61
|
+
path: /user
|
62
|
+
|
63
|
+
# General DOM transforms, applied to both versions of the site.
|
64
|
+
dom_transform:
|
65
|
+
# Remove article elements, replacing them with their content
|
66
|
+
- title: Unwrap article elements
|
67
|
+
type: unwrap
|
68
|
+
selector: article
|
69
|
+
|
70
|
+
# Remove classes from divs
|
71
|
+
- title: Remove classes bar and baz from divs
|
72
|
+
type: remove_class
|
73
|
+
selector: div
|
74
|
+
class:
|
75
|
+
- class-bar
|
76
|
+
- class-baz
|
77
|
+
|
78
|
+
# Remove the div with a specific ID.
|
79
|
+
- title: Remove block containing current time.
|
80
|
+
type: remove
|
81
|
+
selector: div#block-time
|
data/lib/sitediff/api.rb
CHANGED
@@ -111,8 +111,8 @@ class SiteDiff
|
|
111
111
|
sitediff = SiteDiff.new(
|
112
112
|
@config,
|
113
113
|
cache,
|
114
|
-
options[:verbose],
|
115
|
-
options[:debug]
|
114
|
+
verbose: options[:verbose],
|
115
|
+
debug: options[:debug]
|
116
116
|
)
|
117
117
|
num_failing = sitediff.run
|
118
118
|
exit_code = num_failing.positive? ? 2 : 0
|
@@ -168,7 +168,7 @@ class SiteDiff
|
|
168
168
|
@config.setting(:exclude),
|
169
169
|
@config.setting(:depth),
|
170
170
|
@config.curl_opts,
|
171
|
-
@debug
|
171
|
+
debug: @debug
|
172
172
|
) do |info|
|
173
173
|
SiteDiff.log "Visited #{info.uri}, cached."
|
174
174
|
after_crawl(tag, info)
|
@@ -254,12 +254,23 @@ class SiteDiff
|
|
254
254
|
result = info.read_result
|
255
255
|
|
256
256
|
# Write result to applicable cache.
|
257
|
-
@cache.set(tag, path, result)
|
258
|
-
|
259
|
-
@cache.set(:
|
257
|
+
# @cache.set(tag, path, result)
|
258
|
+
@cache.set(:before, path, result) if tag == 'before'
|
259
|
+
@cache.set(:after, path, result) if tag == 'after'
|
260
260
|
|
261
261
|
# TODO: Restore application of rules.
|
262
262
|
# @rules.handle_page(tag, res.content, info.document) if @rules && !res.error
|
263
263
|
end
|
264
|
+
|
265
|
+
# Builds the effective curl options hash.
#
# Starts from UriWrapper::DEFAULT_CURL_OPTS, layers options['curl_options']
# and then options['curl_opts'] on top (later values win), and converts the
# string values 'true' / 'false' into real booleans. The defaults constant
# itself is not modified.
def get_curl_opts(options)
  # String keys are intentional here.
  booleans = { 'true' => true, 'false' => false }
  merged = UriWrapper::DEFAULT_CURL_OPTS
           .merge(options['curl_options'] || {})
           .merge(options['curl_opts'] || {})
  merged.transform_values { |value| booleans.fetch(value, value) }
end
|
264
275
|
end
|
265
276
|
end
|
data/lib/sitediff/cache.rb
CHANGED
@@ -71,7 +71,8 @@ class SiteDiff
|
|
71
71
|
rescue Errno::EEXIST
|
72
72
|
curdir = filepath
|
73
73
|
curdir = curdir.parent until curdir.exist?
|
74
|
-
tempname = curdir.dirname
|
74
|
+
tempname = "#{curdir.dirname}/#{curdir.basename}.temporary"
|
75
|
+
# tempname = curdir.dirname + (curdir.basename.to_s + '.temporary')
|
75
76
|
# May cause problems if action is not atomic!
|
76
77
|
# Move existing file to dir/index.html first
|
77
78
|
# Not robust! Should generate an UUID or something.
|
@@ -82,10 +83,11 @@ class SiteDiff
|
|
82
83
|
filepath.dirname.mkpath
|
83
84
|
# Should only happen in strange situations such as when the path
|
84
85
|
# is foo/index.html/bar (i.e., index.html is a directory)
|
85
|
-
if (curdir
|
86
|
+
if File.exist?("#{curdir}/index.html")
|
86
87
|
SiteDiff.log "Overwriting file #{tempname}", :warning
|
87
88
|
end
|
88
|
-
|
89
|
+
File.rename(tempname, "#{curdir}/index.html")
|
90
|
+
# tempname.rename(curdir + 'index.html')
|
89
91
|
end
|
90
92
|
end
|
91
93
|
File.open(filename, 'w') { |file| file.write(Marshal.dump(result)) }
|
data/lib/sitediff/cli.rb
CHANGED
@@ -44,8 +44,8 @@ class SiteDiff
|
|
44
44
|
output = []
|
45
45
|
output.push("Sitediff CLI #{gemspec.version}")
|
46
46
|
if options[:verbose]
|
47
|
-
output.push(
|
48
|
-
output.push(
|
47
|
+
output.push("Website: #{gemspec.homepage}")
|
48
|
+
output.push("GitHub: #{gemspec.metadata['source_code_uri']}")
|
49
49
|
end
|
50
50
|
puts output.join("\n")
|
51
51
|
end
|
@@ -199,11 +199,12 @@ class SiteDiff
|
|
199
199
|
.merge(
|
200
200
|
{
|
201
201
|
after_url: urls.pop,
|
202
|
-
before_url: urls.pop,
|
202
|
+
before_url: urls.pop,
|
203
203
|
directory: get_dir(options['directory']),
|
204
204
|
curl_opts: get_curl_opts(options)
|
205
205
|
}
|
206
206
|
)
|
207
|
+
|
207
208
|
Api.init(api_options)
|
208
209
|
end
|
209
210
|
|
@@ -73,22 +73,22 @@ class SiteDiff
|
|
73
73
|
# Create a gitignore if we seem to be in git.
|
74
74
|
# Writes a .gitignore into +dir+ listing sitediff's generated directories
# and files, but only when +dir+ or one of its ancestors contains a .git
# directory. Returns nil in every case.
#
# +dir+ must respond to realpath and be iterable with ascend
# (e.g. a Pathname).
def make_gitignore(dir)
  # Check if we're in git.
  in_git = dir.realpath.to_enum(:ascend).any? { |d| Dir.exist?("#{d}/.git") }
  return unless in_git

  # File.write opens, writes and closes in one call, so the handle cannot
  # leak if writing fails part-way.
  File.write("#{dir}/.gitignore", <<~GITIGNORE)
    # Directories.
    diffs
    snapshot
    # Files.
    settings.yaml
    paths.txt
    failures.txt
  GITIGNORE
  nil
end
|
93
93
|
|
94
94
|
##
|
@@ -12,7 +12,7 @@ class SiteDiff
|
|
12
12
|
# Directory in which presets live.
|
13
13
|
#
|
14
14
|
# TODO: Move this outside "lib".
|
15
|
-
DIRECTORY =
|
15
|
+
DIRECTORY = "#{Pathname.new(__dir__).dirname}/presets".freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Reads preset rules.
|
@@ -27,7 +27,7 @@ class SiteDiff
|
|
27
27
|
|
28
28
|
# Load and cache preset config.
|
29
29
|
if @cache[name].nil?
|
30
|
-
exist? name, true
|
30
|
+
exist? name, exception: true
|
31
31
|
@cache[name] = Config.load_conf file(name)
|
32
32
|
end
|
33
33
|
|
@@ -43,7 +43,7 @@ class SiteDiff
|
|
43
43
|
# Load and cache preset names.
|
44
44
|
if @all.nil?
|
45
45
|
@all = []
|
46
|
-
pattern = DIRECTORY
|
46
|
+
pattern = "#{DIRECTORY}/*.yaml"
|
47
47
|
Dir.glob(pattern) do |file|
|
48
48
|
@all << File.basename(file, '.yaml')
|
49
49
|
end
|
@@ -54,8 +54,8 @@ class SiteDiff
|
|
54
54
|
|
55
55
|
##
|
56
56
|
# Checks whether a preset exists.
|
57
|
-
def self.exist?(name, exception
|
58
|
-
result = File.exist?
|
57
|
+
def self.exist?(name, exception: false)
|
58
|
+
result = File.exist?(file(name))
|
59
59
|
|
60
60
|
# Raise an exception, if required.
|
61
61
|
if exception && !result
|
@@ -68,7 +68,7 @@ class SiteDiff
|
|
68
68
|
##
|
69
69
|
# Returns the path to a preset file.
|
70
70
|
def self.file(name)
|
71
|
-
DIRECTORY + "
|
71
|
+
DIRECTORY + "/#{name}.yaml"
|
72
72
|
end
|
73
73
|
end
|
74
74
|
end
|
data/lib/sitediff/config.rb
CHANGED
@@ -107,7 +107,7 @@ class SiteDiff
|
|
107
107
|
conf[pos][key] += conf[key] if conf[key]
|
108
108
|
end
|
109
109
|
tools[:scalar].each { |key| conf[pos][key] ||= conf[key] }
|
110
|
-
conf[pos]['url'] ||= conf[pos
|
110
|
+
conf[pos]['url'] ||= conf["pos#{_url}"] if defined?(_url)
|
111
111
|
conf[pos]['curl_opts'] = conf['curl_opts']
|
112
112
|
end
|
113
113
|
|
@@ -260,8 +260,8 @@ class SiteDiff
|
|
260
260
|
end
|
261
261
|
|
262
262
|
# Get "before" site configuration.
|
263
|
-
def before(apply_preset
|
264
|
-
section
|
263
|
+
def before(apply_preset: false)
|
264
|
+
section(:before, with_preset: apply_preset)
|
265
265
|
end
|
266
266
|
|
267
267
|
# Get "before" site URL.
|
@@ -271,8 +271,8 @@ class SiteDiff
|
|
271
271
|
end
|
272
272
|
|
273
273
|
# Get "after" site configuration.
|
274
|
-
def after(apply_preset
|
275
|
-
section
|
274
|
+
def after(apply_preset: false)
|
275
|
+
section(:after, with_preset: apply_preset)
|
276
276
|
end
|
277
277
|
|
278
278
|
# Get "after" site URL.
|
@@ -431,7 +431,7 @@ class SiteDiff
|
|
431
431
|
end
|
432
432
|
|
433
433
|
# Validate preset.
|
434
|
-
Preset.exist? setting(:preset), true if setting(:preset)
|
434
|
+
Preset.exist? setting(:preset), exception: true if setting(:preset)
|
435
435
|
end
|
436
436
|
|
437
437
|
##
|
@@ -459,7 +459,7 @@ class SiteDiff
|
|
459
459
|
@return_value = string_param == '' ? nil : Regexp.new(string_param)
|
460
460
|
rescue SiteDiffException => e
|
461
461
|
@return_value = nil
|
462
|
-
SiteDiff.log
|
462
|
+
SiteDiff.log "Invalid RegExp: #{string_param}", :error
|
463
463
|
SiteDiff.log e.message, :error
|
464
464
|
# TODO: Use SiteDiff.log type :debug
|
465
465
|
# SiteDiff.log e.backtrace, :error if options[:verbose]
|
@@ -491,7 +491,7 @@ class SiteDiff
|
|
491
491
|
#
|
492
492
|
# @return [Hash|Nil]
|
493
493
|
# Section data or Nil.
|
494
|
-
def section(name, with_preset
|
494
|
+
def section(name, with_preset: false)
|
495
495
|
name = name.to_s if name.is_a? Symbol
|
496
496
|
|
497
497
|
# Validate section.
|
@@ -531,7 +531,7 @@ class SiteDiff
|
|
531
531
|
def self.load_raw_yaml(file)
|
532
532
|
# TODO: Only show this in verbose mode.
|
533
533
|
SiteDiff.log "Reading config file: #{Pathname.new(file).expand_path}"
|
534
|
-
conf = YAML.load_file(file) || {}
|
534
|
+
conf = YAML.load_file(file, permitted_classes: [Regexp]) || {}
|
535
535
|
|
536
536
|
unless conf.is_a? Hash
|
537
537
|
raise InvalidConfig, "Invalid configuration file: '#{file}'"
|
data/lib/sitediff/crawler.rb
CHANGED
@@ -21,7 +21,7 @@ class SiteDiff
|
|
21
21
|
exclude_regex,
|
22
22
|
depth = DEFAULT_DEPTH,
|
23
23
|
curl_opts = UriWrapper::DEFAULT_CURL_OPTS,
|
24
|
-
debug
|
24
|
+
debug: true,
|
25
25
|
&block)
|
26
26
|
@hydra = hydra
|
27
27
|
@base_uri = Addressable::URI.parse(base)
|
@@ -43,7 +43,7 @@ class SiteDiff
|
|
43
43
|
|
44
44
|
@found << rel
|
45
45
|
|
46
|
-
wrapper = UriWrapper.new(@base + rel, @curl_opts, @debug)
|
46
|
+
wrapper = UriWrapper.new(@base + rel, @curl_opts, debug: @debug)
|
47
47
|
wrapper.queue(@hydra) do |res|
|
48
48
|
fetched_uri(rel, depth, res)
|
49
49
|
end
|
@@ -104,6 +104,16 @@ class SiteDiff
|
|
104
104
|
|
105
105
|
# Make a link relative to @base_uri
|
106
106
|
# Returns the part of uri's path that follows the base URI's path.
#
# Only the path is considered: the query string and fragment of +uri+ are
# not included in the result. The base path is removed by length, not by
# comparing its contents. Returns nil when uri's path is shorter than the
# base path.
def relativize_link(uri)
  path = uri.path
  prefix_length = @base_uri.path.length
  path[prefix_length..-1]
end
|
109
119
|
|
data/lib/sitediff/diff.rb
CHANGED
@@ -68,7 +68,7 @@ class SiteDiff
|
|
68
68
|
|
69
69
|
##
|
70
70
|
# Generates diff output for a single result.
|
71
|
-
def generate_diff_output(result, relative
|
71
|
+
def generate_diff_output(result, relative: false)
|
72
72
|
erb_path = File.join(SiteDiff::FILES_DIR, 'diff.html.erb')
|
73
73
|
ERB.new(File.read(erb_path)).result(binding)
|
74
74
|
end
|
data/lib/sitediff/fetch.rb
CHANGED
@@ -15,7 +15,7 @@ class SiteDiff
|
|
15
15
|
interval,
|
16
16
|
concurrency = 3,
|
17
17
|
curl_opts = nil,
|
18
|
-
debug
|
18
|
+
debug: true,
|
19
19
|
**tags)
|
20
20
|
@cache = cache
|
21
21
|
@interval = interval
|
@@ -51,7 +51,7 @@ class SiteDiff
|
|
51
51
|
results[tag] = UriWrapper::ReadResult.error('Not cached')
|
52
52
|
process_results(path, results)
|
53
53
|
else
|
54
|
-
uri = UriWrapper.new(base + path, @curl_opts, @debug)
|
54
|
+
uri = UriWrapper.new(base + path, @curl_opts, debug: @debug)
|
55
55
|
uri.queue(@hydra) do |resl|
|
56
56
|
# Insert delay to limit fetching rate
|
57
57
|
if @interval != 0
|
@@ -144,7 +144,7 @@
|
|
144
144
|
<% end %>
|
145
145
|
<% end %>
|
146
146
|
<% unless result.diff_url.nil? %>
|
147
|
-
<a href="<%= result.diff_url(relative) %>" class="button button-diff">View diff</a>
|
147
|
+
<a href="<%= result.diff_url(relative: relative) %>" class="button button-diff">View diff</a>
|
148
148
|
<% end %>
|
149
149
|
</div>
|
150
150
|
</td>
|
@@ -0,0 +1,63 @@
|
|
1
|
+
sanitization:
|
2
|
+
- title: Strip Drupal.settings
|
3
|
+
selector: script
|
4
|
+
pattern: '^(<script>)?jQuery.extend\(Drupal.settings.*$'
|
5
|
+
- title: Strip IE CSS/JS cache IDs
|
6
|
+
pattern: '("[^"]*ie\d?\.(js|css))\?[a-z0-9]{6}"'
|
7
|
+
substitute: '\1'
|
8
|
+
- title: Strip form build ID
|
9
|
+
selector: input
|
10
|
+
pattern: 'name="form_build_id" value="form-[-\w]{40,43}"'
|
11
|
+
substitute: 'name="form_build_id" value="form-DRUPAL_FORM_BUILD_ID"'
|
12
|
+
- title: Strip view DOM ID
|
13
|
+
pattern: '(class="view .*) view-dom-id-[a-f0-9]{32}"'
|
14
|
+
substitute: '\1 view-dom-id-DRUPAL_VIEW_DOM_ID"'
|
15
|
+
- title: Strip CSS aggregation filenames
|
16
|
+
selector: link[rel=stylesheet]
|
17
|
+
pattern: '(href="[^"]*/files/css/css_)[-\w]{40,43}\.css"'
|
18
|
+
substitute: '\1DRUPAL_AGGREGATED_CSS.css"'
|
19
|
+
- title: Strip JS aggregation filenames
|
20
|
+
selector: script
|
21
|
+
pattern: '(src="[^"]*/files/js/js_)[-\w]{40,43}\.js"'
|
22
|
+
substitute: '\1DRUPAL_AGGREGATED_JS.js"'
|
23
|
+
- title: Strip CSS/JS cache IDs
|
24
|
+
selector: style, script
|
25
|
+
pattern: '("[^"]*\.(js|css))\?[a-z0-9]{6}"'
|
26
|
+
substitute: '\1'
|
27
|
+
- title: Strip Drupal JS version tags
|
28
|
+
selector: script
|
29
|
+
pattern: '(src="[^"]*/misc/\w+\.js)?v=\d+\.\d+"'
|
30
|
+
substitute: '\1'
|
31
|
+
- title: Strip domain names from absolute URLs
|
32
|
+
pattern: 'http:\/\/[a-zA-Z0-9.:-]+'
|
33
|
+
substitute: '__domain__'
|
34
|
+
- title: Strip form build ID
|
35
|
+
selector: input
|
36
|
+
pattern: 'autocomplete="off" data-drupal-selector="form-[-\w]{40,43}"'
|
37
|
+
substitute: 'autocomplete="off" data-drupal-selector="form-DRUPAL_FORM_BUILD_ID"'
|
38
|
+
- title: Strip form build ID 2
|
39
|
+
selector: input
|
40
|
+
pattern: 'name="form_build_id" value="form-[-\w]{40,43}"'
|
41
|
+
substitute: 'name="form_build_id" value="form-DRUPAL_FORM_BUILD_ID"'
|
42
|
+
- title: Strip Drupal CSS link queries
|
43
|
+
selector: link
|
44
|
+
pattern: '\.css\?(\w*)'
|
45
|
+
substitute: '\.css'
|
46
|
+
- title: Strip Drupal JS link queries
|
47
|
+
selector: script
|
48
|
+
pattern: '\.js\?(\w*)'
|
49
|
+
substitute: '\.js'
|
50
|
+
- title: Strip Drupal View-DOM ID
|
51
|
+
pattern: 'view-dom-id-\w*'
|
52
|
+
substitute: 'view-dom-id-_ID_'
|
53
|
+
- title: Strip Drupal View-DOM ID 2
|
54
|
+
pattern: '(views?_dom_id"?:"?)\w*'
|
55
|
+
substitute: '\1_ID_'
|
56
|
+
- title: Ignore Drupal CSS file names
|
57
|
+
selector: link
|
58
|
+
pattern: 'css_[-\w]{40,43}(\\|%5C)?\.css'
|
59
|
+
substitute: 'css__ID__.css'
|
60
|
+
- title: Ignore Drupal JS file names
|
61
|
+
selector: script
|
62
|
+
pattern: 'js_[-\w]{40,43}\\?\.js'
|
63
|
+
substitute: 'js__ID__.js'
|