sitediff 0.0.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/bin/sitediff +10 -4
- data/lib/sitediff.rb +179 -91
- data/lib/sitediff/cache.rb +106 -0
- data/lib/sitediff/cli.rb +391 -60
- data/lib/sitediff/config.rb +383 -37
- data/lib/sitediff/config/creator.rb +114 -0
- data/lib/sitediff/config/preset.rb +75 -0
- data/lib/sitediff/crawler.rb +131 -0
- data/lib/sitediff/diff.rb +57 -12
- data/lib/sitediff/exception.rb +5 -0
- data/lib/sitediff/fetch.rb +76 -0
- data/lib/sitediff/files/diff.html.erb +20 -2
- data/lib/sitediff/files/jquery.min.js +2 -0
- data/lib/sitediff/files/normalize.css +349 -0
- data/lib/sitediff/files/report.html.erb +144 -0
- data/lib/sitediff/files/sidebyside.html.erb +16 -0
- data/lib/sitediff/files/sitediff.css +236 -29
- data/lib/sitediff/files/sitediff.js +176 -0
- data/lib/sitediff/report.rb +238 -0
- data/lib/sitediff/result.rb +63 -26
- data/lib/sitediff/sanitize.rb +160 -141
- data/lib/sitediff/sanitize/dom_transform.rb +130 -0
- data/lib/sitediff/sanitize/regexp.rb +82 -0
- data/lib/sitediff/uriwrapper.rb +114 -35
- data/lib/sitediff/webserver.rb +94 -0
- data/lib/sitediff/webserver/resultserver.rb +134 -0
- metadata +103 -43
- data/lib/sitediff/files/html_report.html.erb +0 -47
- data/lib/sitediff/util/cache.rb +0 -32
- data/lib/sitediff/util/webserver.rb +0 -77
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 1b7854497b5e81f48d810acec8106cbc66e33492d046e032e5516f76db26f142
|
4
|
+
data.tar.gz: a9349a79953237dd017600d49d38b8e734afc561f0ce09a1f8732e0e933530c9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0e91f665f3c59b8a65f16c6942ec49b9cc9ee7fd12b0a777eadb844a0b9819ab1fd9485495bf2c757ca7342a6198dcccb5ae546c4ddf2682f234d015b64309b2
|
7
|
+
data.tar.gz: 64b7980bbbade8710b6069af19a67083678c2bd5fa99674df3360c1c6a3ddf8a15de7c5be4e8349ec298fc1c0dc27535b816089cd4f8852b8c8633861d72a178
|
data/bin/sitediff
CHANGED
@@ -1,10 +1,16 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
# when run as gem, $0 is /usr/local/bin/sitediff not this file
|
4
|
-
if $
|
5
|
-
$LOAD_PATH.unshift File.expand_path('
|
5
|
+
# When executed straight from a checkout (rather than through the installed
# gem wrapper), make the sibling lib/ directory loadable.
$LOAD_PATH.unshift(File.expand_path('../lib', __dir__)) if $PROGRAM_NAME == __FILE__

require 'sitediff/cli'

# Hand control to the CLI; a user interrupt ends the run with a short log
# line instead of propagating the Interrupt.
begin
  SiteDiff::Cli.start
rescue Interrupt
  puts("\n")
  SiteDiff.log('Stopping. Interrupted by user.')
end
|
data/lib/sitediff.rb
CHANGED
@@ -1,130 +1,218 @@
|
|
1
1
|
#!/bin/env ruby
|
2
|
-
|
3
|
-
|
4
|
-
require 'sitediff/
|
5
|
-
require 'sitediff/
|
6
|
-
require 'sitediff/
|
7
|
-
require '
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'sitediff/config'
|
5
|
+
require 'sitediff/diff'
|
6
|
+
require 'sitediff/fetch'
|
7
|
+
require 'sitediff/result'
|
8
|
+
require 'sitediff/report'
|
9
|
+
require 'pathname'
|
8
10
|
require 'rainbow'
|
11
|
+
require 'rubygems'
|
12
|
+
require 'yaml'
|
9
13
|
|
14
|
+
# SiteDiff Object.
|
10
15
|
class SiteDiff
  attr_reader :config, :results

  # SiteDiff installation directory.
  ROOT_DIR = File.dirname(File.dirname(__FILE__))

  # Path to misc files. Ex: *.erb, *.css.
  FILES_DIR = File.join(File.dirname(__FILE__), 'sitediff', 'files')

  # Background color of the log label for each message type. Any other
  # type (e.g. :debug) falls back to :blue.
  LABEL_BACKGROUNDS = {
    info: :cyan,
    success: :green,
    error: :red,
    warning: :yellow
  }.freeze

  ##
  # Logs a message to standard output.
  #
  # The label is colorized and the message is not. When no label is given,
  # the type is used as the label, except for :info messages, which are
  # printed without a label.
  #
  # TODO: Only print :debug messages in debug mode.
  #
  # @param [#to_s] message
  #   Text to print.
  # @param [Symbol] type
  #   Dictates the label color: :info, :success, :error or :warning.
  # @param [#to_s, nil] label
  #   Optional label text, printed in square brackets before the message.
  def self.log(message, type = :info, label = nil)
    label ||= type unless type == :info
    label = label.to_s

    unless label.empty?
      background = LABEL_BACKGROUNDS.fetch(type, :blue)
      # The trailing space separates the label from the message.
      label = "#{Rainbow("[#{label}]").bg(background).fg(:black)} "
    end

    # Interpolation (rather than String#+) accepts non-String messages too.
    puts "#{label}#{message}"
  end

  ##
  # Returns the "before" site's URL.
  #
  # TODO: Remove in favor of config.before_url.
  def before
    @config.before['url']
  end

  ##
  # Returns the "after" site's URL.
  #
  # TODO: Remove in favor of config.after_url.
  def after
    @config.after['url']
  end

  ##
  # Initialize SiteDiff.
  #
  # Validates the config and hands it to Diff.diff_config. When the config
  # has no "before" URL but the cache holds a "before" snapshot, validation
  # is told that a "before" URL is not needed (single-site mode).
  #
  # @raise [SiteDiffException]
  #   In single-site mode, if :before is not among the cache's read tags.
  def initialize(config, cache, verbose = true, debug = false)
    @cache = cache
    @verbose = verbose
    @debug = debug

    # Check for single-site mode
    validate_opts = {}
    if !config.before['url'] && @cache.tag?(:before)
      unless @cache.read_tags.include?(:before)
        raise SiteDiffException,
              "A cached 'before' is required for single-site mode"
      end
      validate_opts[:need_before] = false
    end
    config.validate(validate_opts)

    # Configure diff.
    Diff.diff_config(config)
    @config = config
  end

  ##
  # Sanitize HTML.
  #
  # @return [Array]
  #   Two elements, for :before and :after in that order.
  def sanitize(path, read_results)
    %i[before after].map do |tag|
      html = read_results[tag].content
      # TODO: See why encoding is empty while running tests.
      #
      # The presence of an "encoding" value used to be used to determine
      # if the sanitizer would be called. However, encoding turns up blank
      # during rspec tests for some reason.
      encoding = read_results[tag].encoding
      if encoding || html.length.positive?
        section = @config.send(tag, true)
        Sanitizer.new(html, section, path: path).sanitize
      else
        html
      end
    end
  end

  ##
  # Process a set of read results.
  #
  # This is the callback that processes items fetched by the Fetcher. It
  # stores a Result for the path and then logs every result that is ready,
  # in the original path order.
  def process_results(path, read_results)
    error = read_results[:before].error || read_results[:after].error
    diff =
      if error
        Result.new(path, nil, nil, nil, nil, error)
      else
        begin
          Result.new(
            path,
            *sanitize(path, read_results),
            read_results[:before].encoding,
            read_results[:after].encoding,
            nil
          )
        rescue StandardError => e
          raise if @debug

          # The fallback must become the stored result; otherwise a nil
          # entry blocks the in-order logging below and breaks #run.
          Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e}")
        end
      end
    @results[path] = diff

    # Print results in order!
    while (next_diff = @results[@ordered.first])
      next_diff.log(@verbose)
      @ordered.shift
    end
  end

  ##
  # Compute diff as per config.
  #
  # @return [Integer]
  #   Number of paths whose result is not a success.
  def run
    # Map of path -> Result object, populated by process_results
    @results = {}
    @ordered = @config.paths.dup

    unless @cache.read_tags.empty?
      SiteDiff.log("Using sites from cache: #{@cache.read_tags.sort.join(', ')}")
    end

    # TODO: Fix this after config merge refactor!
    # Not quite right. We are not passing @config.before or @config.after
    # so passing this instead but @config.after['curl_opts'] is ignored.
    curl_opts = @config.setting :curl_opts
    config_curl_opts = @config.before['curl_opts']
    curl_opts = config_curl_opts.merge(curl_opts) if config_curl_opts
    fetcher = Fetch.new(
      @cache,
      @config.paths,
      @config.setting(:interval),
      @config.setting(:concurrency),
      curl_opts,
      @debug,
      before: @config.before_url,
      after: @config.after_url
    )

    # Run the Fetcher with "process results" as a callback.
    fetcher.run(&method(:process_results))

    # Order by original path order
    @results = @config.paths.map { |path| @results[path] }
    results.count { |result| !result.success? }
  end

  ##
  # Get a reporter object to help with report generation.
  #
  # @raise [SiteDiffException]
  #   If called before SiteDiff#run has produced results.
  def report
    if @results.nil?
      raise SiteDiffException,
            'No results detected. Run SiteDiff.run before SiteDiff.report.'
    end

    Report.new(@config, @cache, @results)
  end

  ##
  # Get SiteDiff gemspec.
  def self.gemspec
    Gem::Specification.load(File.join(ROOT_DIR, 'sitediff.gemspec'))
  end

  ##
  # Ensures that a directory exists and returns a Pathname for it.
  #
  # @param [String] dir
  #   path/to/directory
  def self.ensure_dir(dir)
    dir = Pathname.new(dir) unless dir.is_a? Pathname
    dir.mkpath unless dir.directory?
    dir
  end
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'set'
|
4
|
+
require 'fileutils'
|
5
|
+
|
6
|
+
class SiteDiff
  # SiteDiff Cache Handler.
  #
  # Stores one Marshal-serialized result per path under
  # <directory>/snapshot/<tag>/<path>.
  class Cache
    attr_accessor :read_tags, :write_tags

    ##
    # Creates a Cache object.
    #
    # @param [Hash] opts
    #   :directory is the storage directory (defaults to '.'); :create is
    #   stored as-is.
    def initialize(opts = {})
      @create = opts[:create]

      # Read and Write tags are sets that can contain :before and :after.
      # They indicate whether we should use the cache for reading or writing.
      @read_tags = Set.new
      @write_tags = Set.new

      # The directory used by the cache for storage.
      @dir = opts[:directory] || '.'
    end

    ##
    # Is a tag cached?
    # TODO: Rename it to is_cached? as it makes more sense.
    def tag?(tag)
      File.directory?(File.join(@dir, 'snapshot', tag.to_s))
    end

    ##
    # Get data from cache.
    #
    # @return [Object, nil]
    #   The cached object, or nil when the tag is not a read tag or no file
    #   exists for the path.
    def get(tag, path)
      return nil unless @read_tags.include? tag

      filename = snapshot_file(tag, path)
      return nil unless File.file? filename

      # NOTE(review): Marshal.load can instantiate arbitrary objects. This
      # is only safe while the cache directory holds files written by #set;
      # never point the cache at untrusted data.
      # Marshal data is binary, so read it without any text-mode translation.
      Marshal.load(File.binread(filename))
    end

    ##
    # Set data to cache.
    #
    # Does nothing unless the tag is a write tag. If an ancestor of the
    # target is an existing file, that file is moved to <ancestor>/index.html
    # so the ancestor can become a directory.
    def set(tag, path, result)
      return unless @write_tags.include? tag

      filename = snapshot_file(tag, path)
      filepath = Pathname.new(filename)
      unless filepath.dirname.directory?
        begin
          filepath.dirname.mkpath
        rescue Errno::EEXIST
          # Walk up to the first existing entry, which is the file in the way.
          curdir = filepath
          curdir = curdir.parent until curdir.exist?
          tempname = curdir.dirname + (curdir.basename.to_s + '.temporary')
          # May cause problems if action is not atomic!
          # Move existing file to dir/index.html first
          # Not robust! Should generate an UUID or something.
          if File.exist?(tempname)
            SiteDiff.log "Overwriting file #{tempname}", :warning
          end
          curdir.rename(tempname)
          filepath.dirname.mkpath
          # Should only happen in strange situations such as when the path
          # is foo/index.html/bar (i.e., index.html is a directory)
          index_file = curdir + 'index.html'
          if index_file.exist?
            SiteDiff.log "Overwriting file #{index_file}", :warning
          end
          tempname.rename(index_file)
        end
      end
      # Binary write keeps the Marshal payload byte-exact.
      File.binwrite(filename, Marshal.dump(result))
    end

    ##
    # TODO: Document this or remove it if unused.
    def key(tag, path)
      # Ensure encoding stays the same!
      Marshal.dump([tag, path.encode('UTF-8')])
    end

    ##
    # Ensures that a directory exists.
    #
    # Also makes it this cache's storage directory.
    #
    # @return [String]
    #   The directory path.
    def get_dir(directory)
      # Create the dir. Must go before cache initialization!
      @dir = Pathname.new(directory || '.')
      @dir.mkpath unless @dir.directory?
      @dir.to_s
    end

    private

    # Location of the snapshot for tag/path. A path that is a directory on
    # disk maps to the index.html inside it.
    def snapshot_file(tag, path)
      filename = File.join(
        @dir,
        'snapshot',
        tag.to_s,
        *path.split(File::SEPARATOR)
      )
      File.directory?(filename) ? File.join(filename, 'index.html') : filename
    end
  end
end
|