sitediff 0.0.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/bin/sitediff +10 -4
- data/lib/sitediff.rb +179 -91
- data/lib/sitediff/cache.rb +106 -0
- data/lib/sitediff/cli.rb +391 -60
- data/lib/sitediff/config.rb +383 -37
- data/lib/sitediff/config/creator.rb +114 -0
- data/lib/sitediff/config/preset.rb +75 -0
- data/lib/sitediff/crawler.rb +131 -0
- data/lib/sitediff/diff.rb +57 -12
- data/lib/sitediff/exception.rb +5 -0
- data/lib/sitediff/fetch.rb +76 -0
- data/lib/sitediff/files/diff.html.erb +20 -2
- data/lib/sitediff/files/jquery.min.js +2 -0
- data/lib/sitediff/files/normalize.css +349 -0
- data/lib/sitediff/files/report.html.erb +144 -0
- data/lib/sitediff/files/sidebyside.html.erb +16 -0
- data/lib/sitediff/files/sitediff.css +236 -29
- data/lib/sitediff/files/sitediff.js +176 -0
- data/lib/sitediff/report.rb +238 -0
- data/lib/sitediff/result.rb +63 -26
- data/lib/sitediff/sanitize.rb +160 -141
- data/lib/sitediff/sanitize/dom_transform.rb +130 -0
- data/lib/sitediff/sanitize/regexp.rb +82 -0
- data/lib/sitediff/uriwrapper.rb +114 -35
- data/lib/sitediff/webserver.rb +94 -0
- data/lib/sitediff/webserver/resultserver.rb +134 -0
- metadata +103 -43
- data/lib/sitediff/files/html_report.html.erb +0 -47
- data/lib/sitediff/util/cache.rb +0 -32
- data/lib/sitediff/util/webserver.rb +0 -77
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 1b7854497b5e81f48d810acec8106cbc66e33492d046e032e5516f76db26f142
|
4
|
+
data.tar.gz: a9349a79953237dd017600d49d38b8e734afc561f0ce09a1f8732e0e933530c9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0e91f665f3c59b8a65f16c6942ec49b9cc9ee7fd12b0a777eadb844a0b9819ab1fd9485495bf2c757ca7342a6198dcccb5ae546c4ddf2682f234d015b64309b2
|
7
|
+
data.tar.gz: 64b7980bbbade8710b6069af19a67083678c2bd5fa99674df3360c1c6a3ddf8a15de7c5be4e8349ec298fc1c0dc27535b816089cd4f8852b8c8633861d72a178
|
data/bin/sitediff
CHANGED
@@ -1,10 +1,16 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
# when run as gem, $0 is /usr/local/bin/sitediff not this file
|
4
|
-
if $
|
5
|
-
$LOAD_PATH.unshift File.expand_path('
|
5
|
+
# When executed directly from a source checkout (rather than through the
# installed gem wrapper), make the sibling lib/ directory loadable.
$LOAD_PATH.unshift(File.expand_path('../lib', __dir__)) if $PROGRAM_NAME == __FILE__

require 'sitediff/cli'

# Hand control to the command-line interface; a Ctrl-C ends the run with a
# short notice instead of a stack trace.
begin
  SiteDiff::Cli.start
rescue Interrupt
  puts("\n")
  SiteDiff.log('Stopping. Interrupted by user.')
end
|
data/lib/sitediff.rb
CHANGED
@@ -1,130 +1,218 @@
|
|
1
1
|
#!/bin/env ruby
|
2
|
-
|
3
|
-
|
4
|
-
require 'sitediff/
|
5
|
-
require 'sitediff/
|
6
|
-
require 'sitediff/
|
7
|
-
require '
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'sitediff/config'
|
5
|
+
require 'sitediff/diff'
|
6
|
+
require 'sitediff/fetch'
|
7
|
+
require 'sitediff/result'
|
8
|
+
require 'sitediff/report'
|
9
|
+
require 'pathname'
|
8
10
|
require 'rainbow'
|
11
|
+
require 'rubygems'
|
12
|
+
require 'yaml'
|
9
13
|
|
14
|
+
# SiteDiff Object.
|
10
15
|
class SiteDiff
  attr_reader :config, :results

  # SiteDiff installation directory.
  ROOT_DIR = File.dirname(File.dirname(__FILE__))

  # Path to misc files. Ex: *.erb, *.css.
  FILES_DIR = File.join(File.dirname(__FILE__), 'sitediff', 'files')

  # Background color used for the log label of each message type.
  # Types not listed here fall back to :blue.
  LABEL_BACKGROUNDS = {
    info: :cyan,
    success: :green,
    error: :red,
    warning: :yellow
  }.freeze

  ##
  # Logs a message to standard output.
  #
  # The label is colorized, the message is not. When no label is given the
  # type is used as the label, except for :info messages which are printed
  # without any label.
  #
  # TODO: Only print :debug messages in debug mode.
  #
  # @param [#to_s] message
  #   Text to print.
  # @param [Symbol] type
  #   Dictates the label color: :info, :success, :error or :warning.
  # @param [#to_s, nil] label
  #   Optional label text shown in square brackets before the message.
  def self.log(message, type = :info, label = nil)
    label ||= type unless type == :info
    label = label.to_s

    # No label: print the bare message.
    return puts(message.to_s) if label.empty?

    tag = Rainbow("[#{label}]").bg(LABEL_BACKGROUNDS.fetch(type, :blue)).fg(:black)

    # Interpolation (rather than String#+) so non-String messages, such as
    # exception objects, can be logged too.
    puts "#{tag} #{message}"
  end

  ##
  # Returns the "before" site's URL.
  #
  # TODO: Remove in favor of config.before_url.
  def before
    @config.before['url']
  end

  ##
  # Returns the "after" site's URL.
  #
  # TODO: Remove in favor of config.after_url.
  def after
    @config.after['url']
  end

  ##
  # Initialize SiteDiff.
  #
  # @param config
  #   Configuration object; it is validated here before being stored.
  # @param cache
  #   Cache object, queried via tag? and read_tags.
  # @param [Boolean] verbose
  #   Passed to each result's log call.
  # @param [Boolean] debug
  #   When true, sanitization errors are re-raised instead of being
  #   recorded on the result.
  #
  # @raise [SiteDiffException]
  #   In single-site mode when the cached "before" is not readable.
  def initialize(config, cache, verbose = true, debug = false)
    @cache = cache
    @verbose = verbose
    @debug = debug

    # Single-site mode: no "before" URL is configured, but a "before"
    # snapshot exists in the cache.
    validate_opts = {}
    if !config.before['url'] && @cache.tag?(:before)
      unless @cache.read_tags.include?(:before)
        raise SiteDiffException,
              "A cached 'before' is required for single-site mode"
      end
      validate_opts[:need_before] = false
    end

    config.validate(validate_opts)
    # Configure diff.
    Diff.diff_config(config)
    @config = config
  end

  ##
  # Sanitize HTML.
  #
  # @param [String] path
  #   Path being processed; handed to the sanitizer.
  # @param [Hash] read_results
  #   Read results keyed by :before and :after.
  #
  # @return [Array]
  #   Two elements: the "before" HTML and the "after" HTML.
  def sanitize(path, read_results)
    %i[before after].map do |tag|
      html = read_results[tag].content
      # TODO: See why encoding is empty while running tests.
      #
      # The presence of an "encoding" value used to be used to determine
      # if the sanitizer would be called. However, encoding turns up blank
      # during rspec tests for some reason.
      encoding = read_results[tag].encoding
      if encoding || html.length.positive?
        section = @config.public_send(tag, true)
        Sanitizer.new(html, section, path: path).sanitize
      else
        html
      end
    end
  end

  ##
  # Process a set of read results.
  #
  # This is the callback that processes items fetched by the Fetcher.
  #
  # @param [String] path
  #   Path that was fetched.
  # @param [Hash] read_results
  #   Read results keyed by :before and :after.
  def process_results(path, read_results)
    @results[path] = build_result(path, read_results)

    # Print results in order! Results may arrive out of order, so only the
    # leading run of already-available results is logged.
    while (next_diff = @results[@ordered.first])
      next_diff.log(@verbose)
      @ordered.shift
    end
  end

  ##
  # Compute diff as per config.
  #
  # @return [Integer]
  #   Number of paths which have diffs.
  def run
    # Map of path -> Result object, populated by process_results
    @results = {}
    @ordered = @config.paths.dup

    unless @cache.read_tags.empty?
      SiteDiff.log("Using sites from cache: #{@cache.read_tags.sort.join(', ')}")
    end

    # TODO: Fix this after config merge refactor!
    # Not quite right. We are not passing @config.before or @config.after
    # so passing this instead but @config.after['curl_opts'] is ignored.
    curl_opts = @config.setting :curl_opts
    config_curl_opts = @config.before['curl_opts']
    # Global settings win over the "before" section; tolerate a missing
    # global setting instead of failing inside Hash#merge.
    curl_opts = config_curl_opts.merge(curl_opts || {}) if config_curl_opts

    fetcher = Fetch.new(
      @cache,
      @config.paths,
      @config.setting(:interval),
      @config.setting(:concurrency),
      curl_opts,
      @debug,
      before: @config.before_url,
      after: @config.after_url
    )

    # Run the Fetcher with "process results" as a callback.
    fetcher.run(&method(:process_results))

    # Order by original path order
    @results = @config.paths.map { |path| @results[path] }
    @results.count { |result| !result.success? }
  end

  ##
  # Get a reporter object to help with report generation.
  #
  # @raise [SiteDiffException]
  #   If called before run has produced any results.
  def report
    if @results.nil?
      raise SiteDiffException,
            'No results detected. Run SiteDiff.run before SiteDiff.report.'
    end

    Report.new(@config, @cache, @results)
  end

  ##
  # Get SiteDiff gemspec.
  def self.gemspec
    Gem::Specification.load(File.join(ROOT_DIR, 'sitediff.gemspec'))
  end

  ##
  # Ensures that a directory exists and returns a Pathname for it.
  #
  # @param [String, Pathname] dir
  #   path/to/directory
  #
  # @return [Pathname]
  def self.ensure_dir(dir)
    dir = Pathname.new(dir) unless dir.is_a? Pathname
    dir.mkpath unless dir.directory?
    dir
  end

  private

  ##
  # Builds the Result for one path.
  #
  # A fetch error on either side yields an error result. Otherwise both
  # sides are sanitized; a failure while doing so is recorded on the result
  # (or re-raised in debug mode) so that every path always gets a Result.
  def build_result(path, read_results)
    error = read_results[:before].error || read_results[:after].error
    return Result.new(path, nil, nil, nil, nil, error) if error

    begin
      Result.new(
        path,
        *sanitize(path, read_results),
        read_results[:before].encoding,
        read_results[:after].encoding,
        nil
      )
    rescue StandardError => e
      raise if @debug

      Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e}")
    end
  end
end
|
data/lib/sitediff/cache.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'fileutils'
require 'pathname'
require 'set'
|
5
|
+
|
6
|
+
class SiteDiff
  # SiteDiff Cache Handler.
  #
  # Stores marshalled results as files under <dir>/snapshot/<tag>/<path>.
  class Cache
    attr_accessor :read_tags, :write_tags

    ##
    # Creates a Cache object.
    #
    # @param [Hash] opts
    #   :directory - storage directory, defaults to '.'.
    #   :create - stored as given; not used elsewhere in this class.
    def initialize(opts = {})
      @create = opts[:create]

      # Read and Write tags are sets that can contain :before and :after.
      # They indicate whether we should use the cache for reading or writing.
      @read_tags = Set.new
      @write_tags = Set.new

      # The directory used by the cache for storage.
      @dir = opts[:directory] || '.'
    end

    ##
    # Is a tag cached?
    # TODO: Rename it to is_cached? as it makes more sense.
    def tag?(tag)
      File.directory?(File.join(@dir, 'snapshot', tag.to_s))
    end

    ##
    # Get data from cache.
    #
    # @return
    #   The stored object, or nil when the tag is not enabled for reading
    #   or no snapshot file exists for the path.
    def get(tag, path)
      return nil unless @read_tags.include? tag

      filename = snapshot_file(tag, path)
      return nil unless File.file? filename

      # Marshal data is binary, so read it without newline or encoding
      # translation.
      # NOTE(review): Marshal.load can instantiate arbitrary objects; the
      # cache directory must only ever contain files written by SiteDiff.
      Marshal.load(File.binread(filename))
    end

    ##
    # Set data to cache.
    #
    # Does nothing unless the tag is enabled for writing.
    def set(tag, path, result)
      return unless @write_tags.include? tag

      filename = snapshot_file(tag, path)
      filepath = Pathname.new(filename)
      unless filepath.dirname.directory?
        begin
          filepath.dirname.mkpath
        rescue Errno::EEXIST
          # A regular file sits where a directory is needed.
          displace_blocking_file(filepath)
        end
      end

      # Binary write to match the binary read in #get.
      File.binwrite(filename, Marshal.dump(result))
    end

    ##
    # TODO: Document this or remove it if unused.
    def key(tag, path)
      # Ensure encoding stays the same!
      Marshal.dump([tag, path.encode('UTF-8')])
    end

    ##
    # Ensures that a directory exists.
    #
    # Also makes that directory the cache's storage directory.
    #
    # @return [String]
    #   The directory path.
    def get_dir(directory)
      # Create the dir. Must go before cache initialization!
      @dir = Pathname.new(directory || '.')
      @dir.mkpath unless @dir.directory?
      @dir.to_s
    end

    private

    ##
    # Path of the snapshot file for a tag and path. When that location is
    # already a directory, the content lives in its index.html.
    def snapshot_file(tag, path)
      filename = File.join(
        @dir,
        'snapshot',
        tag.to_s,
        *path.split(File::SEPARATOR)
      )
      File.directory?(filename) ? File.join(filename, 'index.html') : filename
    end

    ##
    # Turns the file blocking filepath's parent directories into a directory
    # by moving it to <that directory>/index.html.
    #
    # May cause problems if action is not atomic!
    # Not robust! Should generate an UUID or something.
    def displace_blocking_file(filepath)
      blocker = filepath
      blocker = blocker.parent until blocker.exist?
      tempname = blocker.dirname + "#{blocker.basename}.temporary"
      index = blocker + 'index.html'

      SiteDiff.log "Overwriting file #{tempname}", :warning if File.exist?(tempname)
      blocker.rename(tempname)
      filepath.dirname.mkpath

      # Should only happen in strange situations such as when the path
      # is foo/index.html/bar (i.e., index.html is a directory)
      SiteDiff.log "Overwriting file #{index}", :warning if index.exist?
      tempname.rename(index)
    end
  end
end
|