sitediff 0.0.2 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/sitediff +9 -3
- data/lib/sitediff.rb +153 -79
- data/lib/sitediff/api.rb +265 -0
- data/lib/sitediff/cache.rb +110 -47
- data/lib/sitediff/cli.rb +219 -165
- data/lib/sitediff/config.rb +439 -58
- data/lib/sitediff/config/creator.rb +93 -99
- data/lib/sitediff/config/preset.rb +75 -0
- data/lib/sitediff/crawler.rb +108 -72
- data/lib/sitediff/diff.rb +60 -12
- data/lib/sitediff/exception.rb +3 -1
- data/lib/sitediff/fetch.rb +62 -41
- data/lib/sitediff/files/diff.html.erb +20 -2
- data/lib/sitediff/files/jquery.min.js +2 -0
- data/lib/sitediff/files/normalize.css +349 -0
- data/lib/sitediff/files/report.html.erb +171 -0
- data/lib/sitediff/files/sidebyside.html.erb +5 -2
- data/lib/sitediff/files/sitediff.css +303 -30
- data/lib/sitediff/files/sitediff.js +367 -0
- data/lib/sitediff/report.rb +254 -0
- data/lib/sitediff/result.rb +59 -23
- data/lib/sitediff/sanitize.rb +222 -150
- data/lib/sitediff/sanitize/dom_transform.rb +111 -73
- data/lib/sitediff/sanitize/regexp.rb +69 -43
- data/lib/sitediff/uriwrapper.rb +104 -34
- data/lib/sitediff/webserver.rb +89 -77
- data/lib/sitediff/webserver/resultserver.rb +113 -77
- metadata +92 -76
- data/lib/sitediff/files/html_report.html.erb +0 -63
- data/lib/sitediff/files/rules/drupal.yaml +0 -33
- data/lib/sitediff/rules.rb +0 -65
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f75892f718764c8fd2c18d7f3f7e7cf8908d60ea07c2a765510c8ef409b9f0c1
|
4
|
+
data.tar.gz: 3b3744eca0dda04821152aab596fb67891204a1599b4db72e13b4af484693e65
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 97e9098b290742f1b3efe3c284e9392be95ffd0f7576df413a6ec612142b0573acf8b8b4d43369961c154d801db6284fcc1a8d69cea7da8ed99b64a0a1f1af75
|
7
|
+
data.tar.gz: c4b0e93bc4e0acb3d675c8d675d8f6235035aae72421794495f25223cb086eaa4c87d2cde63caa0eda257b0d91f374a0efbbb416ef8ee88c2f0ffde89a608831
|
data/bin/sitediff
CHANGED
@@ -1,10 +1,16 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
# when run as gem, $0 is /usr/local/bin/sitediff not this file
|
4
|
-
if $
|
5
|
-
$LOAD_PATH.unshift File.expand_path('
|
5
|
+
if $PROGRAM_NAME == __FILE__
|
6
|
+
$LOAD_PATH.unshift File.expand_path('../lib', __dir__)
|
6
7
|
end
|
7
8
|
|
8
9
|
require 'sitediff/cli'
|
9
10
|
|
10
|
-
|
11
|
+
begin
|
12
|
+
SiteDiff::Cli.start
|
13
|
+
rescue Interrupt
|
14
|
+
puts("\n")
|
15
|
+
SiteDiff.log('Stopping. Interrupted by user.')
|
16
|
+
end
|
data/lib/sitediff.rb
CHANGED
@@ -1,146 +1,220 @@
|
|
1
1
|
#!/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
2
4
|
require 'sitediff/config'
|
5
|
+
require 'sitediff/diff'
|
3
6
|
require 'sitediff/fetch'
|
4
7
|
require 'sitediff/result'
|
8
|
+
require 'sitediff/report'
|
5
9
|
require 'pathname'
|
6
10
|
require 'rainbow'
|
11
|
+
require 'rubygems'
|
7
12
|
require 'yaml'
|
8
13
|
|
14
|
+
# SiteDiff Object.
|
9
15
|
class SiteDiff
|
10
|
-
|
16
|
+
attr_reader :config, :results
|
17
|
+
|
18
|
+
# SiteDiff installation directory.
|
19
|
+
ROOT_DIR = File.dirname(File.dirname(__FILE__))
|
20
|
+
|
21
|
+
# Path to misc files. Ex: *.erb, *.css.
|
11
22
|
FILES_DIR = File.join(File.dirname(__FILE__), 'sitediff', 'files')
|
12
23
|
|
13
|
-
#
|
14
|
-
|
15
|
-
|
16
|
-
#
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
bg = fg = nil
|
26
|
-
case type
|
27
|
-
when :info
|
28
|
-
when :diff_success
|
29
|
-
bg = :green
|
24
|
+
# Logs a message.
|
25
|
+
#
|
26
|
+
# Label will be colorized and message will not.
|
27
|
+
# Type dictates the color: can be :info, :success, :error, or :warning.
|
28
|
+
#
|
29
|
+
# TODO: Only print :debug messages in debug mode.
|
30
|
+
def self.log(message, type = :info, label = nil)
|
31
|
+
# Prepare label.
|
32
|
+
label ||= type unless type == :info
|
33
|
+
label = label.to_s
|
34
|
+
unless label.empty?
|
35
|
+
# Colorize label.
|
30
36
|
fg = :black
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
37
|
+
bg = :blue
|
38
|
+
|
39
|
+
case type
|
40
|
+
when :info
|
41
|
+
bg = :cyan
|
42
|
+
when :success
|
43
|
+
bg = :green
|
44
|
+
when :error
|
45
|
+
bg = :red
|
46
|
+
when :warning
|
47
|
+
bg = :yellow
|
48
|
+
end
|
49
|
+
|
50
|
+
label = '[' + label.to_s + ']'
|
51
|
+
label = Rainbow(label)
|
52
|
+
label = label.bg(bg) if bg
|
53
|
+
label = label.fg(fg) if fg
|
54
|
+
|
55
|
+
# Add a space after the label.
|
56
|
+
label += ' '
|
38
57
|
end
|
39
|
-
|
40
|
-
|
41
|
-
label = label.fg(fg) if fg
|
42
|
-
puts label + ' ' + str
|
58
|
+
|
59
|
+
puts label + message
|
43
60
|
end
|
44
61
|
|
45
|
-
|
62
|
+
##
|
63
|
+
# Returns the "before" site's URL.
|
64
|
+
#
|
65
|
+
# TODO: Remove in favor of config.before_url.
|
46
66
|
def before
|
47
67
|
@config.before['url']
|
48
68
|
end
|
69
|
+
|
70
|
+
##
|
71
|
+
# Returns the "after" site's URL.
|
72
|
+
#
|
73
|
+
# TODO: Remove in favor of config.after_url.
|
49
74
|
def after
|
50
75
|
@config.after['url']
|
51
76
|
end
|
52
77
|
|
53
|
-
|
78
|
+
# Initialize SiteDiff.
|
79
|
+
def initialize(config, cache, verbose = true, debug = false)
|
54
80
|
@cache = cache
|
55
81
|
@verbose = verbose
|
82
|
+
@debug = debug
|
56
83
|
|
57
84
|
# Check for single-site mode
|
58
85
|
validate_opts = {}
|
59
86
|
if !config.before['url'] && @cache.tag?(:before)
|
60
|
-
|
61
|
-
|
62
|
-
|
87
|
+
unless @cache.read_tags.include?(:before)
|
88
|
+
raise SiteDiffException,
|
89
|
+
"A cached 'before' is required for single-site mode"
|
90
|
+
end
|
63
91
|
validate_opts[:need_before] = false
|
64
92
|
end
|
65
93
|
config.validate(validate_opts)
|
66
|
-
|
94
|
+
# Configure diff.
|
95
|
+
Diff.diff_config(config)
|
67
96
|
@config = config
|
68
97
|
end
|
69
98
|
|
70
|
-
# Sanitize HTML
|
99
|
+
# Sanitize HTML.
|
71
100
|
def sanitize(path, read_results)
|
72
|
-
[
|
101
|
+
%i[before after].map do |tag|
|
73
102
|
html = read_results[tag].content
|
74
|
-
|
75
|
-
|
103
|
+
# TODO: See why encoding is empty while running tests.
|
104
|
+
#
|
105
|
+
# The presence of an "encoding" value used to be used to determine
|
106
|
+
# if the sanitizer would be called. However, encoding turns up blank
|
107
|
+
# during rspec tests for some reason.
|
108
|
+
encoding = read_results[tag].encoding
|
109
|
+
if encoding || html.length.positive?
|
110
|
+
section = @config.send(tag, true)
|
111
|
+
opts = { path: path }
|
112
|
+
opts[:output] = @config.output if @config.output
|
113
|
+
Sanitizer.new(html, section, opts).sanitize
|
114
|
+
else
|
115
|
+
html
|
116
|
+
end
|
76
117
|
end
|
77
118
|
end
|
78
119
|
|
79
|
-
|
120
|
+
##
|
121
|
+
# Process a set of read results.
|
122
|
+
#
|
123
|
+
# This is the callback that processes items fetched by the Fetcher.
|
80
124
|
def process_results(path, read_results)
|
81
|
-
|
82
|
-
|
125
|
+
error = (read_results[:before].error || read_results[:after].error)
|
126
|
+
if error
|
127
|
+
diff = Result.new(path, nil, nil, nil, nil, error)
|
83
128
|
else
|
84
|
-
|
129
|
+
begin
|
130
|
+
diff = Result.new(
|
131
|
+
path,
|
132
|
+
*sanitize(path, read_results),
|
133
|
+
read_results[:before].encoding,
|
134
|
+
read_results[:after].encoding,
|
135
|
+
nil
|
136
|
+
)
|
137
|
+
rescue StandardError => e
|
138
|
+
raise if @debug
|
139
|
+
|
140
|
+
Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e}")
|
141
|
+
end
|
85
142
|
end
|
86
143
|
@results[path] = diff
|
87
144
|
|
88
145
|
# Print results in order!
|
89
|
-
while next_diff = @results[@ordered.first]
|
146
|
+
while (next_diff = @results[@ordered.first])
|
90
147
|
next_diff.log(@verbose)
|
91
148
|
@ordered.shift
|
92
149
|
end
|
93
150
|
end
|
94
151
|
|
95
|
-
|
96
|
-
#
|
152
|
+
##
|
153
|
+
# Compute diff as per config.
|
154
|
+
#
|
155
|
+
# @return [Integer]
|
156
|
+
# Number of paths which have diffs.
|
97
157
|
def run
|
98
158
|
# Map of path -> Result object, populated by process_results
|
99
159
|
@results = {}
|
100
160
|
@ordered = @config.paths.dup
|
101
161
|
|
102
162
|
unless @cache.read_tags.empty?
|
103
|
-
SiteDiff.log(
|
104
|
-
@cache.read_tags.sort.join(', '))
|
163
|
+
SiteDiff.log('Using sites from cache: ' + @cache.read_tags.sort.join(', '))
|
105
164
|
end
|
106
165
|
|
107
|
-
|
108
|
-
|
109
|
-
|
166
|
+
# TODO: Fix this after config merge refactor!
|
167
|
+
# Not quite right. We are not passing @config.before or @config.after
|
168
|
+
# so passing this instead but @config.after['curl_opts'] is ignored.
|
169
|
+
curl_opts = @config.setting :curl_opts
|
170
|
+
config_curl_opts = @config.before['curl_opts']
|
171
|
+
curl_opts = config_curl_opts.clone.merge(curl_opts) if config_curl_opts
|
172
|
+
fetcher = Fetch.new(
|
173
|
+
@cache,
|
174
|
+
@config.paths,
|
175
|
+
@config.setting(:interval),
|
176
|
+
@config.setting(:concurrency),
|
177
|
+
curl_opts,
|
178
|
+
@debug,
|
179
|
+
before: @config.before_url,
|
180
|
+
after: @config.after_url
|
181
|
+
)
|
182
|
+
|
183
|
+
# Run the Fetcher with "process results" as a callback.
|
184
|
+
fetcher.run(&method(:process_results))
|
110
185
|
|
111
186
|
# Order by original path order
|
112
|
-
@results = @config.paths.map { |
|
113
|
-
|
187
|
+
@results = @config.paths.map { |path| @results[path] }
|
188
|
+
results.map { |r| r unless r.success? }.compact.length
|
114
189
|
end
|
115
190
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
# store diffs of each failing case, first wipe out existing diffs
|
124
|
-
diff_dir = dir + DIFFS_DIR
|
125
|
-
diff_dir.rmtree if diff_dir.exist?
|
126
|
-
results.each { |r| r.dump(dir) if r.status == Result::STATUS_FAILURE }
|
127
|
-
SiteDiff::log "All diff files were dumped inside #{dir.expand_path}"
|
128
|
-
|
129
|
-
# store failing paths
|
130
|
-
failures = dir + FAILURES_FILE
|
131
|
-
SiteDiff::log "Writing failures to #{failures.expand_path}"
|
132
|
-
failures.open('w') do |f|
|
133
|
-
results.each { |r| f.puts r.path unless r.success? }
|
191
|
+
##
|
192
|
+
# Get a reporter object to help with report generation.
|
193
|
+
def report
|
194
|
+
if @results.nil?
|
195
|
+
raise SiteDiffException(
|
196
|
+
'No results detected. Run SiteDiff.run before SiteDiff.report.'
|
197
|
+
)
|
134
198
|
end
|
135
199
|
|
136
|
-
|
137
|
-
|
138
|
-
@cache)
|
139
|
-
dir.+(REPORT_FILE).open('w') { |f| f.write(report) }
|
200
|
+
Report.new(@config, @cache, @results)
|
201
|
+
end
|
140
202
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
203
|
+
##
|
204
|
+
# Get SiteDiff gemspec.
|
205
|
+
def self.gemspec
|
206
|
+
file = ROOT_DIR + '/sitediff.gemspec'
|
207
|
+
Gem::Specification.load(file)
|
208
|
+
end
|
209
|
+
|
210
|
+
##
|
211
|
+
# Ensures that a directory exists and returns a Pathname for it.
|
212
|
+
#
|
213
|
+
# @param [String] dir
|
214
|
+
# path/to/directory
|
215
|
+
def self.ensure_dir(dir)
|
216
|
+
dir = Pathname.new(dir) unless dir.is_a? Pathname
|
217
|
+
dir.mkpath unless dir.directory?
|
218
|
+
dir
|
145
219
|
end
|
146
220
|
end
|
data/lib/sitediff/api.rb
ADDED
@@ -0,0 +1,265 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'sitediff'
|
4
|
+
require 'sitediff/cache'
|
5
|
+
require 'sitediff/config'
|
6
|
+
require 'sitediff/config/creator'
|
7
|
+
require 'sitediff/config/preset'
|
8
|
+
require 'sitediff/fetch'
|
9
|
+
require 'sitediff/webserver/resultserver'
|
10
|
+
|
11
|
+
class SiteDiff
|
12
|
+
##
|
13
|
+
# Sitediff API interface.
|
14
|
+
class Api
|
15
|
+
##
|
16
|
+
# Initializes new Api object.
|
17
|
+
def initialize(directory, config_file = nil)
|
18
|
+
@dir = get_dir(directory)
|
19
|
+
@config = SiteDiff::Config.new(config_file, @dir)
|
20
|
+
end
|
21
|
+
|
22
|
+
##
|
23
|
+
# Initialize a SiteDiff project.
|
24
|
+
#
|
25
|
+
# Calling:
|
26
|
+
# SiteDiff::Api.init(
|
27
|
+
# depth: 3,
|
28
|
+
# directory: 'sitediff',
|
29
|
+
# concurrency: 3,
|
30
|
+
# interval: 0,
|
31
|
+
# include: nil,
|
32
|
+
# exclude: '*.pdf',
|
33
|
+
# preset: 'drupal',
|
34
|
+
# curl_opts: {timeout: 60},
|
35
|
+
# crawl: false
|
36
|
+
# )
|
37
|
+
def self.init(options)
|
38
|
+
# Prepare a config object and write it to the file system.
|
39
|
+
creator = SiteDiff::Config::Creator.new(options[:debug], options[:before_url], options[:after_url])
|
40
|
+
include_regex = Config.create_regexp(options[:include])
|
41
|
+
exclude_regex = Config.create_regexp(options[:exclude])
|
42
|
+
creator.create(
|
43
|
+
depth: options[:depth],
|
44
|
+
directory: options[:directory],
|
45
|
+
concurrency: options[:concurrency],
|
46
|
+
interval: options[:interval],
|
47
|
+
include: include_regex,
|
48
|
+
exclude: exclude_regex,
|
49
|
+
preset: options[:preset],
|
50
|
+
curl_opts: options[:curl_opts]
|
51
|
+
)
|
52
|
+
SiteDiff.log "Created #{creator.config_file.expand_path}", :success
|
53
|
+
|
54
|
+
# TODO: implement crawl ^^^
|
55
|
+
# Discover paths, if enabled.
|
56
|
+
# if options[:crawl]
|
57
|
+
# crawl(creator.config_file)
|
58
|
+
# SiteDiff.log 'You can now run "sitediff diff".', :success
|
59
|
+
# else
|
60
|
+
# SiteDiff.log 'Run "sitediff crawl" to discover paths. You should then be able to run "sitediff diff".', :info
|
61
|
+
# end
|
62
|
+
end
|
63
|
+
|
64
|
+
##
|
65
|
+
# Diff the `before` and `after`.
|
66
|
+
#
|
67
|
+
# Calling:
|
68
|
+
# Api.diff(
|
69
|
+
# paths: options['paths'],
|
70
|
+
# paths_file: options['paths-file'],
|
71
|
+
# ignore_whitespace: options['ignore-whitespace'],
|
72
|
+
# export: options['export'],
|
73
|
+
# before: options['before'],
|
74
|
+
# after: options['after'],
|
75
|
+
# cached: options['cached'],
|
76
|
+
# verbose: options['verbose'],
|
77
|
+
# report_format: options['report-format'],
|
78
|
+
# before_report: options['before-report'],
|
79
|
+
# after_report: options['after-report'],
|
80
|
+
# cli_mode: false
|
81
|
+
# )
|
82
|
+
def diff(options)
|
83
|
+
@config.ignore_whitespace = options[:ignore_whitespace]
|
84
|
+
@config.export = options[:export]
|
85
|
+
# Apply "paths" override, if any.
|
86
|
+
if options[:paths]
|
87
|
+
@config.paths = options[:paths]
|
88
|
+
else
|
89
|
+
paths_file = options[:paths_file]
|
90
|
+
paths_file ||= File.join(@dir, Config::DEFAULT_PATHS_FILENAME)
|
91
|
+
paths_file = File.expand_path(paths_file)
|
92
|
+
|
93
|
+
paths_count = @config.paths_file_read(paths_file)
|
94
|
+
SiteDiff.log "Read #{paths_count} paths from: #{paths_file}"
|
95
|
+
end
|
96
|
+
|
97
|
+
# TODO: Why do we allow before and after override during diff?
|
98
|
+
@config.before['url'] = options[:before] if options[:before]
|
99
|
+
@config.after['url'] = options[:after] if options[:after]
|
100
|
+
|
101
|
+
# Prepare cache.
|
102
|
+
cache = SiteDiff::Cache.new(
|
103
|
+
create: options[:cached] != 'none',
|
104
|
+
directory: @dir
|
105
|
+
)
|
106
|
+
cache.read_tags << :before if %w[before all].include?(options[:cached])
|
107
|
+
cache.read_tags << :after if %w[after all].include?(options[:cached])
|
108
|
+
cache.write_tags << :before << :after
|
109
|
+
|
110
|
+
# Run sitediff.
|
111
|
+
sitediff = SiteDiff.new(
|
112
|
+
@config,
|
113
|
+
cache,
|
114
|
+
options[:verbose],
|
115
|
+
options[:debug]
|
116
|
+
)
|
117
|
+
num_failing = sitediff.run
|
118
|
+
exit_code = num_failing.positive? ? 2 : 0
|
119
|
+
|
120
|
+
# Generate HTML report.
|
121
|
+
if options[:report_format] == 'html' || @config.export
|
122
|
+
sitediff.report.generate_html(
|
123
|
+
@dir,
|
124
|
+
options[:before_report],
|
125
|
+
options[:after_report]
|
126
|
+
)
|
127
|
+
end
|
128
|
+
|
129
|
+
# Generate JSON report.
|
130
|
+
if options[:report_format] == 'json' && @config.export == false
|
131
|
+
sitediff.report.generate_json @dir
|
132
|
+
end
|
133
|
+
|
134
|
+
SiteDiff.log 'Run "sitediff serve" to see a report.' unless options[:export]
|
135
|
+
rescue Config::InvalidConfig => e
|
136
|
+
SiteDiff.log "Invalid configuration: #{e.message}", :error
|
137
|
+
SiteDiff.log e.backtrace, :error if options[:verbose]
|
138
|
+
rescue Config::ConfigNotFound => e
|
139
|
+
SiteDiff.log "Invalid configuration: #{e.message}", :error
|
140
|
+
SiteDiff.log e.backtrace, :error if options[:verbose]
|
141
|
+
else # no exception was raised
|
142
|
+
# Thor::Error --> exit(1), guaranteed by exit_on_failure?
|
143
|
+
# Failing diff --> exit(2), populated above
|
144
|
+
exit(exit_code) if options[:cli_mode]
|
145
|
+
end
|
146
|
+
|
147
|
+
##
|
148
|
+
# Crawl the `before` site to determine `paths`.
|
149
|
+
def crawl
|
150
|
+
# Prepare cache.
|
151
|
+
@cache = SiteDiff::Cache.new(
|
152
|
+
create: true,
|
153
|
+
directory: @dir
|
154
|
+
)
|
155
|
+
@cache.write_tags << :before << :after
|
156
|
+
|
157
|
+
# Crawl with Hydra to discover paths.
|
158
|
+
hydra = Typhoeus::Hydra.new(
|
159
|
+
max_concurrency: @config.setting(:concurrency)
|
160
|
+
)
|
161
|
+
@paths = {}
|
162
|
+
@config.roots.each do |tag, url|
|
163
|
+
Crawler.new(
|
164
|
+
hydra,
|
165
|
+
url,
|
166
|
+
@config.setting(:interval),
|
167
|
+
@config.setting(:include),
|
168
|
+
@config.setting(:exclude),
|
169
|
+
@config.setting(:depth),
|
170
|
+
@config.curl_opts,
|
171
|
+
@debug
|
172
|
+
) do |info|
|
173
|
+
SiteDiff.log "Visited #{info.uri}, cached."
|
174
|
+
after_crawl(tag, info)
|
175
|
+
end
|
176
|
+
end
|
177
|
+
hydra.run
|
178
|
+
|
179
|
+
# Write paths to a file.
|
180
|
+
@paths = @paths.values.reduce(&:|).to_a.sort
|
181
|
+
@config.paths_file_write(@paths)
|
182
|
+
|
183
|
+
# Log output.
|
184
|
+
file = Pathname.new(@dir) + Config::DEFAULT_PATHS_FILENAME
|
185
|
+
SiteDiff.log ''
|
186
|
+
SiteDiff.log "#{@paths.length} page(s) found."
|
187
|
+
SiteDiff.log "Created #{file.expand_path}.", :success, 'done'
|
188
|
+
end
|
189
|
+
|
190
|
+
##
|
191
|
+
# Serves SiteDiff report for accessing in the browser.
|
192
|
+
#
|
193
|
+
# Calling:
|
194
|
+
# api.serve(browse: true, port: 13080)
|
195
|
+
def serve(options)
|
196
|
+
@cache = Cache.new(directory: @dir)
|
197
|
+
@cache.read_tags << :before << :after
|
198
|
+
|
199
|
+
SiteDiff::Webserver::ResultServer.new(
|
200
|
+
options[:port],
|
201
|
+
@dir,
|
202
|
+
browse: options[:browse],
|
203
|
+
cache: @cache,
|
204
|
+
config: @config
|
205
|
+
).wait
|
206
|
+
rescue SiteDiffException => e
|
207
|
+
SiteDiff.log e.message, :error
|
208
|
+
SiteDiff.log e.backtrace, :error if options[:verbose]
|
209
|
+
end
|
210
|
+
|
211
|
+
##
|
212
|
+
# Fetches the configured paths from one site and caches them under the :before tag.
|
213
|
+
def store(options)
|
214
|
+
# TODO: Figure out how to remove this config.validate call.
|
215
|
+
@config.validate(need_before: false)
|
216
|
+
@config.paths_file_read
|
217
|
+
|
218
|
+
@cache = SiteDiff::Cache.new(directory: @dir, create: true)
|
219
|
+
@cache.write_tags << :before
|
220
|
+
|
221
|
+
base = options[:url] || @config.after['url']
|
222
|
+
fetcher = SiteDiff::Fetch.new(@cache,
|
223
|
+
@config.paths,
|
224
|
+
@config.setting(:interval),
|
225
|
+
@config.setting(:concurrency),
|
226
|
+
get_curl_opts(@config.settings),
|
227
|
+
options[:debug],
|
228
|
+
before: base)
|
229
|
+
fetcher.run do |path, _res|
|
230
|
+
SiteDiff.log "Visited #{path}, cached"
|
231
|
+
end
|
232
|
+
end
|
233
|
+
|
234
|
+
private
|
235
|
+
|
236
|
+
##
|
237
|
+
# Ensures that the given directory exists.
|
238
|
+
def get_dir(directory)
|
239
|
+
# Create the dir. Must go before cache initialization!
|
240
|
+
@dir = Pathname.new(directory || '.')
|
241
|
+
@dir.mkpath unless @dir.directory?
|
242
|
+
@dir.to_s
|
243
|
+
end
|
244
|
+
|
245
|
+
##
|
246
|
+
# Processes a crawled path.
|
247
|
+
def after_crawl(tag, info)
|
248
|
+
path = UriWrapper.canonicalize(info.relative)
|
249
|
+
|
250
|
+
# Register the path.
|
251
|
+
@paths[tag] = [] unless @paths[tag]
|
252
|
+
@paths[tag] << path
|
253
|
+
|
254
|
+
result = info.read_result
|
255
|
+
|
256
|
+
# Write result to applicable cache.
|
257
|
+
@cache.set(tag, path, result)
|
258
|
+
# If single-site, cache "after" as "before".
|
259
|
+
@cache.set(:before, path, result) unless @config.roots[:before]
|
260
|
+
|
261
|
+
# TODO: Restore application of rules.
|
262
|
+
# @rules.handle_page(tag, res.content, info.document) if @rules && !res.error
|
263
|
+
end
|
264
|
+
end
|
265
|
+
end
|