sitediff 0.0.2 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/sitediff +9 -3
- data/lib/sitediff.rb +153 -79
- data/lib/sitediff/api.rb +265 -0
- data/lib/sitediff/cache.rb +110 -47
- data/lib/sitediff/cli.rb +219 -165
- data/lib/sitediff/config.rb +439 -58
- data/lib/sitediff/config/creator.rb +93 -99
- data/lib/sitediff/config/preset.rb +75 -0
- data/lib/sitediff/crawler.rb +108 -72
- data/lib/sitediff/diff.rb +60 -12
- data/lib/sitediff/exception.rb +3 -1
- data/lib/sitediff/fetch.rb +62 -41
- data/lib/sitediff/files/diff.html.erb +20 -2
- data/lib/sitediff/files/jquery.min.js +2 -0
- data/lib/sitediff/files/normalize.css +349 -0
- data/lib/sitediff/files/report.html.erb +171 -0
- data/lib/sitediff/files/sidebyside.html.erb +5 -2
- data/lib/sitediff/files/sitediff.css +303 -30
- data/lib/sitediff/files/sitediff.js +367 -0
- data/lib/sitediff/report.rb +254 -0
- data/lib/sitediff/result.rb +59 -23
- data/lib/sitediff/sanitize.rb +222 -150
- data/lib/sitediff/sanitize/dom_transform.rb +111 -73
- data/lib/sitediff/sanitize/regexp.rb +69 -43
- data/lib/sitediff/uriwrapper.rb +104 -34
- data/lib/sitediff/webserver.rb +89 -77
- data/lib/sitediff/webserver/resultserver.rb +113 -77
- metadata +92 -76
- data/lib/sitediff/files/html_report.html.erb +0 -63
- data/lib/sitediff/files/rules/drupal.yaml +0 -33
- data/lib/sitediff/rules.rb +0 -65
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f75892f718764c8fd2c18d7f3f7e7cf8908d60ea07c2a765510c8ef409b9f0c1
|
4
|
+
data.tar.gz: 3b3744eca0dda04821152aab596fb67891204a1599b4db72e13b4af484693e65
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 97e9098b290742f1b3efe3c284e9392be95ffd0f7576df413a6ec612142b0573acf8b8b4d43369961c154d801db6284fcc1a8d69cea7da8ed99b64a0a1f1af75
|
7
|
+
data.tar.gz: c4b0e93bc4e0acb3d675c8d675d8f6235035aae72421794495f25223cb086eaa4c87d2cde63caa0eda257b0d91f374a0efbbb416ef8ee88c2f0ffde89a608831
|
data/bin/sitediff
CHANGED
@@ -1,10 +1,16 @@
|
|
1
1
|
#!/usr/bin/env ruby
# frozen_string_literal: true

# Command-line entry point for SiteDiff.
#
# When run as an installed gem, $0 is /usr/local/bin/sitediff and not this
# file, so lib/ is only pushed onto the load path for direct invocations.
$LOAD_PATH.unshift File.expand_path('../lib', __dir__) if $PROGRAM_NAME == __FILE__

require 'sitediff/cli'

begin
  SiteDiff::Cli.start
rescue Interrupt
  # Ctrl-C: finish the current terminal line, then say why we stopped.
  puts("\n")
  SiteDiff.log('Stopping. Interrupted by user.')
end
|
data/lib/sitediff.rb
CHANGED
@@ -1,146 +1,220 @@
|
|
1
1
|
#!/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
2
4
|
require 'sitediff/config'
|
5
|
+
require 'sitediff/diff'
|
3
6
|
require 'sitediff/fetch'
|
4
7
|
require 'sitediff/result'
|
8
|
+
require 'sitediff/report'
|
5
9
|
require 'pathname'
|
6
10
|
require 'rainbow'
|
11
|
+
require 'rubygems'
|
7
12
|
require 'yaml'
|
8
13
|
|
14
|
+
# SiteDiff Object.
|
9
15
|
class SiteDiff
|
10
|
-
|
16
|
+
attr_reader :config, :results
|
17
|
+
|
18
|
+
# SiteDiff installation directory.
|
19
|
+
ROOT_DIR = File.dirname(File.dirname(__FILE__))
|
20
|
+
|
21
|
+
# Path to misc files. Ex: *.erb, *.css.
|
11
22
|
FILES_DIR = File.join(File.dirname(__FILE__), 'sitediff', 'files')
|
12
23
|
|
13
|
-
#
|
14
|
-
|
15
|
-
|
16
|
-
#
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
bg = fg = nil
|
26
|
-
case type
|
27
|
-
when :info
|
28
|
-
when :diff_success
|
29
|
-
bg = :green
|
24
|
+
# Logs a message.
|
25
|
+
#
|
26
|
+
# Label will be colorized and message will not.
|
27
|
+
# Type dictates the color: can be :info, :success, :error, or :warning.
|
28
|
+
#
|
29
|
+
# TODO: Only print :debug messages in debug mode.
|
30
|
+
# Prints a message to stdout, optionally prefixed with a colorized label.
#
# Only the label is colorized. When no label is given, the type is used as
# the label, except for :info messages, which are printed without one.
#
# @param message [String, Array<String>]
#   Text to print. An Array (e.g. an exception backtrace) is printed with
#   one item per line.
# @param type [Symbol]
#   Picks the label background: :info (cyan), :success (green),
#   :error (red), :warning (yellow); anything else is blue.
# @param label [String, Symbol, nil]
#   Label text. Defaults to the type for non-:info messages.
def self.log(message, type = :info, label = nil)
  # Callers hand over arrays such as e.backtrace; concatenating an Array
  # onto the label String would raise a TypeError, so flatten it first.
  message = message.join("\n") if message.is_a?(Array)

  label ||= type unless type == :info
  label = label.to_s
  unless label.empty?
    bg = { info: :cyan, success: :green, error: :red, warning: :yellow }.fetch(type, :blue)
    # Colorized "[label]" followed by a single separating space.
    label = Rainbow("[#{label}]").bg(bg).fg(:black) + ' '
  end

  puts "#{label}#{message}"
end
|
44
61
|
|
45
|
-
|
62
|
+
##
|
63
|
+
# Returns the "before" site's URL.
|
64
|
+
#
|
65
|
+
# TODO: Remove in favor of config.before_url.
|
46
66
|
# Reads the 'url' entry of the "before" section of the config.
def before
  before_section = @config.before
  before_section['url']
end
|
69
|
+
|
70
|
+
##
|
71
|
+
# Returns the "after" site's URL.
|
72
|
+
#
|
73
|
+
# TODO: Remove in favor of config.after_url.
|
49
74
|
# Reads the 'url' entry of the "after" section of the config.
def after
  after_section = @config.after
  after_section['url']
end
|
52
77
|
|
53
|
-
|
78
|
+
# Initialize SiteDiff.
|
79
|
+
# Stores the collaborators, validates the config and configures Diff.
#
# Single-site mode is detected when the config has no "before" URL and the
# cache reports a :before tag; in that mode the config is validated without
# requiring a "before" site.
#
# @param config  configuration object (validated here)
# @param cache   cache object queried for its tags
# @param verbose [Boolean] stored and later passed to Result#log
# @param debug   [Boolean] stored; when set, sanitization errors are re-raised
# @raise [SiteDiffException] in single-site mode when :before is not among
#   the cache's read tags
def initialize(config, cache, verbose = true, debug = false)
  @cache = cache
  @verbose = verbose
  @debug = debug

  single_site = !config.before['url'] && @cache.tag?(:before)
  if single_site && !@cache.read_tags.include?(:before)
    raise SiteDiffException,
          "A cached 'before' is required for single-site mode"
  end

  config.validate(single_site ? { need_before: false } : {})
  Diff.diff_config(config)
  @config = config
end
|
69
98
|
|
70
|
-
# Sanitize HTML
|
99
|
+
# Sanitize HTML.
|
71
100
|
# Sanitizes the fetched "before" and "after" HTML for one path.
#
# @param path the path being processed (passed to the Sanitizer as :path)
# @param read_results object indexed by :before / :after whose entries
#   respond to #content and #encoding
# @return [Array] two elements, in [before, after] order
def sanitize(path, read_results)
  %i[before after].map do |tag|
    html = read_results[tag].content
    encoding = read_results[tag].encoding

    # TODO: See why encoding is empty while running tests.
    # Encoding alone used to decide whether to sanitize, but it turns up
    # blank during rspec tests, so non-empty content also qualifies.
    next html unless encoding || html.length.positive?

    section = @config.send(tag, true)
    opts = { path: path }
    opts[:output] = @config.output if @config.output
    Sanitizer.new(html, section, opts).sanitize
  end
end
|
78
119
|
|
79
|
-
|
120
|
+
##
|
121
|
+
# Process a set of read results.
|
122
|
+
#
|
123
|
+
# This is the callback that processes items fetched by the Fetcher.
|
80
124
|
# Turns the fetched before/after pair for one path into a Result, stores it
# in @results and logs every result that is now ready, in @ordered order.
#
# This is the callback handed to the fetcher.
#
# @param path the path that was fetched
# @param read_results object indexed by :before / :after whose entries
#   respond to #error and #encoding
# @raise [StandardError] re-raises sanitization errors when @debug is set
def process_results(path, read_results)
  error = read_results[:before].error || read_results[:after].error
  diff =
    if error
      Result.new(path, nil, nil, nil, nil, error)
    else
      begin
        Result.new(
          path,
          *sanitize(path, read_results),
          read_results[:before].encoding,
          read_results[:after].encoding,
          nil
        )
      rescue StandardError => e
        raise if @debug

        # The value of this branch must become `diff`; otherwise a nil is
        # stored for the path and the failure is silently lost.
        Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e}")
      end
    end
  @results[path] = diff

  # Print results in order! Stop at the first path without a result yet.
  while (next_diff = @results[@ordered.first])
    next_diff.log(@verbose)
    @ordered.shift
  end
end
|
94
151
|
|
95
|
-
|
96
|
-
#
|
152
|
+
##
|
153
|
+
# Compute diff as per config.
|
154
|
+
#
|
155
|
+
# @return [Integer]
|
156
|
+
# Number of paths which have diffs.
|
97
157
|
# Fetches every configured path, diffs it and collects the results.
#
# @return [Integer] number of results whose #success? is false
def run
  # Map of path -> Result object, populated by process_results.
  @results = {}
  @ordered = @config.paths.dup

  cached_tags = @cache.read_tags
  SiteDiff.log('Using sites from cache: ' + cached_tags.sort.join(', ')) unless cached_tags.empty?

  # TODO: Fix this after config merge refactor!
  # Not quite right: only the "before" section's curl_opts are merged
  # here, so @config.after['curl_opts'] is ignored.
  curl_opts = @config.setting(:curl_opts)
  before_curl_opts = @config.before['curl_opts']
  curl_opts = before_curl_opts.merge(curl_opts) if before_curl_opts

  fetcher = Fetch.new(
    @cache,
    @config.paths,
    @config.setting(:interval),
    @config.setting(:concurrency),
    curl_opts,
    @debug,
    before: @config.before_url,
    after: @config.after_url
  )

  # Run the fetcher with process_results as the per-path callback.
  fetcher.run(&method(:process_results))

  # Re-order the results by the original path order.
  @results = @config.paths.map { |path| @results[path] }
  @results.reject(&:success?).length
end
|
115
190
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
# store diffs of each failing case, first wipe out existing diffs
|
124
|
-
diff_dir = dir + DIFFS_DIR
|
125
|
-
diff_dir.rmtree if diff_dir.exist?
|
126
|
-
results.each { |r| r.dump(dir) if r.status == Result::STATUS_FAILURE }
|
127
|
-
SiteDiff::log "All diff files were dumped inside #{dir.expand_path}"
|
128
|
-
|
129
|
-
# store failing paths
|
130
|
-
failures = dir + FAILURES_FILE
|
131
|
-
SiteDiff::log "Writing failures to #{failures.expand_path}"
|
132
|
-
failures.open('w') do |f|
|
133
|
-
results.each { |r| f.puts r.path unless r.success? }
|
191
|
+
##
|
192
|
+
# Get a reporter object to help with report generation.
|
193
|
+
# Builds a reporter object to help with report generation.
#
# @return [Report] reporter built from the config, cache and results
# @raise [SiteDiffException] when no results exist yet (run was not called)
def report
  if @results.nil?
    # `raise SiteDiffException(...)` would be a method call and fail with
    # NoMethodError; raise the exception class with a message instead.
    raise SiteDiffException,
          'No results detected. Run SiteDiff.run before SiteDiff.report.'
  end

  Report.new(@config, @cache, @results)
end
|
140
202
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
203
|
+
##
|
204
|
+
# Get SiteDiff gemspec.
|
205
|
+
# Loads the sitediff.gemspec file located in ROOT_DIR.
#
# @return whatever Gem::Specification.load returns for that file
def self.gemspec
  Gem::Specification.load("#{ROOT_DIR}/sitediff.gemspec")
end
|
209
|
+
|
210
|
+
##
|
211
|
+
# Ensures that a directory exists and returns a Pathname for it.
|
212
|
+
#
|
213
|
+
# @param [String] dir
|
214
|
+
# path/to/directory
|
215
|
+
# Ensures that a directory exists and returns a Pathname for it.
#
# @param dir [String, Pathname] path/to/directory
# @return [Pathname] the directory, created along with any missing parents
def self.ensure_dir(dir)
  # Pathname() accepts both Strings and Pathnames, so no type check needed.
  dir = Pathname(dir)
  # mkpath is a no-op for an existing directory; a separate existence
  # check would only add a check-then-create race.
  dir.mkpath
  dir
end
|
146
220
|
end
|
data/lib/sitediff/api.rb
ADDED
@@ -0,0 +1,265 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'sitediff'
|
4
|
+
require 'sitediff/cache'
|
5
|
+
require 'sitediff/config'
|
6
|
+
require 'sitediff/config/creator'
|
7
|
+
require 'sitediff/config/preset'
|
8
|
+
require 'sitediff/fetch'
|
9
|
+
require 'sitediff/webserver/resultserver'
|
10
|
+
|
11
|
+
class SiteDiff
|
12
|
+
##
|
13
|
+
# Sitediff API interface.
|
14
|
+
class Api
|
15
|
+
##
|
16
|
+
# Initializes new Api object.
|
17
|
+
# Prepares the working directory and builds the configuration.
#
# @param directory   working directory, resolved through get_dir
# @param config_file config file handed to SiteDiff::Config.new (optional)
def initialize(directory, config_file = nil)
  project_dir = get_dir(directory)
  @dir = project_dir
  @config = SiteDiff::Config.new(config_file, project_dir)
end
|
21
|
+
|
22
|
+
##
|
23
|
+
# Initialize a SiteDiff project.
|
24
|
+
#
|
25
|
+
# Calling:
|
26
|
+
# SiteDiff::Api.init(
|
27
|
+
# depth: 3,
|
28
|
+
# directory: 'sitediff',
|
29
|
+
# concurrency: 3,
|
30
|
+
# interval: 0,
|
31
|
+
# include: nil,
|
32
|
+
# exclude: '*.pdf',
|
33
|
+
# preset: 'drupal',
|
34
|
+
# curl_opts: {timeout: 60},
|
35
|
+
# crawl: false
|
36
|
+
# )
|
37
|
+
# Creates a config through Config::Creator and logs the created file.
#
# Reads :debug, :before_url and :after_url from options for the creator,
# and :depth, :directory, :concurrency, :interval, :include, :exclude,
# :preset and :curl_opts for the creation settings.
def self.init(options)
  # Prepare a config object and write it to the file system.
  creator = SiteDiff::Config::Creator.new(
    options[:debug],
    options[:before_url],
    options[:after_url]
  )
  settings = {
    depth: options[:depth],
    directory: options[:directory],
    concurrency: options[:concurrency],
    interval: options[:interval],
    include: Config.create_regexp(options[:include]),
    exclude: Config.create_regexp(options[:exclude]),
    preset: options[:preset],
    curl_opts: options[:curl_opts]
  }
  creator.create(**settings)
  SiteDiff.log "Created #{creator.config_file.expand_path}", :success

  # TODO: implement crawl ^^^
  # Discover paths, if enabled.
  # if options[:crawl]
  #   crawl(creator.config_file)
  #   SiteDiff.log 'You can now run "sitediff diff".', :success
  # else
  #   SiteDiff.log 'Run "sitediff crawl" to discover paths. You should then be able to run "sitediff diff".', :info
  # end
end
|
63
|
+
|
64
|
+
##
|
65
|
+
# Diff the `before` and `after`.
|
66
|
+
#
|
67
|
+
# Calling:
|
68
|
+
# Api.diff(
|
69
|
+
# paths: options['paths'],
|
70
|
+
# paths_file: options['paths-file'],
|
71
|
+
# ignore_whitespace: options['ignore-whitespace'],
|
72
|
+
# export: options['export'],
|
73
|
+
# before: options['before'],
|
74
|
+
# after: options['after'],
|
75
|
+
# cached: options['cached'],
|
76
|
+
# verbose: options['verbose'],
|
77
|
+
# report_format: options['report-format'],
|
78
|
+
# before_report: options['before-report'],
|
79
|
+
# after_report: options['after-report'],
|
80
|
+
# cli_mode: false
|
81
|
+
# )
|
82
|
+
# Diffs the "before" and "after" sites and generates the requested report.
#
# Reads these option keys: :ignore_whitespace, :export, :paths, :paths_file,
# :before, :after, :cached, :verbose, :debug, :report_format,
# :before_report, :after_report and :cli_mode.
#
# Configuration errors are logged instead of raised. When no exception
# occurred and :cli_mode is set, the process exits with 2 if any path
# failed and 0 otherwise.
def diff(options)
  @config.ignore_whitespace = options[:ignore_whitespace]
  @config.export = options[:export]

  # Apply "paths" override, if any.
  if options[:paths]
    @config.paths = options[:paths]
  else
    paths_file = options[:paths_file]
    paths_file ||= File.join(@dir, Config::DEFAULT_PATHS_FILENAME)
    paths_file = File.expand_path(paths_file)

    paths_count = @config.paths_file_read(paths_file)
    SiteDiff.log "Read #{paths_count} paths from: #{paths_file}"
  end

  # TODO: Why do we allow before and after override during diff?
  @config.before['url'] = options[:before] if options[:before]
  @config.after['url'] = options[:after] if options[:after]

  # Prepare cache.
  cache = SiteDiff::Cache.new(
    create: options[:cached] != 'none',
    directory: @dir
  )
  cache.read_tags << :before if %w[before all].include?(options[:cached])
  cache.read_tags << :after if %w[after all].include?(options[:cached])
  cache.write_tags << :before << :after

  # Run sitediff.
  sitediff = SiteDiff.new(
    @config,
    cache,
    options[:verbose],
    options[:debug]
  )
  num_failing = sitediff.run
  exit_code = num_failing.positive? ? 2 : 0

  # Generate HTML report.
  if options[:report_format] == 'html' || @config.export
    sitediff.report.generate_html(
      @dir,
      options[:before_report],
      options[:after_report]
    )
  end

  # Generate JSON report.
  # NOTE(review): this compares strictly against false, so a nil export
  # value would skip the JSON report - confirm what Config#export returns.
  if options[:report_format] == 'json' && @config.export == false
    sitediff.report.generate_json @dir
  end

  SiteDiff.log 'Run "sitediff serve" to see a report.' unless options[:export]
rescue Config::InvalidConfig, Config::ConfigNotFound => e
  SiteDiff.log "Invalid configuration: #{e.message}", :error
  # The backtrace is an Array; join it so it can be logged as text.
  SiteDiff.log Array(e.backtrace).join("\n"), :error if options[:verbose]
else # no exception was raised
  # Thor::Error --> exit(1), guaranteed by exit_on_failure?
  # Failing diff --> exit(2), populated above
  exit(exit_code) if options[:cli_mode]
end
|
146
|
+
|
147
|
+
##
|
148
|
+
# Crawl the `before` site to determine `paths`.
|
149
|
+
# Crawls each configured root to discover paths, caches every visited page
# and writes the sorted list of discovered paths to the paths file.
def crawl
  # Prepare cache.
  @cache = SiteDiff::Cache.new(
    create: true,
    directory: @dir
  )
  @cache.write_tags << :before << :after

  # Crawl with Hydra to discover paths.
  hydra = Typhoeus::Hydra.new(
    max_concurrency: @config.setting(:concurrency)
  )
  @paths = {}
  @config.roots.each do |tag, url|
    # NOTE(review): @debug is not assigned anywhere in the visible part of
    # this class, so it is presumably nil here - confirm.
    Crawler.new(
      hydra,
      url,
      @config.setting(:interval),
      @config.setting(:include),
      @config.setting(:exclude),
      @config.setting(:depth),
      @config.curl_opts,
      @debug
    ) do |info|
      SiteDiff.log "Visited #{info.uri}, cached."
      after_crawl(tag, info)
    end
  end
  hydra.run

  # Write paths to a file. Seeding the union with [] de-duplicates even
  # when only one root was crawled, and yields [] when nothing was found.
  @paths = @paths.values.reduce([], :|).sort
  @config.paths_file_write(@paths)

  # Log output.
  file = Pathname.new(@dir) + Config::DEFAULT_PATHS_FILENAME
  SiteDiff.log ''
  SiteDiff.log "#{@paths.length} page(s) found."
  SiteDiff.log "Created #{file.expand_path}.", :success, 'done'
end
|
189
|
+
|
190
|
+
##
|
191
|
+
# Serves SiteDiff report for accessing in the browser.
|
192
|
+
#
|
193
|
+
# Calling:
|
194
|
+
# api.serve(browse: true, port: 13080)
|
195
|
+
# Serves the SiteDiff report so it can be viewed in a browser.
#
# Reads these option keys: :port, :browse and :verbose. A
# SiteDiffException is logged instead of raised.
def serve(options)
  @cache = Cache.new(directory: @dir)
  @cache.read_tags << :before << :after

  SiteDiff::Webserver::ResultServer.new(
    options[:port],
    @dir,
    browse: options[:browse],
    cache: @cache,
    config: @config
  ).wait
rescue SiteDiffException => e
  SiteDiff.log e.message, :error
  # The backtrace is an Array; join it so it can be logged as text.
  SiteDiff.log Array(e.backtrace).join("\n"), :error if options[:verbose]
end
|
210
|
+
|
211
|
+
##
|
212
|
+
#
|
213
|
+
# Fetches every configured path from a base URL and writes the responses
# to the cache under the :before tag.
#
# Reads these option keys: :url (falls back to the config's "after" URL)
# and :debug.
def store(options)
  # TODO: Figure out how to remove this config.validate call.
  @config.validate(need_before: false)
  @config.paths_file_read

  @cache = SiteDiff::Cache.new(directory: @dir, create: true)
  @cache.write_tags << :before

  base = options[:url] || @config.after['url']
  # NOTE(review): get_curl_opts is not defined in the visible part of this
  # class - confirm it is provided elsewhere.
  fetcher = SiteDiff::Fetch.new(
    @cache,
    @config.paths,
    @config.setting(:interval),
    @config.setting(:concurrency),
    get_curl_opts(@config.settings),
    options[:debug],
    before: base
  )
  fetcher.run { |path, _res| SiteDiff.log "Visited #{path}, cached" }
end
|
233
|
+
|
234
|
+
private
|
235
|
+
|
236
|
+
##
|
237
|
+
# Ensures that the given directory exists.
|
238
|
+
# Ensures that the given directory exists.
#
# Must run before cache initialization, which needs the directory.
#
# @param directory [String, nil] directory path; nil means '.'
# @return [String] the directory path
def get_dir(directory)
  # A local is used on purpose: the caller assigns the returned String to
  # @dir, so writing a Pathname into @dir here was only a hidden side effect.
  dir = Pathname.new(directory || '.')
  # mkpath is a no-op when the directory already exists.
  dir.mkpath
  dir.to_s
end
|
244
|
+
|
245
|
+
##
|
246
|
+
# Processes a crawled path.
|
247
|
+
# Processes a crawled page: registers its path and caches its result.
#
# @param tag  key of the root being crawled (from @config.roots)
# @param info crawl info responding to #relative and #read_result
def after_crawl(tag, info)
  path = UriWrapper.canonicalize(info.relative)

  # Register the path under its tag, creating the list on first use.
  (@paths[tag] ||= []) << path

  result = info.read_result

  # Write result to applicable cache.
  @cache.set(tag, path, result)
  # If single-site, cache "after" as "before".
  @cache.set(:before, path, result) unless @config.roots[:before]

  # TODO: Restore application of rules.
  # @rules.handle_page(tag, res.content, info.document) if @rules && !res.error
end
|
264
|
+
end
|
265
|
+
end
|