sitediff 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/sitediff +2 -3
- data/lib/sitediff.rb +35 -24
- data/lib/sitediff/cache.rb +53 -47
- data/lib/sitediff/cli.rb +127 -114
- data/lib/sitediff/config.rb +35 -59
- data/lib/sitediff/config/creator.rb +95 -90
- data/lib/sitediff/crawler.rb +83 -72
- data/lib/sitediff/diff.rb +7 -5
- data/lib/sitediff/exception.rb +3 -1
- data/lib/sitediff/fetch.rb +47 -41
- data/lib/sitediff/files/html_report.html.erb +3 -0
- data/lib/sitediff/files/rules/drupal.yaml +36 -6
- data/lib/sitediff/result.rb +13 -11
- data/lib/sitediff/rules.rb +47 -47
- data/lib/sitediff/sanitize.rb +145 -150
- data/lib/sitediff/sanitize/dom_transform.rb +73 -74
- data/lib/sitediff/sanitize/regexp.rb +55 -52
- data/lib/sitediff/uriwrapper.rb +37 -26
- data/lib/sitediff/webserver.rb +80 -77
- data/lib/sitediff/webserver/resultserver.rb +117 -76
- metadata +32 -44
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 93ab2ffc296a3c9de8ea835e47f435e0193e7854
|
4
|
+
data.tar.gz: 5a80c5bb738912114aeb029c4ff283ff890f2ce2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fd07a6ff9a14a8da965f4f5b09acb1c7f30be594d97f6383c3bec6545d1b5bb3fc26451a9943ed3fc3c81d261a0fa4ca6c5fcc5355d7263ae09700800af5af9d
|
7
|
+
data.tar.gz: bb940ede7b68b1e047dbda46d66fade88d06e0f54d9e257842a311f119efb4043939ea9b83021cdf2fd5c76ac3a23da92e7da91072cc5d4421a1717902093e17
|
data/bin/sitediff
CHANGED
@@ -1,9 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
# when run as gem, $0 is /usr/local/bin/sitediff not this file
|
4
|
-
if $
|
5
|
-
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
|
6
|
-
end
|
5
|
+
$LOAD_PATH.unshift File.expand_path('../lib', __dir__) if $PROGRAM_NAME == __FILE__
|
7
6
|
|
8
7
|
require 'sitediff/cli'
|
9
8
|
|
data/lib/sitediff.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
#!/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
2
4
|
require 'sitediff/config'
|
3
5
|
require 'sitediff/fetch'
|
4
6
|
require 'sitediff/result'
|
@@ -20,11 +22,12 @@ class SiteDiff
|
|
20
22
|
|
21
23
|
# label will be colorized and str will not be.
|
22
24
|
# type dictates the color: can be :success, :error, or :failure
|
23
|
-
def self.log(str, type
|
25
|
+
def self.log(str, type = :info, label = nil)
|
24
26
|
label = label ? "[sitediff] #{label}" : '[sitediff]'
|
25
27
|
bg = fg = nil
|
26
28
|
case type
|
27
29
|
when :info
|
30
|
+
bg = fg = nil
|
28
31
|
when :diff_success
|
29
32
|
bg = :green
|
30
33
|
fg = :black
|
@@ -46,47 +49,50 @@ class SiteDiff
|
|
46
49
|
def before
|
47
50
|
@config.before['url']
|
48
51
|
end
|
52
|
+
|
49
53
|
def after
|
50
54
|
@config.after['url']
|
51
55
|
end
|
52
56
|
|
53
|
-
def initialize(config, cache, verbose=true)
|
57
|
+
def initialize(config, cache, concurrency, verbose = true)
|
54
58
|
@cache = cache
|
55
59
|
@verbose = verbose
|
56
60
|
|
57
61
|
# Check for single-site mode
|
58
62
|
validate_opts = {}
|
59
63
|
if !config.before['url'] && @cache.tag?(:before)
|
60
|
-
|
61
|
-
|
62
|
-
|
64
|
+
unless @cache.read_tags.include?(:before)
|
65
|
+
raise SiteDiffException,
|
66
|
+
"A cached 'before' is required for single-site mode"
|
67
|
+
end
|
63
68
|
validate_opts[:need_before] = false
|
64
69
|
end
|
65
70
|
config.validate(validate_opts)
|
66
71
|
|
72
|
+
@concurrency = concurrency
|
67
73
|
@config = config
|
68
74
|
end
|
69
75
|
|
70
76
|
# Sanitize HTML
|
71
77
|
def sanitize(path, read_results)
|
72
|
-
[
|
78
|
+
%i[before after].map do |tag|
|
73
79
|
html = read_results[tag].content
|
74
80
|
config = @config.send(tag)
|
75
|
-
Sanitizer.new(html, config, :
|
81
|
+
Sanitizer.new(html, config, path: path).sanitize
|
76
82
|
end
|
77
83
|
end
|
78
84
|
|
79
85
|
# Process a set of read results
|
80
86
|
def process_results(path, read_results)
|
81
|
-
if error = read_results[:before].error || read_results[:after].error
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
87
|
+
diff = if (error = (read_results[:before].error || read_results[:after].error))
|
88
|
+
Result.new(path, nil, nil, error)
|
89
|
+
else
|
90
|
+
Result.new(path, *sanitize(path, read_results), nil)
|
91
|
+
end
|
86
92
|
@results[path] = diff
|
87
93
|
|
88
94
|
# Print results in order!
|
89
|
-
while next_diff = @results[@ordered.first]
|
95
|
+
while (next_diff = @results[@ordered.first])
|
90
96
|
next_diff.log(@verbose)
|
91
97
|
@ordered.shift
|
92
98
|
end
|
@@ -94,23 +100,28 @@ class SiteDiff
|
|
94
100
|
|
95
101
|
# Perform the comparison, populate @results and return the number of failing
|
96
102
|
# paths (paths with non-zero diff).
|
97
|
-
def run
|
103
|
+
def run(curl_opts = {})
|
98
104
|
# Map of path -> Result object, populated by process_results
|
99
105
|
@results = {}
|
100
106
|
@ordered = @config.paths.dup
|
101
107
|
|
102
108
|
unless @cache.read_tags.empty?
|
103
|
-
SiteDiff.log(
|
109
|
+
SiteDiff.log('Using sites from cache: ' +
|
104
110
|
@cache.read_tags.sort.join(', '))
|
105
111
|
end
|
106
112
|
|
107
|
-
|
108
|
-
|
109
|
-
|
113
|
+
# TODO: Fix this after config merge refactor!
|
114
|
+
# Not quite right. We are not passing @config.before or @config.after
|
115
|
+
# so passing this instead but @config.after['curl_opts'] is ignored.
|
116
|
+
config_curl_opts = @config.before['curl_opts']
|
117
|
+
curl_opts = config_curl_opts.clone.merge(curl_opts) if config_curl_opts
|
118
|
+
fetcher = Fetch.new(@cache, @config.paths, @concurrency, curl_opts,
|
119
|
+
before: before, after: after)
|
120
|
+
fetcher.run(&method(:process_results))
|
110
121
|
|
111
122
|
# Order by original path order
|
112
123
|
@results = @config.paths.map { |p| @results[p] }
|
113
|
-
|
124
|
+
results.map { |r| r unless r.success? }.compact.length
|
114
125
|
end
|
115
126
|
|
116
127
|
# Dump results to disk
|
@@ -124,23 +135,23 @@ class SiteDiff
|
|
124
135
|
diff_dir = dir + DIFFS_DIR
|
125
136
|
diff_dir.rmtree if diff_dir.exist?
|
126
137
|
results.each { |r| r.dump(dir) if r.status == Result::STATUS_FAILURE }
|
127
|
-
SiteDiff
|
138
|
+
SiteDiff.log "All diff files were dumped inside #{dir.expand_path}"
|
128
139
|
|
129
140
|
# store failing paths
|
130
141
|
failures = dir + FAILURES_FILE
|
131
|
-
SiteDiff
|
142
|
+
SiteDiff.log "Writing failures to #{failures.expand_path}"
|
132
143
|
failures.open('w') do |f|
|
133
144
|
results.each { |r| f.puts r.path unless r.success? }
|
134
145
|
end
|
135
146
|
|
136
147
|
# create report of results
|
137
|
-
report = Diff
|
138
|
-
|
148
|
+
report = Diff.generate_html_report(results, report_before, report_after,
|
149
|
+
@cache)
|
139
150
|
dir.+(REPORT_FILE).open('w') { |f| f.write(report) }
|
140
151
|
|
141
152
|
# serve some settings
|
142
153
|
settings = { 'before' => report_before, 'after' => report_after,
|
143
|
-
|
154
|
+
'cached' => %w[before after] }
|
144
155
|
dir.+(SETTINGS_FILE).open('w') { |f| YAML.dump(settings, f) }
|
145
156
|
end
|
146
157
|
end
|
data/lib/sitediff/cache.rb
CHANGED
@@ -1,61 +1,67 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'set'
|
4
|
+
require 'fileutils'
|
2
5
|
|
3
6
|
class SiteDiff
|
4
|
-
class Cache
|
5
|
-
|
7
|
+
class Cache
|
8
|
+
attr_accessor :read_tags, :write_tags
|
6
9
|
|
7
|
-
|
10
|
+
def initialize(opts = {})
|
11
|
+
@dir = opts[:dir] || '.'
|
12
|
+
@create = opts[:create]
|
13
|
+
@read_tags = Set.new
|
14
|
+
@write_tags = Set.new
|
15
|
+
end
|
8
16
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
@write_tags = Set.new
|
14
|
-
end
|
17
|
+
# Is a tag cached?
|
18
|
+
def tag?(tag)
|
19
|
+
File.directory?(File.join(@dir, 'snapshot', tag.to_s))
|
20
|
+
end
|
15
21
|
|
16
|
-
|
22
|
+
def get(tag, path)
|
23
|
+
return nil unless @read_tags.include? tag
|
17
24
|
|
18
|
-
|
19
|
-
def tag?(tag)
|
20
|
-
open
|
21
|
-
@dbm[tag.to_s]
|
22
|
-
end
|
25
|
+
filename = File.join(@dir, 'snapshot', tag.to_s, *path.split(File::SEPARATOR))
|
23
26
|
|
24
|
-
|
25
|
-
|
26
|
-
open or return nil
|
27
|
-
val = @dbm[key(tag, path)]
|
28
|
-
return val && Marshal.load(val)
|
29
|
-
end
|
27
|
+
filename = File.join(filename, 'index.html') if File.directory?(filename)
|
28
|
+
return nil unless File.file? filename
|
30
29
|
|
31
|
-
|
32
|
-
|
33
|
-
open or return
|
34
|
-
@dbm[tag.to_s] = 'TRUE'
|
35
|
-
@dbm[key(tag, path)] = Marshal.dump(result)
|
36
|
-
end
|
30
|
+
Marshal.load(File.read(filename))
|
31
|
+
end
|
37
32
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
end
|
33
|
+
def set(tag, path, result)
|
34
|
+
return unless @write_tags.include? tag
|
35
|
+
|
36
|
+
filename = File.join(@dir, 'snapshot', tag.to_s, *path.split(File::SEPARATOR))
|
43
37
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
38
|
+
filename = File.join(filename, 'index.html') if File.directory?(filename)
|
39
|
+
filepath = Pathname.new(filename)
|
40
|
+
unless filepath.dirname.directory?
|
41
|
+
begin
|
42
|
+
filepath.dirname.mkpath
|
43
|
+
rescue Errno::EEXIST
|
44
|
+
curdir = filepath
|
45
|
+
curdir = curdir.parent until curdir.exist?
|
46
|
+
tempname = curdir.dirname + (curdir.basename.to_s + '.temporary')
|
47
|
+
# May cause problems if action is not atomic!
|
48
|
+
# Move existing file to dir/index.html first
|
49
|
+
# Not robust! Should generate an UUID or something.
|
50
|
+
SiteDiff.log "Overwriting file #{tempname}", :warn if File.exist?(tempname)
|
51
|
+
curdir.rename(tempname)
|
52
|
+
filepath.dirname.mkpath
|
53
|
+
# Should only happen in strange situations such as when the path
|
54
|
+
# is foo/index.html/bar (i.e., index.html is a directory)
|
55
|
+
SiteDiff.log "Overwriting file #{tempname}", :warn if (curdir + 'index.html').exist?
|
56
|
+
tempname.rename(curdir + 'index.html')
|
57
|
+
end
|
58
|
+
end
|
59
|
+
File.open(filename, 'w') { |file| file.write(Marshal.dump(result)) }
|
60
|
+
end
|
61
|
+
|
62
|
+
def key(tag, path)
|
63
|
+
# Ensure encoding stays the same!
|
64
|
+
Marshal.dump([tag, path.encode('UTF-8')])
|
57
65
|
end
|
58
|
-
return true
|
59
66
|
end
|
60
67
|
end
|
61
|
-
end
|
data/lib/sitediff/cli.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'thor'
|
2
4
|
require 'sitediff'
|
3
5
|
require 'sitediff/cache'
|
@@ -9,9 +11,18 @@ require 'sitediff/webserver/resultserver'
|
|
9
11
|
class SiteDiff
|
10
12
|
class Cli < Thor
|
11
13
|
class_option 'directory',
|
12
|
-
|
13
|
-
|
14
|
-
|
14
|
+
type: :string,
|
15
|
+
aliases: '-C',
|
16
|
+
default: 'sitediff',
|
17
|
+
desc: 'Configuration directory'
|
18
|
+
class_option :curl_options,
|
19
|
+
type: :hash,
|
20
|
+
default: {},
|
21
|
+
desc: 'Options to be passed to curl'
|
22
|
+
class_option :insecure,
|
23
|
+
type: :boolean,
|
24
|
+
default: false,
|
25
|
+
desc: 'Ignore many HTTPS/SSL errors'
|
15
26
|
|
16
27
|
# Thor, by default, exits with 0 no matter what!
|
17
28
|
def self.exit_on_failure?
|
@@ -19,67 +30,67 @@ class SiteDiff
|
|
19
30
|
end
|
20
31
|
|
21
32
|
# Thor, by default, does not raise an error for use of unknown options.
|
22
|
-
def self.check_unknown_options?(
|
33
|
+
def self.check_unknown_options?(_config)
|
23
34
|
true
|
24
35
|
end
|
25
36
|
|
26
|
-
option 'dump-dir',
|
27
|
-
:type => :string,
|
28
|
-
:default => File.join('.', 'output'),
|
29
|
-
:desc => "Location to write the output to."
|
30
37
|
option 'paths-file',
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
38
|
+
type: :string,
|
39
|
+
desc: 'Paths are read (one at a line) from PATHS: ' \
|
40
|
+
'useful for iterating over sanitization rules',
|
41
|
+
aliases: '--paths-from-file'
|
35
42
|
option 'paths',
|
36
|
-
|
37
|
-
|
38
|
-
|
43
|
+
type: :array,
|
44
|
+
aliases: '-p',
|
45
|
+
desc: 'Specific path or paths to fetch'
|
39
46
|
option 'before',
|
40
|
-
|
41
|
-
|
42
|
-
|
47
|
+
type: :string,
|
48
|
+
desc: 'URL used to fetch the before HTML. Acts as a prefix to specified paths',
|
49
|
+
aliases: '--before-url'
|
43
50
|
option 'after',
|
44
|
-
|
45
|
-
|
46
|
-
|
51
|
+
type: :string,
|
52
|
+
desc: 'URL used to fetch the after HTML. Acts as a prefix to specified paths.',
|
53
|
+
aliases: '--after-url'
|
47
54
|
option 'before-report',
|
48
|
-
|
49
|
-
|
50
|
-
|
55
|
+
type: :string,
|
56
|
+
desc: 'Before URL to use for reporting purposes. Useful if port forwarding.',
|
57
|
+
aliases: '--before-url-report'
|
51
58
|
option 'after-report',
|
52
|
-
|
53
|
-
|
54
|
-
|
59
|
+
type: :string,
|
60
|
+
desc: 'After URL to use for reporting purposes. Useful if port forwarding.',
|
61
|
+
aliases: '--after-url-report'
|
55
62
|
option 'cached',
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
option '
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
63
|
+
type: :string,
|
64
|
+
enum: %w[none all before after],
|
65
|
+
default: 'before',
|
66
|
+
desc: 'Use the cached version of these sites, if available.'
|
67
|
+
option 'verbose',
|
68
|
+
type: :boolean,
|
69
|
+
aliases: '-v',
|
70
|
+
default: false,
|
71
|
+
desc: 'Show differences between versions for each page in terminal'
|
72
|
+
option :concurrency,
|
73
|
+
type: :numeric,
|
74
|
+
default: 3,
|
75
|
+
desc: 'Max number of concurrent connections made'
|
76
|
+
desc 'diff [OPTIONS] [CONFIGFILES]', 'Perform systematic diff on given URLs'
|
66
77
|
def diff(*config_files)
|
67
|
-
config =
|
78
|
+
config = SiteDiff::Config.new(config_files, options[:directory])
|
68
79
|
|
69
80
|
# override config based on options
|
70
81
|
paths = options['paths']
|
71
|
-
if paths_file = options['paths-file']
|
72
|
-
if paths
|
73
|
-
SiteDiff
|
74
|
-
exit
|
82
|
+
if (paths_file = options['paths-file'])
|
83
|
+
if paths
|
84
|
+
SiteDiff.log "Can't have both --paths-file and --paths", :error
|
85
|
+
exit(-1)
|
75
86
|
end
|
76
87
|
|
77
88
|
paths_file = Pathname.new(paths_file).expand_path
|
78
|
-
unless File.
|
89
|
+
unless File.exist? paths_file
|
79
90
|
raise Config::InvalidConfig,
|
80
|
-
|
91
|
+
"Paths file '#{paths_file}' not found!"
|
81
92
|
end
|
82
|
-
SiteDiff
|
93
|
+
SiteDiff.log "Reading paths from: #{paths_file}"
|
83
94
|
config.paths = File.readlines(paths_file)
|
84
95
|
end
|
85
96
|
config.paths = paths if paths
|
@@ -88,21 +99,21 @@ class SiteDiff
|
|
88
99
|
config.after['url'] = options['after'] if options['after']
|
89
100
|
|
90
101
|
# Setup cache
|
91
|
-
cache = SiteDiff::Cache.new(:
|
102
|
+
cache = SiteDiff::Cache.new(create: options['cached'] != 'none',
|
103
|
+
dir: options['directory'])
|
92
104
|
cache.read_tags << :before if %w[before all].include?(options['cached'])
|
93
105
|
cache.read_tags << :after if %w[after all].include?(options['cached'])
|
94
106
|
cache.write_tags << :before << :after
|
95
107
|
|
96
|
-
sitediff = SiteDiff.new(config, cache,
|
97
|
-
|
98
|
-
|
108
|
+
sitediff = SiteDiff.new(config, cache, options[:concurrency],
|
109
|
+
options['verbose'])
|
110
|
+
num_failing = sitediff.run(get_curl_opts(options))
|
111
|
+
exit_code = num_failing > 0 ? 2 : 0
|
99
112
|
|
100
|
-
sitediff.dump(options['
|
101
|
-
|
113
|
+
sitediff.dump(options['directory'], options['before-report'],
|
114
|
+
options['after-report'])
|
102
115
|
rescue Config::InvalidConfig => e
|
103
116
|
SiteDiff.log "Invalid configuration: #{e.message}", :error
|
104
|
-
rescue SiteDiffException => e
|
105
|
-
SiteDiff.log e.message, :error
|
106
117
|
else # no exception was raised
|
107
118
|
# Thor::Error --> exit(1), guaranteed by exit_on_failure?
|
108
119
|
# Failing diff --> exit(2), populated above
|
@@ -110,62 +121,61 @@ class SiteDiff
|
|
110
121
|
end
|
111
122
|
|
112
123
|
option :port,
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
option 'dump-dir',
|
117
|
-
:type => :string,
|
118
|
-
:default => 'output',
|
119
|
-
:desc => 'The directory to serve'
|
124
|
+
type: :numeric,
|
125
|
+
default: SiteDiff::Webserver::DEFAULT_PORT,
|
126
|
+
desc: 'The port to serve on'
|
120
127
|
option :browse,
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
desc
|
128
|
+
type: :boolean,
|
129
|
+
default: true,
|
130
|
+
desc: 'Whether to open the served content in your browser'
|
131
|
+
desc 'serve [OPTIONS]', 'Serve the sitediff output directory over HTTP'
|
125
132
|
def serve(*config_files)
|
126
|
-
config =
|
133
|
+
config = SiteDiff::Config.new(config_files, options['directory'])
|
134
|
+
# Could check non-empty config here but currently errors are already raised.
|
127
135
|
|
128
|
-
cache = Cache.new
|
136
|
+
cache = Cache.new(dir: options['directory'])
|
129
137
|
cache.read_tags << :before << :after
|
130
138
|
|
131
139
|
SiteDiff::Webserver::ResultServer.new(
|
132
140
|
options[:port],
|
133
|
-
options['
|
134
|
-
:
|
135
|
-
:
|
136
|
-
:
|
141
|
+
options['directory'],
|
142
|
+
browse: options[:browse],
|
143
|
+
cache: cache,
|
144
|
+
config: config
|
137
145
|
).wait
|
146
|
+
rescue SiteDiffException => e
|
147
|
+
SiteDiff.log e.message, :error
|
138
148
|
end
|
139
149
|
|
140
|
-
option :output,
|
141
|
-
:type => :string,
|
142
|
-
:default => 'sitediff',
|
143
|
-
:desc => 'Directory in which to place the configuration',
|
144
|
-
:aliases => ['-o']
|
145
150
|
option :depth,
|
146
|
-
|
147
|
-
|
148
|
-
|
151
|
+
type: :numeric,
|
152
|
+
default: 3,
|
153
|
+
desc: 'How deeply to crawl the given site'
|
149
154
|
option :rules,
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
+
type: :string,
|
156
|
+
enum: %w[yes no disabled],
|
157
|
+
default: 'disabled',
|
158
|
+
desc: 'Whether rules for the site should be auto-created'
|
159
|
+
option :concurrency,
|
160
|
+
type: :numeric,
|
161
|
+
default: 3,
|
162
|
+
desc: 'Max number of concurrent connections made'
|
163
|
+
desc 'init URL [URL]', 'Create a sitediff configuration'
|
155
164
|
def init(*urls)
|
156
|
-
unless (1..2).
|
157
|
-
SiteDiff.log
|
165
|
+
unless (1..2).cover? urls.size
|
166
|
+
SiteDiff.log 'sitediff init requires one or two URLs', :error
|
158
167
|
exit 2
|
159
168
|
end
|
160
169
|
|
161
|
-
|
162
|
-
|
170
|
+
curl_opts = get_curl_opts(options)
|
171
|
+
|
172
|
+
creator = SiteDiff::Config::Creator.new(options[:concurrency], curl_opts, *urls)
|
163
173
|
creator.create(
|
164
|
-
:
|
165
|
-
:
|
166
|
-
:
|
167
|
-
:
|
168
|
-
) do |
|
174
|
+
depth: options[:depth],
|
175
|
+
directory: options[:directory],
|
176
|
+
rules: options[:rules] != 'no',
|
177
|
+
rules_disabled: (options[:rules] == 'disabled')
|
178
|
+
) do |_tag, info|
|
169
179
|
SiteDiff.log "Visited #{info.uri}, cached"
|
170
180
|
end
|
171
181
|
|
@@ -174,37 +184,40 @@ class SiteDiff
|
|
174
184
|
end
|
175
185
|
|
176
186
|
option :url,
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
187
|
+
type: :string,
|
188
|
+
desc: 'A custom base URL to fetch from'
|
189
|
+
option :concurrency,
|
190
|
+
type: :numeric,
|
191
|
+
default: 3,
|
192
|
+
desc: 'Max number of concurrent connections made'
|
193
|
+
desc 'store [CONFIGFILES]',
|
194
|
+
'Cache the current contents of a site for later comparison'
|
181
195
|
def store(*config_files)
|
182
|
-
config =
|
183
|
-
config.validate(:
|
196
|
+
config = SiteDiff::Config.new(config_files, options['directory'])
|
197
|
+
config.validate(need_before: false)
|
184
198
|
|
185
|
-
cache = SiteDiff::Cache.new(:
|
199
|
+
cache = SiteDiff::Cache.new(create: true)
|
186
200
|
cache.write_tags << :before
|
187
201
|
|
188
202
|
base = options[:url] || config.after['url']
|
189
|
-
fetcher = SiteDiff::Fetch.new(cache, config.paths,
|
190
|
-
|
203
|
+
fetcher = SiteDiff::Fetch.new(cache, config.paths, options['concurrency'],
|
204
|
+
before: base)
|
205
|
+
fetcher.run do |path, _res|
|
191
206
|
SiteDiff.log "Visited #{path}, cached"
|
192
207
|
end
|
193
208
|
end
|
194
209
|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
raise if opts[:config]
|
207
|
-
# If no config required, allow it to pass
|
210
|
+
no_commands do
|
211
|
+
def get_curl_opts(options)
|
212
|
+
# We do want string keys here
|
213
|
+
bool_hash = { 'true' => true, 'false' => false }
|
214
|
+
curl_opts = UriWrapper::DEFAULT_CURL_OPTS.clone.merge(options[:curl_options])
|
215
|
+
curl_opts.each { |k, v| curl_opts[k] = bool_hash.fetch(v, v) }
|
216
|
+
if options[:insecure]
|
217
|
+
curl_opts[:ssl_verifypeer] = false
|
218
|
+
curl_opts[:ssl_verifyhost] = 0
|
219
|
+
end
|
220
|
+
curl_opts
|
208
221
|
end
|
209
222
|
end
|
210
223
|
end
|