sitediff 0.0.3 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/sitediff +2 -3
- data/lib/sitediff.rb +35 -24
- data/lib/sitediff/cache.rb +53 -47
- data/lib/sitediff/cli.rb +127 -114
- data/lib/sitediff/config.rb +35 -59
- data/lib/sitediff/config/creator.rb +95 -90
- data/lib/sitediff/crawler.rb +83 -72
- data/lib/sitediff/diff.rb +7 -5
- data/lib/sitediff/exception.rb +3 -1
- data/lib/sitediff/fetch.rb +47 -41
- data/lib/sitediff/files/html_report.html.erb +3 -0
- data/lib/sitediff/files/rules/drupal.yaml +36 -6
- data/lib/sitediff/result.rb +13 -11
- data/lib/sitediff/rules.rb +47 -47
- data/lib/sitediff/sanitize.rb +145 -150
- data/lib/sitediff/sanitize/dom_transform.rb +73 -74
- data/lib/sitediff/sanitize/regexp.rb +55 -52
- data/lib/sitediff/uriwrapper.rb +37 -26
- data/lib/sitediff/webserver.rb +80 -77
- data/lib/sitediff/webserver/resultserver.rb +117 -76
- metadata +32 -44
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 93ab2ffc296a3c9de8ea835e47f435e0193e7854
|
4
|
+
data.tar.gz: 5a80c5bb738912114aeb029c4ff283ff890f2ce2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fd07a6ff9a14a8da965f4f5b09acb1c7f30be594d97f6383c3bec6545d1b5bb3fc26451a9943ed3fc3c81d261a0fa4ca6c5fcc5355d7263ae09700800af5af9d
|
7
|
+
data.tar.gz: bb940ede7b68b1e047dbda46d66fade88d06e0f54d9e257842a311f119efb4043939ea9b83021cdf2fd5c76ac3a23da92e7da91072cc5d4421a1717902093e17
|
data/bin/sitediff
CHANGED
@@ -1,9 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
# when run as gem, $0 is /usr/local/bin/sitediff not this file
|
4
|
-
if $
|
5
|
-
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
|
6
|
-
end
|
5
|
+
$LOAD_PATH.unshift File.expand_path('../lib', __dir__) if $PROGRAM_NAME == __FILE__
|
7
6
|
|
8
7
|
require 'sitediff/cli'
|
9
8
|
|
data/lib/sitediff.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
#!/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
2
4
|
require 'sitediff/config'
|
3
5
|
require 'sitediff/fetch'
|
4
6
|
require 'sitediff/result'
|
@@ -20,11 +22,12 @@ class SiteDiff
|
|
20
22
|
|
21
23
|
# label will be colorized and str will not be.
|
22
24
|
# type dictates the color: can be :success, :error, or :failure
|
23
|
-
def self.log(str, type
|
25
|
+
def self.log(str, type = :info, label = nil)
|
24
26
|
label = label ? "[sitediff] #{label}" : '[sitediff]'
|
25
27
|
bg = fg = nil
|
26
28
|
case type
|
27
29
|
when :info
|
30
|
+
bg = fg = nil
|
28
31
|
when :diff_success
|
29
32
|
bg = :green
|
30
33
|
fg = :black
|
@@ -46,47 +49,50 @@ class SiteDiff
|
|
46
49
|
def before
|
47
50
|
@config.before['url']
|
48
51
|
end
|
52
|
+
|
49
53
|
def after
|
50
54
|
@config.after['url']
|
51
55
|
end
|
52
56
|
|
53
|
-
def initialize(config, cache, verbose=true)
|
57
|
+
def initialize(config, cache, concurrency, verbose = true)
|
54
58
|
@cache = cache
|
55
59
|
@verbose = verbose
|
56
60
|
|
57
61
|
# Check for single-site mode
|
58
62
|
validate_opts = {}
|
59
63
|
if !config.before['url'] && @cache.tag?(:before)
|
60
|
-
|
61
|
-
|
62
|
-
|
64
|
+
unless @cache.read_tags.include?(:before)
|
65
|
+
raise SiteDiffException,
|
66
|
+
"A cached 'before' is required for single-site mode"
|
67
|
+
end
|
63
68
|
validate_opts[:need_before] = false
|
64
69
|
end
|
65
70
|
config.validate(validate_opts)
|
66
71
|
|
72
|
+
@concurrency = concurrency
|
67
73
|
@config = config
|
68
74
|
end
|
69
75
|
|
70
76
|
# Sanitize HTML
|
71
77
|
def sanitize(path, read_results)
|
72
|
-
[
|
78
|
+
%i[before after].map do |tag|
|
73
79
|
html = read_results[tag].content
|
74
80
|
config = @config.send(tag)
|
75
|
-
Sanitizer.new(html, config, :
|
81
|
+
Sanitizer.new(html, config, path: path).sanitize
|
76
82
|
end
|
77
83
|
end
|
78
84
|
|
79
85
|
# Process a set of read results
|
80
86
|
def process_results(path, read_results)
|
81
|
-
if error = read_results[:before].error || read_results[:after].error
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
87
|
+
diff = if (error = (read_results[:before].error || read_results[:after].error))
|
88
|
+
Result.new(path, nil, nil, error)
|
89
|
+
else
|
90
|
+
Result.new(path, *sanitize(path, read_results), nil)
|
91
|
+
end
|
86
92
|
@results[path] = diff
|
87
93
|
|
88
94
|
# Print results in order!
|
89
|
-
while next_diff = @results[@ordered.first]
|
95
|
+
while (next_diff = @results[@ordered.first])
|
90
96
|
next_diff.log(@verbose)
|
91
97
|
@ordered.shift
|
92
98
|
end
|
@@ -94,23 +100,28 @@ class SiteDiff
|
|
94
100
|
|
95
101
|
# Perform the comparison, populate @results and return the number of failing
|
96
102
|
# paths (paths with non-zero diff).
|
97
|
-
def run
|
103
|
+
def run(curl_opts = {})
|
98
104
|
# Map of path -> Result object, populated by process_results
|
99
105
|
@results = {}
|
100
106
|
@ordered = @config.paths.dup
|
101
107
|
|
102
108
|
unless @cache.read_tags.empty?
|
103
|
-
SiteDiff.log(
|
109
|
+
SiteDiff.log('Using sites from cache: ' +
|
104
110
|
@cache.read_tags.sort.join(', '))
|
105
111
|
end
|
106
112
|
|
107
|
-
|
108
|
-
|
109
|
-
|
113
|
+
# TODO: Fix this after config merge refactor!
|
114
|
+
# Not quite right. We are not passing @config.before or @config.after
|
115
|
+
# so passing this instead but @config.after['curl_opts'] is ignored.
|
116
|
+
config_curl_opts = @config.before['curl_opts']
|
117
|
+
curl_opts = config_curl_opts.clone.merge(curl_opts) if config_curl_opts
|
118
|
+
fetcher = Fetch.new(@cache, @config.paths, @concurrency, curl_opts,
|
119
|
+
before: before, after: after)
|
120
|
+
fetcher.run(&method(:process_results))
|
110
121
|
|
111
122
|
# Order by original path order
|
112
123
|
@results = @config.paths.map { |p| @results[p] }
|
113
|
-
|
124
|
+
results.map { |r| r unless r.success? }.compact.length
|
114
125
|
end
|
115
126
|
|
116
127
|
# Dump results to disk
|
@@ -124,23 +135,23 @@ class SiteDiff
|
|
124
135
|
diff_dir = dir + DIFFS_DIR
|
125
136
|
diff_dir.rmtree if diff_dir.exist?
|
126
137
|
results.each { |r| r.dump(dir) if r.status == Result::STATUS_FAILURE }
|
127
|
-
SiteDiff
|
138
|
+
SiteDiff.log "All diff files were dumped inside #{dir.expand_path}"
|
128
139
|
|
129
140
|
# store failing paths
|
130
141
|
failures = dir + FAILURES_FILE
|
131
|
-
SiteDiff
|
142
|
+
SiteDiff.log "Writing failures to #{failures.expand_path}"
|
132
143
|
failures.open('w') do |f|
|
133
144
|
results.each { |r| f.puts r.path unless r.success? }
|
134
145
|
end
|
135
146
|
|
136
147
|
# create report of results
|
137
|
-
report = Diff
|
138
|
-
|
148
|
+
report = Diff.generate_html_report(results, report_before, report_after,
|
149
|
+
@cache)
|
139
150
|
dir.+(REPORT_FILE).open('w') { |f| f.write(report) }
|
140
151
|
|
141
152
|
# serve some settings
|
142
153
|
settings = { 'before' => report_before, 'after' => report_after,
|
143
|
-
|
154
|
+
'cached' => %w[before after] }
|
144
155
|
dir.+(SETTINGS_FILE).open('w') { |f| YAML.dump(settings, f) }
|
145
156
|
end
|
146
157
|
end
|
data/lib/sitediff/cache.rb
CHANGED
@@ -1,61 +1,67 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'set'
|
4
|
+
require 'fileutils'
|
2
5
|
|
3
6
|
class SiteDiff
|
4
|
-
class Cache
|
5
|
-
|
7
|
+
class Cache
|
8
|
+
attr_accessor :read_tags, :write_tags
|
6
9
|
|
7
|
-
|
10
|
+
def initialize(opts = {})
|
11
|
+
@dir = opts[:dir] || '.'
|
12
|
+
@create = opts[:create]
|
13
|
+
@read_tags = Set.new
|
14
|
+
@write_tags = Set.new
|
15
|
+
end
|
8
16
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
@write_tags = Set.new
|
14
|
-
end
|
17
|
+
# Is a tag cached?
|
18
|
+
def tag?(tag)
|
19
|
+
File.directory?(File.join(@dir, 'snapshot', tag.to_s))
|
20
|
+
end
|
15
21
|
|
16
|
-
|
22
|
+
def get(tag, path)
|
23
|
+
return nil unless @read_tags.include? tag
|
17
24
|
|
18
|
-
|
19
|
-
def tag?(tag)
|
20
|
-
open
|
21
|
-
@dbm[tag.to_s]
|
22
|
-
end
|
25
|
+
filename = File.join(@dir, 'snapshot', tag.to_s, *path.split(File::SEPARATOR))
|
23
26
|
|
24
|
-
|
25
|
-
|
26
|
-
open or return nil
|
27
|
-
val = @dbm[key(tag, path)]
|
28
|
-
return val && Marshal.load(val)
|
29
|
-
end
|
27
|
+
filename = File.join(filename, 'index.html') if File.directory?(filename)
|
28
|
+
return nil unless File.file? filename
|
30
29
|
|
31
|
-
|
32
|
-
|
33
|
-
open or return
|
34
|
-
@dbm[tag.to_s] = 'TRUE'
|
35
|
-
@dbm[key(tag, path)] = Marshal.dump(result)
|
36
|
-
end
|
30
|
+
Marshal.load(File.read(filename))
|
31
|
+
end
|
37
32
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
end
|
33
|
+
def set(tag, path, result)
|
34
|
+
return unless @write_tags.include? tag
|
35
|
+
|
36
|
+
filename = File.join(@dir, 'snapshot', tag.to_s, *path.split(File::SEPARATOR))
|
43
37
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
38
|
+
filename = File.join(filename, 'index.html') if File.directory?(filename)
|
39
|
+
filepath = Pathname.new(filename)
|
40
|
+
unless filepath.dirname.directory?
|
41
|
+
begin
|
42
|
+
filepath.dirname.mkpath
|
43
|
+
rescue Errno::EEXIST
|
44
|
+
curdir = filepath
|
45
|
+
curdir = curdir.parent until curdir.exist?
|
46
|
+
tempname = curdir.dirname + (curdir.basename.to_s + '.temporary')
|
47
|
+
# May cause problems if action is not atomic!
|
48
|
+
# Move existing file to dir/index.html first
|
49
|
+
# Not robust! Should generate an UUID or something.
|
50
|
+
SiteDiff.log "Overwriting file #{tempname}", :warn if File.exist?(tempname)
|
51
|
+
curdir.rename(tempname)
|
52
|
+
filepath.dirname.mkpath
|
53
|
+
# Should only happen in strange situations such as when the path
|
54
|
+
# is foo/index.html/bar (i.e., index.html is a directory)
|
55
|
+
SiteDiff.log "Overwriting file #{tempname}", :warn if (curdir + 'index.html').exist?
|
56
|
+
tempname.rename(curdir + 'index.html')
|
57
|
+
end
|
58
|
+
end
|
59
|
+
File.open(filename, 'w') { |file| file.write(Marshal.dump(result)) }
|
60
|
+
end
|
61
|
+
|
62
|
+
def key(tag, path)
|
63
|
+
# Ensure encoding stays the same!
|
64
|
+
Marshal.dump([tag, path.encode('UTF-8')])
|
57
65
|
end
|
58
|
-
return true
|
59
66
|
end
|
60
67
|
end
|
61
|
-
end
|
data/lib/sitediff/cli.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'thor'
|
2
4
|
require 'sitediff'
|
3
5
|
require 'sitediff/cache'
|
@@ -9,9 +11,18 @@ require 'sitediff/webserver/resultserver'
|
|
9
11
|
class SiteDiff
|
10
12
|
class Cli < Thor
|
11
13
|
class_option 'directory',
|
12
|
-
|
13
|
-
|
14
|
-
|
14
|
+
type: :string,
|
15
|
+
aliases: '-C',
|
16
|
+
default: 'sitediff',
|
17
|
+
desc: 'Configuration directory'
|
18
|
+
class_option :curl_options,
|
19
|
+
type: :hash,
|
20
|
+
default: {},
|
21
|
+
desc: 'Options to be passed to curl'
|
22
|
+
class_option :insecure,
|
23
|
+
type: :boolean,
|
24
|
+
default: false,
|
25
|
+
desc: 'Ignore many HTTPS/SSL errors'
|
15
26
|
|
16
27
|
# Thor, by default, exits with 0 no matter what!
|
17
28
|
def self.exit_on_failure?
|
@@ -19,67 +30,67 @@ class SiteDiff
|
|
19
30
|
end
|
20
31
|
|
21
32
|
# Thor, by default, does not raise an error for use of unknown options.
|
22
|
-
def self.check_unknown_options?(
|
33
|
+
def self.check_unknown_options?(_config)
|
23
34
|
true
|
24
35
|
end
|
25
36
|
|
26
|
-
option 'dump-dir',
|
27
|
-
:type => :string,
|
28
|
-
:default => File.join('.', 'output'),
|
29
|
-
:desc => "Location to write the output to."
|
30
37
|
option 'paths-file',
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
38
|
+
type: :string,
|
39
|
+
desc: 'Paths are read (one at a line) from PATHS: ' \
|
40
|
+
'useful for iterating over sanitization rules',
|
41
|
+
aliases: '--paths-from-file'
|
35
42
|
option 'paths',
|
36
|
-
|
37
|
-
|
38
|
-
|
43
|
+
type: :array,
|
44
|
+
aliases: '-p',
|
45
|
+
desc: 'Specific path or paths to fetch'
|
39
46
|
option 'before',
|
40
|
-
|
41
|
-
|
42
|
-
|
47
|
+
type: :string,
|
48
|
+
desc: 'URL used to fetch the before HTML. Acts as a prefix to specified paths',
|
49
|
+
aliases: '--before-url'
|
43
50
|
option 'after',
|
44
|
-
|
45
|
-
|
46
|
-
|
51
|
+
type: :string,
|
52
|
+
desc: 'URL used to fetch the after HTML. Acts as a prefix to specified paths.',
|
53
|
+
aliases: '--after-url'
|
47
54
|
option 'before-report',
|
48
|
-
|
49
|
-
|
50
|
-
|
55
|
+
type: :string,
|
56
|
+
desc: 'Before URL to use for reporting purposes. Useful if port forwarding.',
|
57
|
+
aliases: '--before-url-report'
|
51
58
|
option 'after-report',
|
52
|
-
|
53
|
-
|
54
|
-
|
59
|
+
type: :string,
|
60
|
+
desc: 'After URL to use for reporting purposes. Useful if port forwarding.',
|
61
|
+
aliases: '--after-url-report'
|
55
62
|
option 'cached',
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
option '
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
63
|
+
type: :string,
|
64
|
+
enum: %w[none all before after],
|
65
|
+
default: 'before',
|
66
|
+
desc: 'Use the cached version of these sites, if available.'
|
67
|
+
option 'verbose',
|
68
|
+
type: :boolean,
|
69
|
+
aliases: '-v',
|
70
|
+
default: false,
|
71
|
+
desc: 'Show differences between versions for each page in terminal'
|
72
|
+
option :concurrency,
|
73
|
+
type: :numeric,
|
74
|
+
default: 3,
|
75
|
+
desc: 'Max number of concurrent connections made'
|
76
|
+
desc 'diff [OPTIONS] [CONFIGFILES]', 'Perform systematic diff on given URLs'
|
66
77
|
def diff(*config_files)
|
67
|
-
config =
|
78
|
+
config = SiteDiff::Config.new(config_files, options[:directory])
|
68
79
|
|
69
80
|
# override config based on options
|
70
81
|
paths = options['paths']
|
71
|
-
if paths_file = options['paths-file']
|
72
|
-
if paths
|
73
|
-
SiteDiff
|
74
|
-
exit
|
82
|
+
if (paths_file = options['paths-file'])
|
83
|
+
if paths
|
84
|
+
SiteDiff.log "Can't have both --paths-file and --paths", :error
|
85
|
+
exit(-1)
|
75
86
|
end
|
76
87
|
|
77
88
|
paths_file = Pathname.new(paths_file).expand_path
|
78
|
-
unless File.
|
89
|
+
unless File.exist? paths_file
|
79
90
|
raise Config::InvalidConfig,
|
80
|
-
|
91
|
+
"Paths file '#{paths_file}' not found!"
|
81
92
|
end
|
82
|
-
SiteDiff
|
93
|
+
SiteDiff.log "Reading paths from: #{paths_file}"
|
83
94
|
config.paths = File.readlines(paths_file)
|
84
95
|
end
|
85
96
|
config.paths = paths if paths
|
@@ -88,21 +99,21 @@ class SiteDiff
|
|
88
99
|
config.after['url'] = options['after'] if options['after']
|
89
100
|
|
90
101
|
# Setup cache
|
91
|
-
cache = SiteDiff::Cache.new(:
|
102
|
+
cache = SiteDiff::Cache.new(create: options['cached'] != 'none',
|
103
|
+
dir: options['directory'])
|
92
104
|
cache.read_tags << :before if %w[before all].include?(options['cached'])
|
93
105
|
cache.read_tags << :after if %w[after all].include?(options['cached'])
|
94
106
|
cache.write_tags << :before << :after
|
95
107
|
|
96
|
-
sitediff = SiteDiff.new(config, cache,
|
97
|
-
|
98
|
-
|
108
|
+
sitediff = SiteDiff.new(config, cache, options[:concurrency],
|
109
|
+
options['verbose'])
|
110
|
+
num_failing = sitediff.run(get_curl_opts(options))
|
111
|
+
exit_code = num_failing > 0 ? 2 : 0
|
99
112
|
|
100
|
-
sitediff.dump(options['
|
101
|
-
|
113
|
+
sitediff.dump(options['directory'], options['before-report'],
|
114
|
+
options['after-report'])
|
102
115
|
rescue Config::InvalidConfig => e
|
103
116
|
SiteDiff.log "Invalid configuration: #{e.message}", :error
|
104
|
-
rescue SiteDiffException => e
|
105
|
-
SiteDiff.log e.message, :error
|
106
117
|
else # no exception was raised
|
107
118
|
# Thor::Error --> exit(1), guaranteed by exit_on_failure?
|
108
119
|
# Failing diff --> exit(2), populated above
|
@@ -110,62 +121,61 @@ class SiteDiff
|
|
110
121
|
end
|
111
122
|
|
112
123
|
option :port,
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
option 'dump-dir',
|
117
|
-
:type => :string,
|
118
|
-
:default => 'output',
|
119
|
-
:desc => 'The directory to serve'
|
124
|
+
type: :numeric,
|
125
|
+
default: SiteDiff::Webserver::DEFAULT_PORT,
|
126
|
+
desc: 'The port to serve on'
|
120
127
|
option :browse,
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
desc
|
128
|
+
type: :boolean,
|
129
|
+
default: true,
|
130
|
+
desc: 'Whether to open the served content in your browser'
|
131
|
+
desc 'serve [OPTIONS]', 'Serve the sitediff output directory over HTTP'
|
125
132
|
def serve(*config_files)
|
126
|
-
config =
|
133
|
+
config = SiteDiff::Config.new(config_files, options['directory'])
|
134
|
+
# Could check non-empty config here but currently errors are already raised.
|
127
135
|
|
128
|
-
cache = Cache.new
|
136
|
+
cache = Cache.new(dir: options['directory'])
|
129
137
|
cache.read_tags << :before << :after
|
130
138
|
|
131
139
|
SiteDiff::Webserver::ResultServer.new(
|
132
140
|
options[:port],
|
133
|
-
options['
|
134
|
-
:
|
135
|
-
:
|
136
|
-
:
|
141
|
+
options['directory'],
|
142
|
+
browse: options[:browse],
|
143
|
+
cache: cache,
|
144
|
+
config: config
|
137
145
|
).wait
|
146
|
+
rescue SiteDiffException => e
|
147
|
+
SiteDiff.log e.message, :error
|
138
148
|
end
|
139
149
|
|
140
|
-
option :output,
|
141
|
-
:type => :string,
|
142
|
-
:default => 'sitediff',
|
143
|
-
:desc => 'Directory in which to place the configuration',
|
144
|
-
:aliases => ['-o']
|
145
150
|
option :depth,
|
146
|
-
|
147
|
-
|
148
|
-
|
151
|
+
type: :numeric,
|
152
|
+
default: 3,
|
153
|
+
desc: 'How deeply to crawl the given site'
|
149
154
|
option :rules,
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
+
type: :string,
|
156
|
+
enum: %w[yes no disabled],
|
157
|
+
default: 'disabled',
|
158
|
+
desc: 'Whether rules for the site should be auto-created'
|
159
|
+
option :concurrency,
|
160
|
+
type: :numeric,
|
161
|
+
default: 3,
|
162
|
+
desc: 'Max number of concurrent connections made'
|
163
|
+
desc 'init URL [URL]', 'Create a sitediff configuration'
|
155
164
|
def init(*urls)
|
156
|
-
unless (1..2).
|
157
|
-
SiteDiff.log
|
165
|
+
unless (1..2).cover? urls.size
|
166
|
+
SiteDiff.log 'sitediff init requires one or two URLs', :error
|
158
167
|
exit 2
|
159
168
|
end
|
160
169
|
|
161
|
-
|
162
|
-
|
170
|
+
curl_opts = get_curl_opts(options)
|
171
|
+
|
172
|
+
creator = SiteDiff::Config::Creator.new(options[:concurrency], curl_opts, *urls)
|
163
173
|
creator.create(
|
164
|
-
:
|
165
|
-
:
|
166
|
-
:
|
167
|
-
:
|
168
|
-
) do |
|
174
|
+
depth: options[:depth],
|
175
|
+
directory: options[:directory],
|
176
|
+
rules: options[:rules] != 'no',
|
177
|
+
rules_disabled: (options[:rules] == 'disabled')
|
178
|
+
) do |_tag, info|
|
169
179
|
SiteDiff.log "Visited #{info.uri}, cached"
|
170
180
|
end
|
171
181
|
|
@@ -174,37 +184,40 @@ class SiteDiff
|
|
174
184
|
end
|
175
185
|
|
176
186
|
option :url,
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
187
|
+
type: :string,
|
188
|
+
desc: 'A custom base URL to fetch from'
|
189
|
+
option :concurrency,
|
190
|
+
type: :numeric,
|
191
|
+
default: 3,
|
192
|
+
desc: 'Max number of concurrent connections made'
|
193
|
+
desc 'store [CONFIGFILES]',
|
194
|
+
'Cache the current contents of a site for later comparison'
|
181
195
|
def store(*config_files)
|
182
|
-
config =
|
183
|
-
config.validate(:
|
196
|
+
config = SiteDiff::Config.new(config_files, options['directory'])
|
197
|
+
config.validate(need_before: false)
|
184
198
|
|
185
|
-
cache = SiteDiff::Cache.new(:
|
199
|
+
cache = SiteDiff::Cache.new(create: true)
|
186
200
|
cache.write_tags << :before
|
187
201
|
|
188
202
|
base = options[:url] || config.after['url']
|
189
|
-
fetcher = SiteDiff::Fetch.new(cache, config.paths,
|
190
|
-
|
203
|
+
fetcher = SiteDiff::Fetch.new(cache, config.paths, options['concurrency'],
|
204
|
+
before: base)
|
205
|
+
fetcher.run do |path, _res|
|
191
206
|
SiteDiff.log "Visited #{path}, cached"
|
192
207
|
end
|
193
208
|
end
|
194
209
|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
raise if opts[:config]
|
207
|
-
# If no config required, allow it to pass
|
210
|
+
no_commands do
|
211
|
+
def get_curl_opts(options)
|
212
|
+
# We do want string keys here
|
213
|
+
bool_hash = { 'true' => true, 'false' => false }
|
214
|
+
curl_opts = UriWrapper::DEFAULT_CURL_OPTS.clone.merge(options[:curl_options])
|
215
|
+
curl_opts.each { |k, v| curl_opts[k] = bool_hash.fetch(v, v) }
|
216
|
+
if options[:insecure]
|
217
|
+
curl_opts[:ssl_verifypeer] = false
|
218
|
+
curl_opts[:ssl_verifyhost] = 0
|
219
|
+
end
|
220
|
+
curl_opts
|
208
221
|
end
|
209
222
|
end
|
210
223
|
end
|