sitediff 0.0.6 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.eslintignore +1 -0
- data/.eslintrc.json +28 -0
- data/.project +11 -0
- data/.rubocop.yml +179 -0
- data/.rubocop_todo.yml +51 -0
- data/CHANGELOG.md +28 -0
- data/Dockerfile +33 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +85 -0
- data/INSTALLATION.md +146 -0
- data/LICENSE +339 -0
- data/README.md +810 -0
- data/Rakefile +12 -0
- data/Thorfile +135 -0
- data/bin/sitediff +9 -2
- data/config/.gitkeep +0 -0
- data/config/sanitize_domains.example.yaml +8 -0
- data/config/sitediff.example.yaml +81 -0
- data/docker-compose.test.yml +3 -0
- data/lib/sitediff/api.rb +276 -0
- data/lib/sitediff/cache.rb +57 -8
- data/lib/sitediff/cli.rb +156 -176
- data/lib/sitediff/config/creator.rb +61 -77
- data/lib/sitediff/config/preset.rb +75 -0
- data/lib/sitediff/config.rb +436 -31
- data/lib/sitediff/crawler.rb +27 -21
- data/lib/sitediff/diff.rb +32 -9
- data/lib/sitediff/fetch.rb +10 -3
- data/lib/sitediff/files/diff.html.erb +20 -2
- data/lib/sitediff/files/jquery.min.js +2 -0
- data/lib/sitediff/files/normalize.css +349 -0
- data/lib/sitediff/files/report.html.erb +171 -0
- data/lib/sitediff/files/sidebyside.html.erb +5 -2
- data/lib/sitediff/files/sitediff.css +303 -30
- data/lib/sitediff/files/sitediff.js +367 -0
- data/lib/sitediff/presets/drupal.yaml +63 -0
- data/lib/sitediff/report.rb +254 -0
- data/lib/sitediff/result.rb +50 -20
- data/lib/sitediff/sanitize/dom_transform.rb +47 -8
- data/lib/sitediff/sanitize/regexp.rb +24 -3
- data/lib/sitediff/sanitize.rb +81 -12
- data/lib/sitediff/uriwrapper.rb +65 -23
- data/lib/sitediff/webserver/resultserver.rb +30 -33
- data/lib/sitediff/webserver.rb +15 -3
- data/lib/sitediff.rb +130 -83
- data/misc/sitediff - overview report.png +0 -0
- data/misc/sitediff - page report.png +0 -0
- data/package-lock.json +878 -0
- data/package.json +25 -0
- data/sitediff.gemspec +51 -0
- metadata +91 -29
- data/lib/sitediff/files/html_report.html.erb +0 -66
- data/lib/sitediff/files/rules/drupal.yaml +0 -63
- data/lib/sitediff/rules.rb +0 -65
@@ -0,0 +1,75 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'pathname'
|
4
|
+
require 'sitediff/config'
|
5
|
+
|
6
|
+
class SiteDiff
  class Config
    ##
    # Preset helper.
    #
    # Locates, lists and loads the preset rule files (*.yaml) stored in
    # DIRECTORY. Loaded presets and the list of preset names are memoized
    # on the class.
    class Preset
      ##
      # Directory in which presets live.
      #
      # TODO: Move this outside "lib".
      DIRECTORY = File.join(File.dirname(__dir__), 'presets').freeze

      ##
      # Reads preset rules.
      #
      # @param [String] name
      #   Preset name, i.e. the preset file name without ".yaml".
      #
      # @return [Hash]
      #   A hash containing the preset's rules.
      #
      # @raise [Config::InvalidConfig]
      #   If no preset file exists for the given name.
      def self.read(name)
        @cache ||= {}

        # Load and cache preset config.
        if @cache[name].nil?
          exist? name, exception: true
          @cache[name] = Config.load_conf file(name)
        end

        @cache[name]
      end

      ##
      # Get the names of all available presets.
      #
      # @return [Array<String>]
      #   Preset names (file names in DIRECTORY without ".yaml").
      def self.all
        # Load and cache preset names.
        @all ||= Dir.glob(File.join(DIRECTORY, '*.yaml')).map do |path|
          File.basename(path, '.yaml')
        end
      end

      ##
      # Checks whether a preset exists.
      #
      # @param [String] name
      #   Preset name.
      # @param [Boolean] exception
      #   Whether to raise instead of returning false for a missing preset.
      #
      # @return [Boolean]
      #   True if the preset file exists.
      #
      # @raise [Config::InvalidConfig]
      #   If the preset is missing and +exception+ is true.
      def self.exist?(name, exception: false)
        result = File.exist?(file(name))

        # Raise an exception, if required.
        if exception && !result
          raise Config::InvalidConfig, "Preset not found: #{name}"
        end

        result
      end

      ##
      # Returns the path to a preset file.
      #
      # @param [String] name
      #   Preset name.
      #
      # @return [String]
      #   Path of the form "DIRECTORY/name.yaml"; the file may not exist.
      def self.file(name)
        File.join(DIRECTORY, "#{name}.yaml")
      end
    end
  end
end
|
data/lib/sitediff/config.rb
CHANGED
@@ -1,21 +1,71 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'sitediff/config/preset'
|
3
4
|
require 'sitediff/exception'
|
4
5
|
require 'sitediff/sanitize'
|
5
6
|
require 'pathname'
|
6
7
|
require 'yaml'
|
7
8
|
|
8
9
|
class SiteDiff
|
10
|
+
# SiteDiff Configuration.
|
9
11
|
class Config
|
12
|
+
# Default config file.
DEFAULT_FILENAME = 'sitediff.yaml'

# Default paths file.
DEFAULT_PATHS_FILENAME = 'paths.txt'

# Default SiteDiff config.
#
# NOTE: only the outer Hash is frozen; the nested hashes and array are still
# mutable, so code consuming these defaults must not modify them in place.
DEFAULT_CONFIG = {
  'settings' => {
    'depth' => 3,
    'interval' => 0,
    'include' => '',
    'exclude' => '',
    'concurrency' => 3,
    'preset' => nil
  },
  'before' => {},
  'after' => {},
  'paths' => []
}.freeze

# Keys allowed in config files.
# TODO: Deprecate repeated params before_url and after_url.
# TODO: Create a method self.supports
# TODO: Deprecate in favor of self.supports key, subkey, subkey...
#
# Frozen like the other constants so the whitelist cannot be altered at
# runtime by accident.
ALLOWED_CONFIG_KEYS = (Sanitizer::TOOLS.values.flatten(1) + %w[
  includes
  settings
  before
  after
  before_url
  after_url
  ignore_whitespace
  export
  output
  report
]).freeze

##
# Keys allowed in the "settings" key.
# TODO: Create a method self.supports
# TODO: Deprecate in favor of self.supports key, subkey, subkey...
ALLOWED_SETTINGS_KEYS = %w[
  preset
  depth
  include
  exclude
  concurrency
  interval
  curl_opts
].freeze

# Raised when a configuration file or value is not usable.
class InvalidConfig < SiteDiffException; end
# Raised when a configuration cannot be located.
class ConfigNotFound < SiteDiffException; end

# Directory passed to the constructor.
attr_reader :directory
|
68
|
+
|
19
69
|
# Takes a Hash and normalizes it to the following form by merging globals
|
20
70
|
# into before and after. A normalized config Hash looks like this:
|
21
71
|
#
|
@@ -25,6 +75,12 @@ class SiteDiff
|
|
25
75
|
# before:
|
26
76
|
# url: http://before
|
27
77
|
# selector: body
|
78
|
+
# ## Note: use either `selector` or `regions`, but not both
|
79
|
+
# regions:
|
80
|
+
# - name: title
|
81
|
+
# selector: .field-name-title h2
|
82
|
+
# - name: body
|
83
|
+
# selector: .field-name-field-news-description .field-item
|
28
84
|
# dom_transform:
|
29
85
|
# - type: remove
|
30
86
|
# selector: script
|
@@ -33,10 +89,17 @@ class SiteDiff
|
|
33
89
|
# url: http://after
|
34
90
|
# selector: body
|
35
91
|
#
|
92
|
+
# ## Note: use `output` only with `regions`
|
93
|
+
# output:
|
94
|
+
# - title
|
95
|
+
# - author
|
96
|
+
# - source
|
97
|
+
# - body
|
98
|
+
#
|
36
99
|
def self.normalize(conf)
|
37
100
|
tools = Sanitizer::TOOLS
|
38
101
|
|
39
|
-
#
|
102
|
+
# Merge globals
|
40
103
|
%w[before after].each do |pos|
|
41
104
|
conf[pos] ||= {}
|
42
105
|
tools[:array].each do |key|
|
@@ -44,13 +107,14 @@ class SiteDiff
|
|
44
107
|
conf[pos][key] += conf[key] if conf[key]
|
45
108
|
end
|
46
109
|
tools[:scalar].each { |key| conf[pos][key] ||= conf[key] }
|
47
|
-
conf[pos]['url'] ||= conf[pos
|
110
|
+
conf[pos]['url'] ||= conf["pos#{_url}"] if defined?(_url)
|
48
111
|
conf[pos]['curl_opts'] = conf['curl_opts']
|
49
112
|
end
|
50
|
-
|
113
|
+
|
114
|
+
# Normalize paths.
|
51
115
|
conf['paths'] = Config.normalize_paths(conf['paths'])
|
52
116
|
|
53
|
-
conf.select { |k, _v|
|
117
|
+
conf.select { |k, _v| ALLOWED_CONFIG_KEYS.include? k }
|
54
118
|
end
|
55
119
|
|
56
120
|
# Merges two normalized Hashes according to the following rules:
|
@@ -67,65 +131,397 @@ class SiteDiff
|
|
67
131
|
# (h2) before: {selector: bar, sanitization: [pattern: bar]}
|
68
132
|
# (h3) before: {selector: foo, sanitization: [pattern: foo, pattern: bar]}
|
69
133
|
def self.merge(first, second)
  # Neither argument is modified: the previous in-place merge! wrote into
  # first['before'] / first['after'], which polluted shared hashes such as
  # the ones inside DEFAULT_CONFIG.
  result = {
    'before' => {},
    'after' => {},
    'output' => [],
    'settings' => {}
  }

  # Merge sanitization rules (the second hash wins when it has a value).
  Sanitizer::TOOLS.values.flatten(1).each do |key|
    value = second[key] || first[key]
    result[key] = value if value
  end

  # Rule 1.
  %w[before after].each do |pos|
    ours = first[pos] || {}
    theirs = second[pos] || {}

    result[pos] = ours.merge(theirs) do |key, a, b|
      if Sanitizer::TOOLS[:array].include? key
        # Rule 2a.
        (a || []) + (b || [])
      elsif key == 'settings'
        b
      else
        a || b # Rule 2b.
      end
    end
  end

  # Merge output array.
  result['output'] += (first['output'] || []) + (second['output'] || [])

  # Merge url_report keys (the first hash wins; the key is always set).
  %w[before_url_report after_url_report].each do |pos|
    result[pos] = first[pos] || second[pos]
  end

  # Merge settings.
  result['settings'] = merge_deep(
    first['settings'] || {},
    second['settings'] || {}
  )

  # Merge report labels.
  result['report'] = merge_deep(
    first['report'] || {},
    second['report'] || {}
  )

  result
end
|
87
192
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
193
|
+
##
# Merges 2 iterable objects deeply.
#
# For keys present in both hashes: nested hashes are merged recursively,
# arrays are concatenated, and any other value is taken from +second+.
# A nil on the +second+ side keeps the hash/array from +first+.
#
# @param [Hash] first
#   Base hash (not modified).
# @param [Hash] second
#   Hash whose values take precedence (not modified).
#
# @return [Hash]
#   A new, merged hash.
def self.merge_deep(first, second)
  first.merge(second) do |_key, val1, val2|
    # Match on the value itself: `case val1.class` compares the class object
    # against Hash/Array with ===, which is never true.
    if val1.is_a?(Hash) && (val2.nil? || val2.is_a?(Hash))
      merge_deep(val1, val2 || {})
    elsif val1.is_a?(Array) && (val2.nil? || val2.is_a?(Array))
      val1 + (val2 || [])
    else
      val2
    end
  end
end
|
100
207
|
|
101
|
-
|
102
|
-
|
208
|
+
##
# Returns the loaded configuration with default values stripped.
#
# A deep copy of the internal config is made (via a Marshal round-trip)
# before the defaults are removed, so the stored config is not altered.
#
# @return [Hash]
#   Config data.
def all
  snapshot = Marshal.load(Marshal.dump(@config))
  self.class.remove_defaults(snapshot)
end
|
104
217
|
|
105
|
-
|
106
|
-
|
218
|
+
##
# Removes default parameters from a config hash.
#
# I know this is weird, but it'll be fixed. The config management needs to
# be streamlined further.
#
# NOTE: no copy is made here. +data+ is edited in place and returned, so
# callers that need the original intact must pass in a copy.
#
# @param [Hash] data
#   Config data; its 'settings' entry (if any) is pruned.
#
# @return [Hash]
#   The same hash, without settings equal to their defaults.
def self.remove_defaults(data)
  settings = data['settings']
  # Nothing to prune when there is no settings section.
  return data unless settings

  # Exclude default settings (and any falsy values).
  settings.delete_if do |key, value|
    value == DEFAULT_CONFIG['settings'][key] || !value
  end

  # Exclude default curl opts.
  curl_opts = (settings['curl_opts'] ||= {})
  curl_opts.delete_if do |key, value|
    value == UriWrapper::DEFAULT_CURL_OPTS[key.to_sym]
  end

  # Delete curl opts if empty.
  settings.delete('curl_opts') if curl_opts.empty?

  data
end
|
108
245
|
|
246
|
+
# Creates a SiteDiff Config object.
#
# @param [String, nil] file
#   Path to a config file; when nil, DEFAULT_FILENAME inside +directory+
#   is used.
# @param [String] directory
#   Directory associated with this config.
#
# @raise [InvalidConfig]
#   If the config file does not exist (validation may raise as well).
def initialize(file, directory)
  # Fallback to default config filename, if none is specified.
  file = File.join(directory, DEFAULT_FILENAME) if file.nil?

  raise InvalidConfig, "Missing config file #{File.expand_path(file)}." unless File.exist?(file)

  # Layer the loaded file on top of the defaults.
  @config = Config.merge(DEFAULT_CONFIG, Config.load_conf(file))
  @file = file
  @directory = directory

  # Validate configurations.
  validate
end
|
261
|
+
|
262
|
+
# Get "before" site configuration.
#
# @param [Boolean] apply_preset
#   Passed to #section as +with_preset+.
#
# @return [Hash, nil]
#   Whatever #section returns for the "before" section.
def before(apply_preset: false)
  section(:before, with_preset: apply_preset)
end
|
266
|
+
|
267
|
+
# Get "before" site URL.
#
# @return [String, nil]
#   The 'url' entry of the "before" section, or nil when there is no
#   "before" section.
def before_url
  site = before
  site ? site['url'] : nil
end
|
272
|
+
|
273
|
+
# Get "after" site configuration.
#
# @param [Boolean] apply_preset
#   Passed to #section as +with_preset+.
#
# @return [Hash, nil]
#   Whatever #section returns for the "after" section.
def after(apply_preset: false)
  section(:after, with_preset: apply_preset)
end
|
277
|
+
|
278
|
+
# Get "after" site URL.
#
# @return [String, nil]
#   The 'url' entry of the "after" section, or nil when there is no
#   "after" section.
def after_url
  site = after
  site ? site['url'] : nil
end
|
283
|
+
|
284
|
+
# Get paths.
#
# @return [Array, nil]
#   The value stored under the 'paths' config key.
def paths
  @config['paths']
end
|
112
288
|
|
289
|
+
# Set paths.
#
# @param [Array] paths
#   Paths; they are passed through Config.normalize_paths before storing.
#
# @raise [RuntimeError]
#   If +paths+ is not an Array.
def paths=(paths)
  raise 'Paths must be an Array' unless paths.is_a?(Array)

  normalized = Config.normalize_paths(paths)
  @config['paths'] = normalized
end
|
116
295
|
|
296
|
+
# Get ignore_whitespace option.
#
# @return [Object, nil]
#   The value stored under the 'ignore_whitespace' config key.
def ignore_whitespace
  @config['ignore_whitespace']
end
|
300
|
+
|
301
|
+
# Set ignore_whitespace option.
#
# @param [Object] ignore_whitespace
#   Value to store under the 'ignore_whitespace' config key.
def ignore_whitespace=(ignore_whitespace)
  @config['ignore_whitespace'] = ignore_whitespace
end
|
305
|
+
|
306
|
+
# Get export option.
#
# @return [Object, nil]
#   The value stored under the 'export' config key.
def export
  @config['export']
end
|
310
|
+
|
311
|
+
# Set export option.
#
# @param [Object] export
#   Value to store under the 'export' config key.
def export=(export)
  @config['export'] = export
end
|
315
|
+
|
316
|
+
# Get output option.
#
# @return [Array, nil]
#   The value stored under the 'output' config key.
def output
  @config['output']
end
|
320
|
+
|
321
|
+
# Set output option.
#
# @param [Array] output
#   Value to store under the 'output' config key.
#
# @raise [RuntimeError]
#   If +output+ is not an Array.
def output=(output)
  raise 'Output must be an Array' unless output.is_a?(Array)

  @config['output'] = output
end
|
327
|
+
|
328
|
+
# Return report display settings.
#
# @return [Hash, nil]
#   The value stored under the 'report' config key.
def report
  @config['report']
end
|
332
|
+
|
333
|
+
# Set crawl time for 'before'.
#
# @param [Object] time
#   Value stored as 'before_time' inside the 'report' config hash.
def before_time=(time)
  report_settings = @config['report']
  report_settings['before_time'] = time
end
|
337
|
+
|
338
|
+
# Set crawl time for 'after'.
#
# @param [Object] time
#   Value stored as 'after_time' inside the 'report' config hash.
def after_time=(time)
  report_settings = @config['report']
  report_settings['after_time'] = time
end
|
342
|
+
|
343
|
+
##
# Writes an array of paths to a file, one path per line.
#
# @param [Array] paths
#   A non-empty array of paths.
# @param [String] file
#   Optional path to a file; defaults to DEFAULT_PATHS_FILENAME inside
#   the config directory.
#
# @raise [SiteDiffException]
#   If +paths+ is not a non-empty Array.
def paths_file_write(paths, file = nil)
  writable = paths.is_a?(Array) && paths.length.positive?
  raise SiteDiffException, 'Write failed. Invalid paths.' unless writable

  target = file || File.join(@directory, DEFAULT_PATHS_FILENAME)
  File.open(target, 'w+') { |handle| handle.puts(paths) }
end
|
358
|
+
|
359
|
+
##
# Reads a collection of paths from a file and stores them via #paths=.
#
# @param [String] file
#   A file containing one path per line; defaults to
#   DEFAULT_PATHS_FILENAME inside the config directory.
#
# @return [Integer]
#   Number of paths read.
#
# @raise [Config::InvalidConfig]
#   If the file does not exist.
def paths_file_read(file = nil)
  source = file || File.join(@directory, DEFAULT_PATHS_FILENAME)
  raise Config::InvalidConfig, "File not found: #{source}" unless File.exist?(source)

  self.paths = File.readlines(source)

  # Return the number of paths.
  paths.length
end
|
379
|
+
|
380
|
+
##
# Get roots.
#
# Builds a Hash mapping section names to their base URLs. The 'after'
# entry is always present; the 'before' entry is added only when a
# "before" section exists.
#
# @return [Hash]
#   For example { 'after' => after_url, 'before' => before_url }.
def roots
  mapping = { 'after' => after_url }
  mapping['before'] = before_url if before
  @roots = mapping
end
|
390
|
+
|
391
|
+
##
# Gets a setting.
#
# @param [String, Symbol] key
#   A key; symbols are converted to strings before the lookup.
#
# @return [*]
#   The value stored in the settings hash, or nil if the key is absent.
def setting(key)
  lookup = key.is_a?(Symbol) ? key.to_s : key
  store = @config['settings']
  store.key?(lookup) ? store[lookup] : nil
end
|
403
|
+
|
404
|
+
##
# Gets all settings.
#
# TODO: Make sure the settings are not writable.
#
# @return [Hash]
#   The live settings hash (not a copy).
def settings
  @config['settings']
end
|
414
|
+
|
117
415
|
# Checks if the configuration is usable for diff-ing.
# TODO: Do we actually need the opts argument?
#
# @param [Hash] opts
#   Options; :need_before (default true) controls whether a missing
#   'before' URL is an error.
#
# @raise [InvalidConfig]
#   If a required base URL is missing, or if an interval is combined with
#   a concurrency other than 1. The preset check may raise as well.
def validate(opts = {})
  opts = { need_before: true }.merge(opts)

  before_missing = opts[:need_before] && !before['url']
  raise InvalidConfig, "Undefined 'before' base URL." if before_missing

  raise InvalidConfig, "Undefined 'after' base URL." unless after['url']

  # Validate interval and concurrency.
  interval = setting(:interval)
  concurrency = setting(:concurrency)
  throttled = interval.to_i != 0
  raise InvalidConfig, 'Concurrency must be 1 when an interval is set.' if throttled && concurrency != 1

  # Validate preset.
  Preset.exist? setting(:preset), exception: true if setting(:preset)
end
|
436
|
+
|
437
|
+
##
# Returns object clone with stringified keys.
# TODO: Make this method available globally, if required.
#
# Objects that do not respond to each_key (arrays and scalars included)
# are returned as-is; hash-like objects are rebuilt recursively with
# Symbol keys converted to Strings.
#
# @param [Object] object
#   The object to convert.
#
# @return [Object]
#   A new Hash with string keys, or the object itself.
def self.stringify_keys(object)
  # Do nothing if it is not an object.
  return object unless object.respond_to?('each_key')

  # Convert symbol indices to strings.
  converted = {}
  object.each_key do |key|
    target = key.is_a?(Symbol) ? key.to_s : key
    converted[target] = stringify_keys(object[key])
  end
  converted
end
|
454
|
+
|
455
|
+
##
# Creates a RegExp from a string.
#
# @param [String] string_param
#   Pattern source. An empty string means "no pattern".
#
# @return [Regexp, nil]
#   The compiled pattern, or nil when the string is empty or is not a
#   valid regular expression (the problem is logged in that case).
def self.create_regexp(string_param)
  return nil if string_param == ''

  Regexp.new(string_param)
rescue RegexpError => e
  # Regexp.new reports a malformed pattern with RegexpError; rescuing
  # SiteDiffException here never matched, so bad patterns used to crash.
  SiteDiff.log "Invalid RegExp: #{string_param}", :error
  SiteDiff.log e.message, :error
  # TODO: Use SiteDiff.log type :debug
  # SiteDiff.log e.backtrace, :error if options[:verbose]
  nil
end
|
469
|
+
|
470
|
+
##
# Return merged CURL options.
#
# Starts from UriWrapper::DEFAULT_CURL_OPTS, overlays the 'curl_opts'
# setting (if any), and turns the string values 'true' / 'false' into real
# booleans.
#
# @return [Hash]
#   A new hash of CURL options.
def curl_opts
  # We do want string keys here
  booleans = { 'true' => true, 'false' => false }
  overrides = settings['curl_opts'] || {}
  combined = UriWrapper::DEFAULT_CURL_OPTS.merge(overrides)
  combined.transform_values { |value| booleans.fetch(value, value) }
end
|
126
481
|
|
127
482
|
private
|
128
483
|
|
484
|
+
##
# Returns one of the "before" or "after" sections.
#
# @param [String|Symbol] name
#   Section name. Example: before, after.
# @param [Boolean] with_preset
#   Whether to merge with preset config (if any).
#
# @return [Hash|Nil]
#   Section data or Nil. Without a preset merge this is the live section
#   hash; with one it is a copy, so the stored config is left untouched.
#
# @raise [SiteDiffException]
#   If the section name is not "before" or "after".
def section(name, with_preset: false)
  name = name.to_s if name.is_a? Symbol

  # Validate section.
  unless %w[before after].include? name
    raise SiteDiffException, '"name" must be one of "before" or "after".'
  end

  # Return nil if section is not defined.
  return nil unless @config[name]

  result = @config[name]

  # Merge preset rules, if required.
  preset = setting(:preset)
  return result unless with_preset && !preset.nil?

  preset_config = Preset.read preset

  # Work on a shallow copy: writing the combined rules back into
  # @config[name] made every further call append the preset rules again.
  result = result.dup

  # Merge plugins with array values.
  # TODO: This won't be required after plugin declarations are improved.
  # See https://rm.ewdev.ca/issues/18301
  Sanitizer::TOOLS[:array].each do |key|
    next unless preset_config[key]

    result[key] = (result[key] || []) + preset_config[key]
  end

  result
end
|
524
|
+
|
129
525
|
def self.normalize_paths(paths)
|
130
526
|
paths ||= []
|
131
527
|
paths.map { |p| (p[0] == '/' ? p : "/#{p}").chomp }
|
@@ -133,13 +529,20 @@ class SiteDiff
|
|
133
529
|
|
134
530
|
# Reads a YAML file and raises an InvalidConfig if the file is not valid.
#
# An empty file yields an empty Hash. The top level must be a Hash and
# every top-level key must be listed in ALLOWED_CONFIG_KEYS.
#
# @param [String] file
#   Path to the YAML file.
#
# @return [Hash]
#   The parsed, not yet normalized, configuration.
#
# @raise [InvalidConfig]
#   If the content is not a Hash or contains an unknown key.
def self.load_raw_yaml(file)
  # TODO: Only show this in verbose mode.
  SiteDiff.log "Reading config file: #{Pathname.new(file).expand_path}"

  parsed = YAML.load_file(file, permitted_classes: [Regexp]) || {}
  raise InvalidConfig, "Invalid configuration file: '#{file}'" unless parsed.is_a?(Hash)

  parsed.each_key do |key|
    next if ALLOWED_CONFIG_KEYS.include?(key)

    raise InvalidConfig, "Unknown configuration key (#{file}): '#{key}'"
  end

  parsed
end
|
145
548
|
|
@@ -148,7 +551,9 @@ class SiteDiff
|
|
148
551
|
def self.load_conf(file, visited = [])
|
149
552
|
# don't get fooled by a/../a/ or symlinks
|
150
553
|
file = File.realpath(file)
|
151
|
-
|
554
|
+
if visited.include? file
|
555
|
+
raise InvalidConfig, "Circular dependency: #{file}"
|
556
|
+
end
|
152
557
|
|
153
558
|
conf = load_raw_yaml(file) # not normalized yet
|
154
559
|
visited << file
|