sitediff 0.0.6 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/bin/sitediff +9 -2
- data/lib/sitediff.rb +126 -81
- data/lib/sitediff/cache.rb +35 -6
- data/lib/sitediff/cli.rb +254 -119
- data/lib/sitediff/config.rb +362 -29
- data/lib/sitediff/config/creator.rb +53 -71
- data/lib/sitediff/config/preset.rb +75 -0
- data/lib/sitediff/crawler.rb +11 -15
- data/lib/sitediff/diff.rb +28 -9
- data/lib/sitediff/fetch.rb +9 -2
- data/lib/sitediff/files/diff.html.erb +20 -2
- data/lib/sitediff/files/jquery.min.js +2 -0
- data/lib/sitediff/files/normalize.css +349 -0
- data/lib/sitediff/files/report.html.erb +144 -0
- data/lib/sitediff/files/sidebyside.html.erb +5 -2
- data/lib/sitediff/files/sitediff.css +226 -30
- data/lib/sitediff/files/sitediff.js +176 -0
- data/lib/sitediff/report.rb +238 -0
- data/lib/sitediff/result.rb +47 -19
- data/lib/sitediff/sanitize.rb +29 -8
- data/lib/sitediff/sanitize/dom_transform.rb +45 -6
- data/lib/sitediff/sanitize/regexp.rb +23 -2
- data/lib/sitediff/uriwrapper.rb +56 -15
- data/lib/sitediff/webserver.rb +12 -3
- data/lib/sitediff/webserver/resultserver.rb +28 -33
- metadata +33 -16
- data/lib/sitediff/files/html_report.html.erb +0 -66
- data/lib/sitediff/files/rules/drupal.yaml +0 -63
- data/lib/sitediff/rules.rb +0 -65
data/lib/sitediff/config.rb
CHANGED
@@ -1,17 +1,63 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'sitediff/config/preset'
|
3
4
|
require 'sitediff/exception'
|
4
5
|
require 'sitediff/sanitize'
|
5
6
|
require 'pathname'
|
6
7
|
require 'yaml'
|
7
8
|
|
8
9
|
class SiteDiff
|
10
|
+
# SiteDiff Configuration.
|
9
11
|
class Config
|
12
|
+
# Default config file.
|
10
13
|
DEFAULT_FILENAME = 'sitediff.yaml'
|
11
14
|
|
12
|
-
#
|
13
|
-
|
14
|
-
|
15
|
+
# Default paths file.
|
16
|
+
DEFAULT_PATHS_FILENAME = 'paths.txt'
|
17
|
+
|
18
|
+
# Default SiteDiff config.
|
19
|
+
DEFAULT_CONFIG = {
  # Fallback values for the "settings" section.
  'settings' => {
    'depth' => 3,
    'interval' => 0,
    'whitelist' => '',
    'blacklist' => '',
    'concurrency' => 3,
    'preset' => nil
  },
  # Per-site sections and the list of paths start out empty.
  'before' => {},
  'after' => {},
  'paths' => []
}.freeze # NOTE: freeze is shallow; the nested Hashes/Array stay mutable.
|
32
|
+
|
33
|
+
# Keys allowed in config files.
|
34
|
+
# TODO: Deprecate repeated params before_url and after_url.
|
35
|
+
# TODO: Create a method self.supports
|
36
|
+
# TODO: Deprecate in favor of self.supports key, subkey, subkey...
|
37
|
+
# Sanitizer tool names plus the structural keys below. Frozen so the list
# cannot be modified at runtime, like the other constants of this class.
ALLOWED_CONFIG_KEYS = (
  Sanitizer::TOOLS.values.flatten(1) + %w[
    includes
    settings
    before
    after
    before_url
    after_url
    ignore_whitespace
    export
  ]
).freeze
|
47
|
+
|
48
|
+
##
|
49
|
+
# Keys allowed in the "settings" key.
|
50
|
+
# TODO: Create a method self.supports
|
51
|
+
# TODO: Deprecate in favor of self.supports key, subkey, subkey...
|
52
|
+
# Order is kept as declared; the list is frozen.
ALLOWED_SETTINGS_KEYS = %w[
  preset depth whitelist blacklist concurrency interval curl_opts
].freeze
|
15
61
|
|
16
62
|
# Raised for unusable configuration (e.g. missing file, unknown key,
# undefined base URL).
class InvalidConfig < SiteDiffException
end

# Error class for a configuration that cannot be located.
# NOTE(review): not raised anywhere in the visible part of this file.
class ConfigNotFound < SiteDiffException
end
|
@@ -36,7 +82,7 @@ class SiteDiff
|
|
36
82
|
def self.normalize(conf)
|
37
83
|
tools = Sanitizer::TOOLS
|
38
84
|
|
39
|
-
#
|
85
|
+
# Merge globals
|
40
86
|
%w[before after].each do |pos|
|
41
87
|
conf[pos] ||= {}
|
42
88
|
tools[:array].each do |key|
|
@@ -47,10 +93,11 @@ class SiteDiff
|
|
47
93
|
conf[pos]['url'] ||= conf[pos + '_url']
|
48
94
|
conf[pos]['curl_opts'] = conf['curl_opts']
|
49
95
|
end
|
50
|
-
|
96
|
+
|
97
|
+
# Normalize paths.
|
51
98
|
conf['paths'] = Config.normalize_paths(conf['paths'])
|
52
99
|
|
53
|
-
conf.select { |k, _v|
|
100
|
+
conf.select { |k, _v| ALLOWED_CONFIG_KEYS.include? k }
|
54
101
|
end
|
55
102
|
|
56
103
|
# Merges two normalized Hashes according to the following rules:
|
@@ -67,65 +114,342 @@ class SiteDiff
|
|
67
114
|
# (h2) before: {selector: bar, sanitization: [pattern: bar]}
|
68
115
|
# (h3) before: {selector: foo, sanitization: [pattern: foo, pattern: bar]}
|
69
116
|
def self.merge(first, second)
  # Merges two normalized config Hashes into a NEW Hash. Neither argument
  # is modified, so shared or frozen Hashes (such as DEFAULT_CONFIG) can be
  # passed in safely.
  #
  # @param [Hash] first
  #   Base configuration.
  # @param [Hash] second
  #   Configuration merged on top of the base.
  #
  # @return [Hash]
  #   Merged configuration with "before", "after" and "settings" keys.
  array_tools = Sanitizer::TOOLS[:array]
  result = {
    'before' => {},
    'after' => {},
    'settings' => {}
  }

  # Merge top-level sanitization rules: the second Hash wins when it has
  # a value; keys without any value are left out of the result.
  Sanitizer::TOOLS.values.flatten(1).each do |key|
    value = second[key] || first[key]
    result[key] = value if value
  end

  # Rule 1: "before" and "after" are merged section by section.
  %w[before after].each do |pos|
    base = first[pos] || {}
    extra = second[pos] || {}

    # Non-destructive Hash#merge; the block only runs for keys present in
    # both sections.
    result[pos] = base.merge(extra) do |key, a, b|
      if array_tools.include? key
        (a || []) + (b || []) # Rule 2a: array rules are concatenated.
      elsif key == 'settings'
        b
      else
        a || b # Rule 2b: the first value wins for everything else.
      end
    end
  end

  # Merge settings.
  result['settings'] = merge_deep(
    first['settings'] || {},
    second['settings'] || {}
  )

  result
end
|
87
160
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
161
|
+
##
|
162
|
+
# Merges 2 iterable objects deeply.
|
163
|
+
def self.merge_deep(first, second)
  # Recursively merges +second+ into a copy of +first+.
  #
  # For keys present in both: Hashes are merged recursively, Arrays are
  # concatenated, and any other value is replaced by the one from +second+.
  #
  # @param [Hash] first
  # @param [Hash] second
  #
  # @return [Hash]
  #   A new Hash; the arguments are not modified.
  first.merge(second) do |_key, val1, val2|
    if val1.is_a? Hash
      # Recurse through this method directly. Inside a class method
      # `self.class` is Class, which does not respond to merge_deep.
      merge_deep(val1, val2 || {})
    elsif val1.is_a? Array
      val1 + (val2 || [])
    else
      val2
    end
  end
end
|
100
174
|
|
101
|
-
|
102
|
-
|
175
|
+
##
|
176
|
+
# Gets all loaded configuration except defaults.
|
177
|
+
#
|
178
|
+
# @return [Hash]
|
179
|
+
# Config data.
|
180
|
+
def all
  # The Marshal round-trip produces a deep copy, so removing the defaults
  # never touches @config itself.
  self.class.remove_defaults(Marshal.load(Marshal.dump(@config)))
end
|
184
|
+
|
185
|
+
##
|
186
|
+
# Removes default parameters from a config hash.
|
187
|
+
#
|
188
|
+
# I know this is weird, but it'll be fixed. The config management needs to
|
189
|
+
# be streamlined further.
|
190
|
+
def self.remove_defaults(data)
  # Strips default values from the "settings" section of a config Hash.
  #
  # NOTE: no copy is made. +data+ is modified in place and the same object
  # is returned; pass a copy if the original must stay intact.
  #
  # @param [Hash] data
  #   Config data with string keys.
  #
  # @return [Hash]
  #   The same Hash, without default settings.
  settings = data['settings']

  # Nothing to strip when there is no "settings" section.
  return data unless settings

  # Exclude settings that are unset or equal to the defaults.
  settings.delete_if do |key, value|
    !value || value == DEFAULT_CONFIG['settings'][key]
  end

  # Exclude default curl opts; drop the key entirely when none are left.
  curl_opts = settings['curl_opts']
  if curl_opts
    curl_opts.delete_if do |key, value|
      value == UriWrapper::DEFAULT_CURL_OPTS[key.to_sym]
    end
    settings.delete('curl_opts') if curl_opts.empty?
  end

  data
end
|
212
|
+
|
213
|
+
# Creates a SiteDiff Config object.
|
214
|
+
def initialize(file, directory)
  # Loads the config file, merges it with the defaults and validates it.
  #
  # @param [String|Nil] file
  #   Path to a config file; nil selects DEFAULT_FILENAME in +directory+.
  # @param [String] directory
  #   Directory used for the default config file (and stored on the object).
  #
  # @raise [InvalidConfig]
  #   If the config file does not exist.
  file = File.join(directory, DEFAULT_FILENAME) if file.nil?
  unless File.exist?(file)
    raise InvalidConfig, "Missing config file #{File.expand_path(file)}."
  end

  # Merge onto a deep copy of the defaults. DEFAULT_CONFIG is only frozen
  # at the top level, so handing its nested Hashes to the merge could let
  # values loaded from this file leak into the shared defaults.
  defaults = Marshal.load(Marshal.dump(DEFAULT_CONFIG))
  @config = Config.merge(defaults, Config.load_conf(file))
  @file = file
  @directory = directory

  # Validate configurations.
  validate
end
|
228
|
+
|
229
|
+
# Get "before" site configuration.
|
230
|
+
def before(apply_preset = false)
  # Thin wrapper around #section for the "before" site.
  section(:before, apply_preset)
end

# Get "before" site URL.
def before_url
  # nil when there is no "before" section at all.
  site = before
  return nil unless site

  site['url']
end
|
104
239
|
|
105
|
-
|
106
|
-
|
240
|
+
# Get "after" site configuration.
|
241
|
+
def after(apply_preset = false)
  # Thin wrapper around #section for the "after" site.
  section(:after, apply_preset)
end

# Get "after" site URL.
def after_url
  # nil when there is no "after" section at all.
  site = after
  return nil unless site

  site['url']
end
|
250
|
+
|
251
|
+
# Get paths.
|
109
252
|
def paths
  # Reads the "paths" entry of the loaded config.
  @config['paths']
end

# Set paths.
def paths=(paths)
  # Anything other than an Array is rejected with a RuntimeError.
  unless paths.is_a?(Array)
    raise 'Paths must be an Array'
  end

  # Paths are stored in the form returned by Config.normalize_paths.
  normalized = Config.normalize_paths(paths)
  @config['paths'] = normalized
end
|
116
262
|
|
263
|
+
# Get ignore_whitespace option
|
264
|
+
def ignore_whitespace
  # Reads the "ignore_whitespace" entry of the loaded config.
  @config['ignore_whitespace']
end

# Set ignore_whitespace option
def ignore_whitespace=(ignore_whitespace)
  # Stored as given; no type check or conversion happens here.
  @config.store('ignore_whitespace', ignore_whitespace)
end
|
272
|
+
|
273
|
+
# Get export option
|
274
|
+
def export
  # Reads the "export" entry of the loaded config.
  @config['export']
end

# Set export option
def export=(export)
  # Stored as given; no type check or conversion happens here.
  @config.store('export', export)
end
|
282
|
+
|
283
|
+
##
|
284
|
+
# Writes an array of paths to a file.
|
285
|
+
#
|
286
|
+
# @param [Array] paths
|
287
|
+
# An array of paths.
|
288
|
+
# @param [String] file
|
289
|
+
# Optional path to a file.
|
290
|
+
def paths_file_write(paths, file = nil)
  # Only a non-empty Array is accepted.
  if !paths.is_a?(Array) || paths.empty?
    raise SiteDiffException, 'Write failed. Invalid paths.'
  end

  # Without an explicit target, use the default paths file inside the
  # config directory.
  target = file || File.join(@directory, DEFAULT_PATHS_FILENAME)

  # IO#puts writes every Array element on a line of its own.
  File.open(target, 'w+') { |handle| handle.puts(paths) }
end
|
298
|
+
|
299
|
+
##
|
300
|
+
# Reads a collection of paths from a file.
|
301
|
+
#
|
302
|
+
# @param [String] file
|
303
|
+
# A file containing one path per line.
|
304
|
+
#
|
305
|
+
# @return [Integer]
|
306
|
+
# Number of paths read.
|
307
|
+
def paths_file_read(file = nil)
  # Fall back to the default paths file inside the config directory.
  source = file || File.join(@directory, DEFAULT_PATHS_FILENAME)

  unless File.exist?(source)
    raise Config::InvalidConfig, "File not found: #{source}"
  end

  # Assign through the #paths= setter rather than writing to @config here.
  self.paths = File.readlines(source)

  # Return the number of paths.
  paths.length
end
|
319
|
+
|
320
|
+
##
|
321
|
+
# Get roots.
|
322
|
+
#
|
323
|
+
# Example: If the config has a "before" and "after" sections, then roots
|
324
|
+
# will be ["before", "after"].
|
325
|
+
def roots
  # "after" is always listed; "before" is added only when that section
  # exists. The Hash is rebuilt (and stored in @roots) on every call.
  @roots = { 'after' => after_url }
  @roots.tap { |map| map['before'] = before_url if before }
end
|
330
|
+
|
331
|
+
##
|
332
|
+
# Gets a setting.
|
333
|
+
#
|
334
|
+
# @param [String] key
|
335
|
+
# A key.
|
336
|
+
#
|
337
|
+
# @return [*]
|
338
|
+
# A value, if exists.
|
339
|
+
def setting(key)
  # Settings are stored under string keys; Symbols are accepted as well.
  name = key.is_a?(Symbol) ? key.to_s : key
  values = @config['settings']

  # nil for unknown keys; a stored false/nil is returned as is.
  values.key?(name) ? values[name] : nil
end
|
343
|
+
|
344
|
+
##
|
345
|
+
# Gets all settings.
|
346
|
+
#
|
347
|
+
# TODO: Make sure the settings are not writable.
|
348
|
+
#
|
349
|
+
# @return [Hash]
|
350
|
+
# All settings.
|
351
|
+
def settings
  # Hands out the live Hash (not a copy), so changes made by the caller
  # are visible in the config.
  @config['settings']
end
|
354
|
+
|
117
355
|
# Checks if the configuration is usable for diff-ing.
|
356
|
+
# TODO: Do we actually need the opts argument?
|
118
357
|
def validate(opts = {})
  # A "before" URL is required unless the caller passes need_before: false.
  need_before = { need_before: true }.merge(opts)[:need_before]
  if need_before && !before['url']
    raise InvalidConfig, "Undefined 'before' base URL."
  end

  # The "after" URL is always required.
  raise InvalidConfig, "Undefined 'after' base URL." unless after['url']

  # Validate interval and concurrency: a non-zero interval is only
  # accepted together with a concurrency of exactly 1.
  throttled = setting(:interval).to_i != 0
  parallel = setting(:concurrency) != 1
  if throttled && parallel
    raise InvalidConfig, 'Concurrency must be 1 when an interval is set.'
  end

  # Validate preset (second argument true is passed to Preset.exist?).
  preset = setting(:preset)
  Preset.exist?(preset, true) if preset
end
|
376
|
+
|
377
|
+
##
|
378
|
+
# Returns object clone with stringified keys.
|
379
|
+
# TODO: Make this method available globally, if required.
|
380
|
+
def self.stringify_keys(object)
  # Anything without #each_key (scalars, Arrays, ...) is returned as is,
  # so Hashes nested inside Arrays are not converted.
  return object unless object.respond_to?('each_key')

  # Build a new Hash; Symbol keys become Strings, other keys are kept,
  # and values are converted recursively.
  object.each_key.with_object({}) do |key, converted|
    name = key.is_a?(Symbol) ? key.to_s : key
    converted[name] = stringify_keys(object[key])
  end
end
|
394
|
+
|
395
|
+
##
|
396
|
+
# Creates a RegExp from a string.
|
397
|
+
def self.create_regexp(string_param)
  # Builds a Regexp from a pattern string.
  #
  # @param [String|Nil] string_param
  #   Pattern source.
  #
  # @return [Regexp|Nil]
  #   nil for a nil or empty pattern, and for a pattern that does not
  #   compile (the failure is logged as an error instead of being raised).
  return nil if string_param.nil? || string_param == ''

  Regexp.new(string_param)
rescue RegexpError => e
  # Regexp.new reports a malformed pattern with RegexpError, which is not
  # a SiteDiffException and therefore has to be rescued explicitly.
  SiteDiff.log "Invalid RegExp: #{string_param}", :error
  SiteDiff.log e.message, :error
  # TODO: Use SiteDiff.log type :debug
  # SiteDiff.log e.backtrace, :error if options[:verbose]
  nil
end
|
126
409
|
|
127
410
|
private
|
128
411
|
|
412
|
+
##
|
413
|
+
# Returns one of the "before" or "after" sections.
|
414
|
+
#
|
415
|
+
# @param [String|Symbol]
|
416
|
+
# Section name. Example: before, after.
|
417
|
+
# @param [Boolean] with_preset
|
418
|
+
# Whether to merge with preset config (if any).
|
419
|
+
#
|
420
|
+
# @return [Hash|Nil]
|
421
|
+
# Section data or Nil.
|
422
|
+
def section(name, with_preset = false)
  # Without preset merging the live section Hash from @config is returned.
  # With preset merging a copy is returned, so @config stays untouched.
  name = name.to_s if name.is_a? Symbol

  # Validate section.
  unless %w[before after].include? name
    raise SiteDiffException, '"name" must be one of "before" or "after".'
  end

  # Return nil if section is not defined.
  stored = @config[name]
  return nil unless stored

  # Merge preset rules, if required.
  preset = setting(:preset)
  return stored unless with_preset && !preset.nil?

  # Work on a shallow copy. Appending to the stored Hash would add the
  # preset rules again on every call.
  result = stored.dup
  preset_config = Preset.read preset

  # Merge plugins with array values.
  # TODO: This won't be required after plugin declarations are improved.
  # See https://rm.ewdev.ca/issues/18301
  Sanitizer::TOOLS[:array].each do |key|
    next unless preset_config[key]

    # Array#+ builds a new Array, leaving the stored rules alone.
    result[key] = (result[key] || []) + preset_config[key]
  end

  result
end
|
452
|
+
|
129
453
|
def self.normalize_paths(paths)
|
130
454
|
paths ||= []
|
131
455
|
paths.map { |p| (p[0] == '/' ? p : "/#{p}").chomp }
|
@@ -133,13 +457,20 @@ class SiteDiff
|
|
133
457
|
|
134
458
|
# reads a YAML file and raises an InvalidConfig if the file is not valid.
|
135
459
|
def self.load_raw_yaml(file)
  # @param [String] file
  #   Path to a YAML config file.
  #
  # @return [Hash]
  #   Parsed (not normalized) config; an empty Hash for an empty file.
  #
  # @raise [InvalidConfig]
  #   If the YAML is malformed, is not a Hash at the top level, or uses a
  #   key outside ALLOWED_CONFIG_KEYS.

  # TODO: Only show this in verbose mode.
  SiteDiff.log "Reading config file: #{Pathname.new(file).expand_path}"

  begin
    conf = YAML.load_file(file) || {}
  rescue Psych::SyntaxError => e
    # Report malformed YAML as InvalidConfig, like every other invalid file.
    raise InvalidConfig,
          "Invalid configuration file: '#{file}' (#{e.message})"
  end

  unless conf.is_a? Hash
    raise InvalidConfig, "Invalid configuration file: '#{file}'"
  end

  conf.each_key do |k|
    unless ALLOWED_CONFIG_KEYS.include? k
      raise InvalidConfig, "Unknown configuration key (#{file}): '#{k}'"
    end
  end

  conf
end
|
145
476
|
|
@@ -148,7 +479,9 @@ class SiteDiff
|
|
148
479
|
def self.load_conf(file, visited = [])
|
149
480
|
# don't get fooled by a/../a/ or symlinks
|
150
481
|
file = File.realpath(file)
|
151
|
-
|
482
|
+
if visited.include? file
|
483
|
+
raise InvalidConfig, "Circular dependency: #{file}"
|
484
|
+
end
|
152
485
|
|
153
486
|
conf = load_raw_yaml(file) # not normalized yet
|
154
487
|
visited << file
|