sitediff 0.0.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/bin/sitediff +10 -4
- data/lib/sitediff.rb +179 -91
- data/lib/sitediff/cache.rb +106 -0
- data/lib/sitediff/cli.rb +391 -60
- data/lib/sitediff/config.rb +383 -37
- data/lib/sitediff/config/creator.rb +114 -0
- data/lib/sitediff/config/preset.rb +75 -0
- data/lib/sitediff/crawler.rb +131 -0
- data/lib/sitediff/diff.rb +57 -12
- data/lib/sitediff/exception.rb +5 -0
- data/lib/sitediff/fetch.rb +76 -0
- data/lib/sitediff/files/diff.html.erb +20 -2
- data/lib/sitediff/files/jquery.min.js +2 -0
- data/lib/sitediff/files/normalize.css +349 -0
- data/lib/sitediff/files/report.html.erb +144 -0
- data/lib/sitediff/files/sidebyside.html.erb +16 -0
- data/lib/sitediff/files/sitediff.css +236 -29
- data/lib/sitediff/files/sitediff.js +176 -0
- data/lib/sitediff/report.rb +238 -0
- data/lib/sitediff/result.rb +63 -26
- data/lib/sitediff/sanitize.rb +160 -141
- data/lib/sitediff/sanitize/dom_transform.rb +130 -0
- data/lib/sitediff/sanitize/regexp.rb +82 -0
- data/lib/sitediff/uriwrapper.rb +114 -35
- data/lib/sitediff/webserver.rb +94 -0
- data/lib/sitediff/webserver/resultserver.rb +134 -0
- metadata +103 -43
- data/lib/sitediff/files/html_report.html.erb +0 -47
- data/lib/sitediff/util/cache.rb +0 -32
- data/lib/sitediff/util/webserver.rb +0 -77
data/lib/sitediff/config.rb
CHANGED
@@ -1,13 +1,66 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'sitediff/config/preset'
|
4
|
+
require 'sitediff/exception'
|
5
|
+
require 'sitediff/sanitize'
|
6
|
+
require 'pathname'
|
1
7
|
require 'yaml'
|
2
8
|
|
3
9
|
class SiteDiff
|
10
|
+
# SiteDiff Configuration.
|
4
11
|
class Config
|
12
|
+
# Default config file.
|
13
|
+
DEFAULT_FILENAME = 'sitediff.yaml'
|
14
|
+
|
15
|
+
# Default paths file.
|
16
|
+
DEFAULT_PATHS_FILENAME = 'paths.txt'
|
17
|
+
|
18
|
+
# Default SiteDiff config.
|
19
|
+
DEFAULT_CONFIG = {
|
20
|
+
'settings' => {
|
21
|
+
'depth' => 3,
|
22
|
+
'interval' => 0,
|
23
|
+
'whitelist' => '',
|
24
|
+
'blacklist' => '',
|
25
|
+
'concurrency' => 3,
|
26
|
+
'preset' => nil
|
27
|
+
},
|
28
|
+
'before' => {},
|
29
|
+
'after' => {},
|
30
|
+
'paths' => []
|
31
|
+
}.freeze
|
32
|
+
|
33
|
+
# Keys allowed in config files.
|
34
|
+
# TODO: Deprecate repeated params before_url and after_url.
|
35
|
+
# TODO: Create a method self.supports
|
36
|
+
# TODO: Deprecate in favor of self.supports key, subkey, subkey...
|
37
|
+
# Top-level keys accepted in a config file: every sanitizer tool name plus
# the structural keys listed below. Frozen so the whitelist cannot be
# altered at runtime, matching ALLOWED_SETTINGS_KEYS.
ALLOWED_CONFIG_KEYS = (Sanitizer::TOOLS.values.flatten(1) + %w[
  includes
  settings
  before
  after
  before_url
  after_url
  ignore_whitespace
  export
]).freeze
|
5
47
|
|
6
|
-
|
7
|
-
|
8
|
-
|
48
|
+
##
|
49
|
+
# Keys allowed in the "settings" key.
|
50
|
+
# TODO: Create a method self.supports
|
51
|
+
# TODO: Deprecate in favor of self.supports key, subkey, subkey...
|
52
|
+
ALLOWED_SETTINGS_KEYS = %w[
|
53
|
+
preset
|
54
|
+
depth
|
55
|
+
whitelist
|
56
|
+
blacklist
|
57
|
+
concurrency
|
58
|
+
interval
|
59
|
+
curl_opts
|
60
|
+
].freeze
|
9
61
|
|
10
|
-
class InvalidConfig <
|
62
|
+
class InvalidConfig < SiteDiffException; end
|
63
|
+
class ConfigNotFound < SiteDiffException; end
|
11
64
|
|
12
65
|
# Takes a Hash and normalizes it to the following form by merging globals
|
13
66
|
# into before and after. A normalized config Hash looks like this:
|
@@ -27,22 +80,24 @@ class SiteDiff
|
|
27
80
|
# selector: body
|
28
81
|
#
|
29
82
|
# Normalizes a raw config Hash in place and returns a filtered copy.
#
# For each of "before" and "after": array-type sanitizer rules get the
# global rules appended, scalar-type rules fall back to the global value,
# 'url' falls back to "<side>_url", and 'curl_opts' is overwritten with the
# top-level 'curl_opts' value. 'paths' is then normalized.
#
# @param [Hash] conf
#   Raw configuration; it is modified by this call.
#
# @return [Hash]
#   The entries of conf whose key is listed in ALLOWED_CONFIG_KEYS.
def self.normalize(conf)
  array_tools = Sanitizer::TOOLS[:array]
  scalar_tools = Sanitizer::TOOLS[:scalar]

  # Merge globals into each side.
  %w[before after].each do |pos|
    side = (conf[pos] ||= {})
    array_tools.each do |key|
      side[key] ||= []
      side[key] += conf[key] if conf[key]
    end
    scalar_tools.each { |key| side[key] ||= conf[key] }
    side['url'] ||= conf[pos + '_url']
    side['curl_opts'] = conf['curl_opts']
  end

  # Normalize paths.
  conf['paths'] = Config.normalize_paths(conf['paths'])

  conf.select { |key, _value| ALLOWED_CONFIG_KEYS.include?(key) }
end
|
47
102
|
|
48
103
|
# Merges two normalized Hashes according to the following rules:
|
@@ -59,77 +114,369 @@ class SiteDiff
|
|
59
114
|
# (h2) before: {selector: bar, sanitization: [pattern: bar]}
|
60
115
|
# (h3) before: {selector: foo, sanitization: [pattern: foo, pattern: bar]}
|
61
116
|
# Merges two normalized config Hashes into a new Hash.
#
# Neither argument is modified. This matters because callers pass
# DEFAULT_CONFIG as `first`: its nested hashes are not frozen, and merging
# into them in place would leak one config's values into every later merge.
#
# @param [Hash] first
#   Base configuration.
# @param [Hash] second
#   Configuration merged on top of first.
#
# @return [Hash]
#   Always has 'before', 'after' and 'settings' keys.
def self.merge(first, second)
  result = {
    'before' => {},
    'after' => {},
    'settings' => {}
  }

  # Top-level sanitization rules: second wins, absent/falsy values are
  # left out of the result.
  Sanitizer::TOOLS.values.flatten(1).each do |key|
    value = second[key] || first[key]
    result[key] = value if value
  end

  %w[before after].each do |pos|
    base = first[pos] || {}
    extra = second[pos] || {}

    # Non-destructive merge; the block only runs for keys present in both.
    result[pos] = base.merge(extra) do |key, a, b|
      if Sanitizer::TOOLS[:array].include? key
        (a || []) + (b || []) # Array rules are concatenated.
      elsif key == 'settings'
        b
      else
        a || b # Scalars: the first hash takes precedence.
      end
    end
  end

  # Merge settings.
  result['settings'] = merge_deep(
    first['settings'] || {},
    second['settings'] || {}
  )

  result
end
|
160
|
+
|
161
|
+
##
|
162
|
+
# Merges 2 iterable objects deeply.
|
163
|
+
# Recursively merges two Hashes without modifying either.
#
# For keys present in both: nested Hashes are merged recursively, Arrays
# are concatenated (first's elements, then second's), and any other value
# is taken from second. A nil on the second side leaves a Hash or Array
# from the first side unchanged.
#
# @param [Hash] first
# @param [Hash] second
#
# @return [Hash]
def self.merge_deep(first, second)
  first.merge(second) do |_key, val1, val2|
    case val1
    when Hash
      # This is already a class method, so recurse on self directly;
      # self.class would be Class, which has no merge_deep.
      merge_deep(val1, val2 || {})
    when Array
      val1 + (val2 || [])
    else
      val2
    end
  end
end
|
174
|
+
|
175
|
+
##
|
176
|
+
# Gets all loaded configuration except defaults.
|
177
|
+
#
|
178
|
+
# @return [Hash]
|
179
|
+
# Config data.
|
180
|
+
# Returns the loaded configuration with default values stripped.
#
# @config is round-tripped through Marshal first, so remove_defaults works
# on a copy and the stored configuration is left untouched.
#
# @return [Hash]
def all
  self.class.remove_defaults(Marshal.load(Marshal.dump(@config)))
end
|
184
|
+
|
185
|
+
##
|
186
|
+
# Removes default parameters from a config hash.
|
187
|
+
#
|
188
|
+
# I know this is weird, but it'll be fixed. The config management needs to
|
189
|
+
# be streamlined further.
|
190
|
+
# Strips default values from a config Hash.
#
# NOTE: the Hash is modified IN PLACE and then returned; no copy is made
# here. Pass a copy if the original must be preserved.
#
# @param [Hash] data
#   Config data; a missing 'settings' entry is treated as empty.
#
# @return [Hash]
#   The same Hash object, minus default settings.
def self.remove_defaults(data)
  result = data
  settings = (result['settings'] ||= {})

  # Exclude settings equal to their default, and nil/false settings.
  settings.delete_if do |key, value|
    value == DEFAULT_CONFIG['settings'][key] || !value
  end

  # Exclude default curl opts.
  curl_opts = (settings['curl_opts'] ||= {})
  curl_opts.delete_if do |key, value|
    value == UriWrapper::DEFAULT_CURL_OPTS[key.to_sym]
  end

  # Delete curl opts if empty.
  settings.delete('curl_opts') if curl_opts.empty?

  result
end
|
79
212
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
213
|
+
# Creates a SiteDiff Config object.
|
214
|
+
# Builds a Config from a config file.
#
# @param [String, nil] file
#   Path to the config file; when nil, DEFAULT_FILENAME inside directory
#   is used.
# @param [String] directory
#   SiteDiff working directory.
#
# @raise [InvalidConfig]
#   If the config file does not exist.
def initialize(file, directory)
  # Fallback to default config filename, if none is specified.
  file = File.join(directory, DEFAULT_FILENAME) if file.nil?
  raise InvalidConfig, "Missing config file #{File.expand_path(file)}." unless File.exist?(file)

  loaded = Config.load_conf(file)
  @config = Config.merge(DEFAULT_CONFIG, loaded)
  @file = file
  @directory = directory

  # Validate configurations.
  validate
end
|
86
228
|
|
87
|
-
|
88
|
-
|
229
|
+
# Get "before" site configuration.
|
230
|
+
# Delegates to section(:before, apply_preset) and returns its result.
def before(apply_preset = false)
|
231
|
+
section :before, apply_preset
|
89
232
|
end
|
90
|
-
|
91
|
-
|
233
|
+
|
234
|
+
# Get "before" site URL.
|
235
|
+
# Returns the 'url' entry of the "before" section.
#
# @return [String, nil]
#   nil when the section is missing or has no 'url'.
def before_url
  site = before
  site ? site['url'] : nil
end
|
93
239
|
|
240
|
+
# Get "after" site configuration.
|
241
|
+
# Delegates to section(:after, apply_preset) and returns its result.
def after(apply_preset = false)
|
242
|
+
section :after, apply_preset
|
243
|
+
end
|
244
|
+
|
245
|
+
# Get "after" site URL.
|
246
|
+
# Returns the 'url' entry of the "after" section.
#
# @return [String, nil]
#   nil when the section is missing or has no 'url'.
def after_url
  site = after
  site ? site['url'] : nil
end
|
250
|
+
|
251
|
+
# Get paths.
|
94
252
|
# Returns the value stored under the 'paths' key of @config.
def paths
|
95
253
|
@config['paths']
|
96
254
|
end
|
255
|
+
|
256
|
+
# Set paths.
|
97
257
|
# Replaces the configured paths.
#
# @param [Array] paths
#   Paths to store; they are passed through Config.normalize_paths first.
#
# @raise [RuntimeError]
#   If paths is not an Array.
def paths=(paths)
  raise 'Paths must be an Array' unless Array === paths

  normalized = Config.normalize_paths(paths)
  @config['paths'] = normalized
end
|
262
|
+
|
263
|
+
# Get ignore_whitespace option
|
264
|
+
# Returns the value stored under the 'ignore_whitespace' key of @config.
def ignore_whitespace
|
265
|
+
@config['ignore_whitespace']
|
266
|
+
end
|
267
|
+
|
268
|
+
# Set ignore_whitespace option
|
269
|
+
# Stores the given value under the 'ignore_whitespace' key of @config.
def ignore_whitespace=(ignore_whitespace)
|
270
|
+
@config['ignore_whitespace'] = ignore_whitespace
|
271
|
+
end
|
272
|
+
|
273
|
+
# Get export option
|
274
|
+
# Returns the value stored under the 'export' key of @config.
def export
|
275
|
+
@config['export']
|
276
|
+
end
|
277
|
+
|
278
|
+
# Set export option
|
279
|
+
# Stores the given value under the 'export' key of @config.
def export=(export)
|
280
|
+
@config['export'] = export
|
281
|
+
end
|
282
|
+
|
283
|
+
##
|
284
|
+
# Writes an array of paths to a file.
|
285
|
+
#
|
286
|
+
# @param [Array] paths
|
287
|
+
# An array of paths.
|
288
|
+
# @param [String] file
|
289
|
+
# Optional path to a file.
|
290
|
+
# Writes paths to a file, one per line, replacing any existing content.
#
# @param [Array] paths
#   A non-empty array of paths.
# @param [String] file
#   Target file; defaults to DEFAULT_PATHS_FILENAME inside @directory.
#
# @raise [SiteDiffException]
#   If paths is not an Array or is empty.
def paths_file_write(paths, file = nil)
  if !paths.is_a?(Array) || paths.empty?
    raise SiteDiffException, 'Write failed. Invalid paths.'
  end

  target = file || File.join(@directory, DEFAULT_PATHS_FILENAME)
  File.open(target, 'w+') { |handle| handle.puts(paths) }
end
|
298
|
+
|
299
|
+
##
|
300
|
+
# Reads a collection of paths from a file.
|
301
|
+
#
|
302
|
+
# @param [String] file
|
303
|
+
# A file containing one path per line.
|
304
|
+
#
|
305
|
+
# @return [Integer]
|
306
|
+
# Number of paths read.
|
307
|
+
# Loads paths from a file (one per line) into this config via paths=.
#
# @param [String] file
#   Source file; defaults to DEFAULT_PATHS_FILENAME inside @directory.
#
# @return [Integer]
#   Number of paths held by the config after loading.
#
# @raise [Config::InvalidConfig]
#   If the file does not exist.
def paths_file_read(file = nil)
  source = file || File.join(@directory, DEFAULT_PATHS_FILENAME)
  raise Config::InvalidConfig, "File not found: #{source}" unless File.exist?(source)

  self.paths = File.readlines(source)

  # Return the number of paths.
  paths.length
end
|
319
|
+
|
320
|
+
##
|
321
|
+
# Get roots.
|
322
|
+
#
|
323
|
+
# Example: If the config has "before" and "after" sections, then roots
|
324
|
+
# will be a Hash with "after" and "before" keys, each mapping to its URL.
|
325
|
+
# Builds, stores in @roots, and returns a Hash of section name => URL.
#
# 'after' is always present; 'before' is added only when the "before"
# section exists.
#
# @return [Hash]
def roots
  mapping = { 'after' => after_url }
  mapping['before'] = before_url if before
  @roots = mapping
end
|
330
|
+
|
331
|
+
##
|
332
|
+
# Gets a setting.
|
333
|
+
#
|
334
|
+
# @param [String] key
|
335
|
+
# A key.
|
336
|
+
#
|
337
|
+
# @return [*]
|
338
|
+
# A value, if exists.
|
339
|
+
# Looks up a single entry of @config['settings'].
#
# @param [String, Symbol] key
#   Setting name; a Symbol is converted to a String first.
#
# @return [*]
#   The stored value, or nil when the key is not present.
def setting(key)
  name = key.is_a?(Symbol) ? key.to_s : key
  stored = @config['settings']
  stored.key?(name) ? stored[name] : nil
end
|
343
|
+
|
344
|
+
##
|
345
|
+
# Gets all settings.
|
346
|
+
#
|
347
|
+
# TODO: Make sure the settings are not writable.
|
348
|
+
#
|
349
|
+
# @return [Hash]
|
350
|
+
# All settings.
|
351
|
+
# Returns the Hash stored under @config['settings'] itself (not a copy).
def settings
|
352
|
+
@config['settings']
|
99
353
|
end
|
100
354
|
|
101
355
|
# Checks if the configuration is usable for diff-ing.
|
102
|
-
|
103
|
-
|
356
|
+
# TODO: Do we actually need the opts argument?
|
357
|
+
# Checks that the configuration is usable for diff-ing.
#
# @param [Hash] opts
#   :need_before (default true) - require a "before" URL.
#
# @raise [InvalidConfig]
#   If a required base URL is missing, or an interval is set while
#   concurrency is not 1.
def validate(opts = {})
  options = { need_before: true }.merge(opts)

  raise InvalidConfig, "Undefined 'before' base URL." if options[:need_before] && !before['url']
  raise InvalidConfig, "Undefined 'after' base URL." unless after['url']

  # An interval only makes sense with a single worker.
  throttled = setting(:interval).to_i != 0
  if throttled && setting(:concurrency) != 1
    raise InvalidConfig, 'Concurrency must be 1 when an interval is set.'
  end

  # Validate preset.
  preset = setting(:preset)
  Preset.exist?(preset, true) if preset
end
|
376
|
+
|
377
|
+
##
|
378
|
+
# Returns object clone with stringified keys.
|
379
|
+
# TODO: Make this method available globally, if required.
|
380
|
+
# Returns a copy of a Hash-like object with Symbol keys turned into Strings.
#
# Values are processed recursively when they respond to each_key; anything
# else (including Arrays) is returned as is.
#
# @param [*] object
#
# @return [*]
#   A new Hash, or object itself when it does not respond to each_key.
def self.stringify_keys(object)
  return object unless object.respond_to?('each_key')

  object.each_key.with_object({}) do |key, copy|
    string_key = key.is_a?(Symbol) ? key.to_s : key
    copy[string_key] = stringify_keys(object[key])
  end
end
|
394
|
+
|
395
|
+
##
|
396
|
+
# Creates a RegExp from a string.
|
397
|
+
# Compiles a pattern string into a Regexp.
#
# @param [String, nil] string_param
#   Pattern source. nil and '' both mean "no pattern".
#
# @return [Regexp, nil]
#   The compiled pattern, or nil when no pattern was given or the pattern
#   is malformed (the failure is reported through SiteDiff.log).
def self.create_regexp(string_param)
  return nil if string_param.nil? || string_param == ''

  Regexp.new(string_param)
rescue RegexpError => e
  # Regexp.new reports a malformed pattern with RegexpError, which is not a
  # SiteDiffException, so that is the class that has to be rescued here.
  SiteDiff.log "Invalid RegExp: #{string_param}", :error
  SiteDiff.log e.message, :error
  # TODO: Use SiteDiff.log type :debug
  # SiteDiff.log e.backtrace, :error if options[:verbose]
  nil
end
|
107
409
|
|
108
410
|
private
|
109
411
|
|
412
|
+
##
|
413
|
+
# Returns one of the "before" or "after" sections.
|
414
|
+
#
|
415
|
+
# @param [String|Symbol] name
|
416
|
+
# Section name. Example: before, after.
|
417
|
+
# @param [Boolean] with_preset
|
418
|
+
# Whether to merge with preset config (if any).
|
419
|
+
#
|
420
|
+
# @return [Hash|Nil]
|
421
|
+
# Section data or Nil.
|
422
|
+
# Returns the "before" or "after" section of the configuration.
#
# @param [String|Symbol] name
#   Section name: before or after.
# @param [Boolean] with_preset
#   Whether to merge in the array-type rules of the configured preset.
#
# @return [Hash|Nil]
#   Without a preset merge: the stored section itself. With a preset merge:
#   a copy carrying the combined rules. Nil when the section is undefined.
#
# @raise [SiteDiffException]
#   If name is neither "before" nor "after".
def section(name, with_preset = false)
  name = name.to_s if name.is_a? Symbol

  # Validate section.
  unless %w[before after].include? name
    raise SiteDiffException, '"name" must be one of "before" or "after".'
  end

  # Return nil if section is not defined.
  result = @config[name]
  return nil unless result

  preset = setting(:preset)
  return result unless with_preset && !preset.nil?

  preset_config = Preset.read preset

  # Merge into a shallow copy: writing the combined arrays back into
  # @config would append the preset rules again on every call.
  result = result.dup

  # Merge plugins with array values.
  # TODO: This won't be required after plugin declarations are improved.
  # See https://rm.ewdev.ca/issues/18301
  Sanitizer::TOOLS[:array].each do |key|
    next unless preset_config[key]

    result[key] = (result[key] || []) + preset_config[key]
  end

  result
end
|
452
|
+
|
110
453
|
# Ensures every path starts with "/" and carries no trailing line break.
#
# @param [Array<String>, nil] paths
#   Paths to normalize; nil is treated as an empty list.
#
# @return [Array<String>]
#   A new array of normalized paths.
def self.normalize_paths(paths)
  (paths || []).map do |path|
    rooted = path[0] == '/' ? path : "/#{path}"
    rooted.chomp
  end
end
|
114
457
|
|
115
458
|
# reads a YAML file and raises an InvalidConfig if the file is not valid.
|
116
459
|
# Reads a YAML config file and checks its top-level keys.
#
# @param [String] file
#   Path to the YAML file. An empty document is treated as {}.
#
# @return [Hash]
#   The parsed configuration.
#
# @raise [InvalidConfig]
#   If the document is not a Hash, or has a key that is not listed in
#   ALLOWED_CONFIG_KEYS (the first such key is reported).
def self.load_raw_yaml(file)
  # TODO: Only show this in verbose mode.
  SiteDiff.log "Reading config file: #{Pathname.new(file).expand_path}"
  conf = YAML.load_file(file) || {}

  raise InvalidConfig, "Invalid configuration file: '#{file}'" unless conf.is_a?(Hash)

  unknown = conf.keys.reject { |key| ALLOWED_CONFIG_KEYS.include?(key) }
  unless unknown.empty?
    raise InvalidConfig, "Unknown configuration key (#{file}): '#{unknown.first}'"
  end

  conf
end
|
129
476
|
|
130
477
|
# loads a single YAML configuration file, merges all its 'included' files
|
131
478
|
# and returns a normalized Hash.
|
132
|
-
def self.load_conf(file, visited=[])
|
479
|
+
def self.load_conf(file, visited = [])
|
133
480
|
# don't get fooled by a/../a/ or symlinks
|
134
481
|
file = File.realpath(file)
|
135
482
|
if visited.include? file
|
@@ -141,14 +488,13 @@ class SiteDiff
|
|
141
488
|
|
142
489
|
# normalize and merge includes
|
143
490
|
includes = conf['includes'] || []
|
144
|
-
conf = Config
|
491
|
+
conf = Config.normalize(conf)
|
145
492
|
includes.each do |dep|
|
146
493
|
# include paths are relative to the including file.
|
147
494
|
dep = File.join(File.dirname(file), dep)
|
148
|
-
conf = Config
|
495
|
+
conf = Config.merge(conf, load_conf(dep, visited))
|
149
496
|
end
|
150
497
|
conf
|
151
498
|
end
|
152
|
-
|
153
499
|
end
|
154
500
|
end
|