sitediff 0.0.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,13 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sitediff/config/preset'
4
+ require 'sitediff/exception'
5
+ require 'sitediff/sanitize'
6
+ require 'pathname'
1
7
  require 'yaml'
2
8
 
3
9
  class SiteDiff
10
+ # SiteDiff Configuration.
4
11
  class Config
12
+ # Default config file.
13
+ DEFAULT_FILENAME = 'sitediff.yaml'
14
+
15
+ # Default paths file.
16
+ DEFAULT_PATHS_FILENAME = 'paths.txt'
17
+
18
+ # Default SiteDiff config.
19
+ DEFAULT_CONFIG = {
20
+ 'settings' => {
21
+ 'depth' => 3,
22
+ 'interval' => 0,
23
+ 'whitelist' => '',
24
+ 'blacklist' => '',
25
+ 'concurrency' => 3,
26
+ 'preset' => nil
27
+ },
28
+ 'before' => {},
29
+ 'after' => {},
30
+ 'paths' => []
31
+ }.freeze
32
+
33
+ # Keys allowed in config files.
34
+ # TODO: Deprecate repeated params before_url and after_url.
35
+ # TODO: Create a method self.supports
36
+ # TODO: Deprecate in favor of self.supports key, subkey, subkey...
37
# Sanitizer tool names plus the structural keys below. Frozen, like the other
# constants in this class, so the shared list cannot be altered at runtime.
ALLOWED_CONFIG_KEYS = (
  Sanitizer::TOOLS.values.flatten(1) + %w[
    includes
    settings
    before
    after
    before_url
    after_url
    ignore_whitespace
    export
  ]
).freeze
5
47
 
6
- # keys allowed in configuration files
7
- CONF_KEYS = Sanitize::TOOLS.values.flatten(1) +
8
- %w[paths before after before_url after_url includes]
48
+ ##
49
+ # Keys allowed in the "settings" key.
50
+ # TODO: Create a method self.supports
51
+ # TODO: Deprecate in favor of self.supports key, subkey, subkey...
52
+ ALLOWED_SETTINGS_KEYS = %w[
53
+ preset
54
+ depth
55
+ whitelist
56
+ blacklist
57
+ concurrency
58
+ interval
59
+ curl_opts
60
+ ].freeze
9
61
 
10
- class InvalidConfig < Exception; end
62
+ class InvalidConfig < SiteDiffException; end
63
+ class ConfigNotFound < SiteDiffException; end
11
64
 
12
65
  # Takes a Hash and normalizes it to the following form by merging globals
13
66
  # into before and after. A normalized config Hash looks like this:
@@ -27,22 +80,24 @@ class SiteDiff
27
80
  # selector: body
28
81
  #
29
82
  def self.normalize(conf)
30
- tools = Sanitize::TOOLS
83
+ tools = Sanitizer::TOOLS
31
84
 
32
- # merge globals
85
+ # Merge globals
33
86
  %w[before after].each do |pos|
34
87
  conf[pos] ||= {}
35
88
  tools[:array].each do |key|
36
89
  conf[pos][key] ||= []
37
90
  conf[pos][key] += conf[key] if conf[key]
38
91
  end
39
- tools[:scalar].each {|key| conf[pos][key] ||= conf[key]}
92
+ tools[:scalar].each { |key| conf[pos][key] ||= conf[key] }
40
93
  conf[pos]['url'] ||= conf[pos + '_url']
94
+ conf[pos]['curl_opts'] = conf['curl_opts']
41
95
  end
42
- # normalize paths
43
- conf['paths'] = Config::normalize_paths(conf['paths'])
44
96
 
45
- conf.select {|k,v| %w[before after paths].include? k}
97
+ # Normalize paths.
98
+ conf['paths'] = Config.normalize_paths(conf['paths'])
99
+
100
+ conf.select { |k, _v| ALLOWED_CONFIG_KEYS.include? k }
46
101
  end
47
102
 
48
103
  # Merges two normalized Hashes according to the following rules:
@@ -59,77 +114,369 @@ class SiteDiff
59
114
  # (h2) before: {selector: bar, sanitization: [pattern: bar]}
60
115
  # (h3) before: {selector: foo, sanitization: [pattern: foo, pattern: bar]}
61
116
  def self.merge(first, second)
62
- result = { 'paths' => {}, 'before' => {}, 'after' => {} }
63
- result['paths'] = (first['paths'] || []) + (second['paths'] || []) # rule 1
117
+ result = {
118
+ 'before' => {},
119
+ 'after' => {},
120
+ 'settings' => {}
121
+ }
122
+
123
+ # Merge sanitization rules.
124
+ Sanitizer::TOOLS.values.flatten(1).each do |key|
125
+ result[key] = second[key] || first[key]
126
+ result.delete(key) unless result[key]
127
+ end
128
+
129
+ # Rule 1.
64
130
  %w[before after].each do |pos|
131
+ first[pos] ||= {}
132
+ second[pos] ||= {}
133
+
134
+ # If only the second hash has the value.
65
135
  unless first[pos]
66
136
  result[pos] = second[pos] || {}
67
137
  next
68
138
  end
139
+
69
140
  result[pos] = first[pos].merge!(second[pos]) do |key, a, b|
70
- if Sanitize::TOOLS[:array].include? key # rule 2a
71
- result[pos][key] = (a || []) + (b|| [])
72
- else
73
- result[pos][key] = a || b # rule 2b
74
- end
141
+ # Rule 2a.
142
+ result[pos][key] = if Sanitizer::TOOLS[:array].include? key
143
+ (a || []) + (b || [])
144
+ elsif key == 'settings'
145
+ b
146
+ else
147
+ a || b # Rule 2b.
148
+ end
149
+ end
150
+ end
151
+
152
+ # Merge settings.
153
+ result['settings'] = merge_deep(
154
+ first['settings'] || {},
155
+ second['settings'] || {}
156
+ )
157
+
158
+ result
159
+ end
160
+
161
+ ##
162
+ # Merges 2 iterable objects deeply.
163
def self.merge_deep(first, second)
  # Recursively merges +second+ into a copy of +first+.
  #
  # For keys present in both: Hashes are merged recursively, Arrays are
  # concatenated, and any other value is replaced by the one from +second+.
  # A nil on the +second+ side leaves a Hash or Array from +first+ as is.
  #
  # @return [Hash] A new Hash; neither argument is modified.
  first.merge(second) do |_key, val1, val2|
    case val1
    when Hash
      # Inside a class method `self` is already the class, so recurse directly.
      merge_deep(val1, val2 || {})
    when Array
      val1 + (val2 || [])
    else
      val2
    end
  end
end
174
+
175
+ ##
176
+ # Gets all loaded configuration except defaults.
177
+ #
178
+ # @return [Hash]
179
+ # Config data.
180
def all
  # Deep-copy the loaded config (Marshal round trip) so that stripping the
  # defaults below does not touch @config itself.
  self.class.remove_defaults(Marshal.load(Marshal.dump(@config)))
end
184
+
185
+ ##
186
+ # Removes default parameters from a config hash.
187
+ #
188
+ # I know this is weird, but it'll be fixed. The config management needs to
189
+ # be streamlined further.
190
def self.remove_defaults(data)
  # Strips default values from data['settings'].
  # NOTE: +data+ is modified in place and returned; no copy is made here
  # (#all hands in a deep copy).
  settings = data['settings']

  # Drop settings that are falsy or equal to their default.
  settings.delete_if do |name, value|
    !value || value == DEFAULT_CONFIG['settings'][name]
  end

  # Drop curl options that match the UriWrapper defaults.
  curl_opts = (settings['curl_opts'] ||= {})
  curl_opts.delete_if do |name, value|
    value == UriWrapper::DEFAULT_CURL_OPTS[name.to_sym]
  end

  # Remove the curl_opts key altogether when nothing is left in it.
  settings.delete('curl_opts') if curl_opts.empty?

  data
end
79
212
 
80
- def initialize(files)
81
- @config = {'paths' => [], 'before' => {}, 'after' => {} }
82
- files.each do |file|
83
- @config = Config::merge(@config, Config::load_conf(file))
213
+ # Creates a SiteDiff Config object.
214
def initialize(file, directory)
  # Loads +file+ (or DEFAULT_FILENAME inside +directory+ when +file+ is nil),
  # merges it over DEFAULT_CONFIG and validates the result.
  #
  # Raises InvalidConfig when the config file does not exist.
  file = file.nil? ? File.join(directory, DEFAULT_FILENAME) : file

  unless File.exist?(file)
    raise InvalidConfig, "Missing config file #{File.expand_path(file)}."
  end

  @config = Config.merge(DEFAULT_CONFIG, Config.load_conf(file))
  @file = file
  @directory = directory

  validate
end
86
228
 
87
- def before
88
- @config['before']
229
+ # Get "before" site configuration.
230
def before(apply_preset = false)
  # Delegates to #section; +apply_preset+ is passed through unchanged.
  section(:before, apply_preset)
end
90
- def after
91
- @config['after']
233
+
234
+ # Get "before" site URL.
235
def before_url
  # URL of the "before" section, or nil when #before returns nothing.
  site = before
  site ? site['url'] : nil
end
93
239
 
240
+ # Get "after" site configuration.
241
def after(apply_preset = false)
  # Delegates to #section; +apply_preset+ is passed through unchanged.
  section(:after, apply_preset)
end
244
+
245
+ # Get "after" site URL.
246
def after_url
  # URL of the "after" section, or nil when #after returns nothing.
  site = after
  site ? site['url'] : nil
end
250
+
251
+ # Get paths.
94
252
  def paths
95
253
  @config['paths']
96
254
  end
255
+
256
+ # Set paths.
97
257
  def paths=(paths)
98
- @config['paths'] = Config::normalize_paths(paths)
258
+ raise 'Paths must be an Array' unless paths.is_a? Array
259
+
260
+ @config['paths'] = Config.normalize_paths(paths)
261
+ end
262
+
263
+ # Get ignore_whitespace option
264
def ignore_whitespace
  # Value stored under the 'ignore_whitespace' config key (nil when unset).
  @config['ignore_whitespace']
end
267
+
268
+ # Set ignore_whitespace option
269
def ignore_whitespace=(ignore_whitespace)
  # Stores the value as given under the 'ignore_whitespace' config key.
  @config['ignore_whitespace'] = ignore_whitespace
end
272
+
273
+ # Get export option
274
def export
  # Value stored under the 'export' config key (nil when unset).
  @config['export']
end
277
+
278
+ # Set export option
279
def export=(export)
  # Stores the value as given under the 'export' config key.
  @config['export'] = export
end
282
+
283
+ ##
284
+ # Writes an array of paths to a file.
285
+ #
286
+ # @param [Array] paths
287
+ # An array of paths.
288
+ # @param [String] file
289
+ # Optional path to a file.
290
def paths_file_write(paths, file = nil)
  # Writes +paths+ to +file+, one entry per line. Without +file+ the target
  # is DEFAULT_PATHS_FILENAME inside @directory.
  #
  # Raises SiteDiffException unless +paths+ is a non-empty Array.
  usable = paths.is_a?(Array) && paths.length.positive?
  raise SiteDiffException, 'Write failed. Invalid paths.' unless usable

  file ||= File.join(@directory, DEFAULT_PATHS_FILENAME)
  File.open(file, 'w+') { |handle| handle.puts(paths) }
end
298
+
299
+ ##
300
+ # Reads a collection of paths from a file.
301
+ #
302
+ # @param [String] file
303
+ # A file containing one path per line.
304
+ #
305
+ # @return [Integer]
306
+ # Number of paths read.
307
def paths_file_read(file = nil)
  # Loads paths from +file+ (default: DEFAULT_PATHS_FILENAME inside
  # @directory) through the #paths= setter and returns how many were stored.
  #
  # Raises Config::InvalidConfig when the file does not exist.
  file ||= File.join(@directory, DEFAULT_PATHS_FILENAME)
  raise Config::InvalidConfig, "File not found: #{file}" unless File.exist?(file)

  self.paths = File.readlines(file)
  paths.length
end
319
+
320
+ ##
321
+ # Get roots.
322
+ #
323
+ # Example: If the config has "before" and "after" sections, then roots
324
+ # will be { "after" => <after URL>, "before" => <before URL> }.
325
def roots
  # Builds a Hash of section name => base URL. "after" is always present;
  # "before" is added only when #before returns a section. The Hash is also
  # kept in @roots.
  found = { 'after' => after_url }
  found['before'] = before_url if before
  @roots = found
end
330
+
331
+ ##
332
+ # Gets a setting.
333
+ #
334
+ # @param [String] key
335
+ # A key.
336
+ #
337
+ # @return [*]
338
+ # A value, if exists.
339
def setting(key)
  # Looks up +key+ (Symbols are converted to Strings) in the 'settings'
  # section; returns nil when the key is absent.
  name = key.is_a?(Symbol) ? key.to_s : key
  @config['settings'].fetch(name, nil)
end
343
+
344
+ ##
345
+ # Gets all settings.
346
+ #
347
+ # TODO: Make sure the settings are not writable.
348
+ #
349
+ # @return [Hash]
350
+ # All settings.
351
def settings
  # The 'settings' Hash itself, not a copy: callers can modify it.
  @config['settings']
end
100
354
 
101
355
  # Checks if the configuration is usable for diff-ing.
102
- def validate
103
- raise InvalidConfig, "Undefined 'before' base URL." unless before['url']
356
+ # TODO: Do we actually need the opts argument?
357
def validate(opts = {})
  # Raises InvalidConfig when the configuration cannot be used:
  # - the "before" URL is missing (skipped when opts[:need_before] is falsy),
  # - the "after" URL is missing,
  # - a non-zero interval is combined with a concurrency other than 1.
  # A configured preset is additionally passed to Preset.exist?.
  need_before = opts.fetch(:need_before, true)

  if need_before && !before['url']
    raise InvalidConfig, "Undefined 'before' base URL."
  end

  raise InvalidConfig, "Undefined 'after' base URL." unless after['url']

  # Validate interval and concurrency.
  throttled = setting(:interval).to_i != 0
  if throttled && setting(:concurrency) != 1
    raise InvalidConfig, 'Concurrency must be 1 when an interval is set.'
  end

  # Validate preset.
  preset = setting(:preset)
  Preset.exist?(preset, true) if preset
end
376
+
377
+ ##
378
+ # Returns object clone with stringified keys.
379
+ # TODO: Make this method available globally, if required.
380
def self.stringify_keys(object)
  # Returns a new Hash in which Symbol keys are replaced by Strings,
  # descending into values that respond to each_key. Anything that does not
  # respond to each_key (including Arrays) is returned untouched.
  return object unless object.respond_to?('each_key')

  copy = {}
  object.each_key do |key|
    name = case key
           when Symbol then key.to_s
           else key
           end
    copy[name] = stringify_keys(object[key])
  end
  copy
end
394
+
395
+ ##
396
+ # Creates a RegExp from a string.
397
def self.create_regexp(string_param)
  # Builds a Regexp from +string_param+.
  #
  # @param [String, nil] string_param
  #   Pattern source.
  #
  # @return [Regexp, nil]
  #   nil for a nil, empty or invalid pattern; invalid patterns are logged.
  return nil if string_param.nil? || string_param == ''

  Regexp.new(string_param)
rescue RegexpError => e
  # Regexp.new reports a malformed pattern with RegexpError.
  SiteDiff.log "Invalid RegExp: #{string_param}", :error
  SiteDiff.log e.message, :error
  # TODO: Use SiteDiff.log type :debug
  # SiteDiff.log e.backtrace, :error if options[:verbose]
  nil
end
107
409
 
108
410
  private
109
411
 
412
+ ##
413
+ # Returns one of the "before" or "after" sections.
414
+ #
415
+ # @param [String|Symbol]
416
+ # Section name. Example: before, after.
417
+ # @param [Boolean] with_preset
418
+ # Whether to merge with preset config (if any).
419
+ #
420
+ # @return [Hash|Nil]
421
+ # Section data or Nil.
422
def section(name, with_preset = false)
  # Returns the "before" or "after" section of the config.
  #
  # Without a preset the stored section Hash itself is returned. When
  # +with_preset+ is set and a preset is configured, the preset's array rules
  # are appended on a copy, so @config is left untouched and repeated calls
  # do not pile the preset rules up.
  #
  # Raises SiteDiffException for any other section name.
  name = name.to_s if name.is_a? Symbol

  # Validate section.
  unless %w[before after].include? name
    raise SiteDiffException, '"name" must be one of "before" or "after".'
  end

  # Return nil if section is not defined.
  result = @config[name]
  return nil unless result

  preset = setting(:preset)
  return result unless with_preset && !preset.nil?

  preset_config = Preset.read preset

  # A shallow copy is enough: only top-level keys are reassigned below.
  result = result.dup

  # Merge plugins with array values.
  # TODO: This won't be required after plugin declarations are improved.
  # See https://rm.ewdev.ca/issues/18301
  Sanitizer::TOOLS[:array].each do |key|
    next unless preset_config[key]

    result[key] = (result[key] || []) + preset_config[key]
  end

  result
end
452
+
110
453
  def self.normalize_paths(paths)
111
454
  paths ||= []
112
- return paths.map { |p| (p[0] == '/' ? p : "/#{p}").chomp }
455
+ paths.map { |p| (p[0] == '/' ? p : "/#{p}").chomp }
113
456
  end
114
457
 
115
458
  # reads a YAML file and raises an InvalidConfig if the file is not valid.
116
459
  def self.load_raw_yaml(file)
117
- SiteDiff::log "Reading config file: #{file}"
460
+ # TODO: Only show this in verbose mode.
461
+ SiteDiff.log "Reading config file: #{Pathname.new(file).expand_path}"
118
462
  conf = YAML.load_file(file) || {}
463
+
119
464
  unless conf.is_a? Hash
120
465
  raise InvalidConfig, "Invalid configuration file: '#{file}'"
121
466
  end
122
- conf.each do |k,v|
123
- unless CONF_KEYS.include? k
467
+
468
+ conf.each_key do |k, _v|
469
+ unless ALLOWED_CONFIG_KEYS.include? k
124
470
  raise InvalidConfig, "Unknown configuration key (#{file}): '#{k}'"
125
471
  end
126
472
  end
473
+
127
474
  conf
128
475
  end
129
476
 
130
477
  # loads a single YAML configuration file, merges all its 'included' files
131
478
  # and returns a normalized Hash.
132
- def self.load_conf(file, visited=[])
479
+ def self.load_conf(file, visited = [])
133
480
  # don't get fooled by a/../a/ or symlinks
134
481
  file = File.realpath(file)
135
482
  if visited.include? file
@@ -141,14 +488,13 @@ class SiteDiff
141
488
 
142
489
  # normalize and merge includes
143
490
  includes = conf['includes'] || []
144
- conf = Config::normalize(conf)
491
+ conf = Config.normalize(conf)
145
492
  includes.each do |dep|
146
493
  # include paths are relative to the including file.
147
494
  dep = File.join(File.dirname(file), dep)
148
- conf = Config::merge(conf, load_conf(dep, visited))
495
+ conf = Config.merge(conf, load_conf(dep, visited))
149
496
  end
150
497
  conf
151
498
  end
152
-
153
499
  end
154
500
  end