sitediff 0.0.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sitediff/config/preset'
4
+ require 'sitediff/exception'
5
+ require 'sitediff/sanitize'
6
+ require 'pathname'
1
7
  require 'yaml'
2
8
 
3
9
  class SiteDiff
10
+ # SiteDiff Configuration.
4
11
  class Config
12
+ # Default config file.
13
+ DEFAULT_FILENAME = 'sitediff.yaml'
14
+
15
+ # Default paths file.
16
+ DEFAULT_PATHS_FILENAME = 'paths.txt'
17
+
18
+ # Default SiteDiff config.
19
+ DEFAULT_CONFIG = {
20
+ 'settings' => {
21
+ 'depth' => 3,
22
+ 'interval' => 0,
23
+ 'whitelist' => '',
24
+ 'blacklist' => '',
25
+ 'concurrency' => 3,
26
+ 'preset' => nil
27
+ },
28
+ 'before' => {},
29
+ 'after' => {},
30
+ 'paths' => []
31
+ }.freeze
32
+
33
+ # Keys allowed in config files.
34
+ # TODO: Deprecate repeated params before_url and after_url.
35
+ # TODO: Create a method self.supports
36
+ # TODO: Deprecate in favor of self.supports key, subkey, subkey...
37
# Top-level keys accepted in a config file: every sanitizer tool key plus
# the structural keys listed here. The combined list is frozen so the shared
# constant cannot be modified at runtime.
ALLOWED_CONFIG_KEYS = (Sanitizer::TOOLS.values.flatten(1) + %w[
  includes
  settings
  before
  after
  before_url
  after_url
  ignore_whitespace
  export
]).freeze
5
47
 
6
- # keys allowed in configuration files
7
- CONF_KEYS = Sanitize::TOOLS.values.flatten(1) +
8
- %w[paths before after before_url after_url includes]
48
+ ##
49
+ # Keys allowed in the "settings" key.
50
+ # TODO: Create a method self.supports
51
+ # TODO: Deprecate in favor of self.supports key, subkey, subkey...
52
+ ALLOWED_SETTINGS_KEYS = %w[
53
+ preset
54
+ depth
55
+ whitelist
56
+ blacklist
57
+ concurrency
58
+ interval
59
+ curl_opts
60
+ ].freeze
9
61
 
10
- class InvalidConfig < Exception; end
62
+ class InvalidConfig < SiteDiffException; end
63
+ class ConfigNotFound < SiteDiffException; end
11
64
 
12
65
  # Takes a Hash and normalizes it to the following form by merging globals
13
66
  # into before and after. A normalized config Hash looks like this:
@@ -27,22 +80,24 @@ class SiteDiff
27
80
  # selector: body
28
81
  #
29
82
  def self.normalize(conf)
30
- tools = Sanitize::TOOLS
83
+ tools = Sanitizer::TOOLS
31
84
 
32
- # merge globals
85
+ # Merge globals
33
86
  %w[before after].each do |pos|
34
87
  conf[pos] ||= {}
35
88
  tools[:array].each do |key|
36
89
  conf[pos][key] ||= []
37
90
  conf[pos][key] += conf[key] if conf[key]
38
91
  end
39
- tools[:scalar].each {|key| conf[pos][key] ||= conf[key]}
92
+ tools[:scalar].each { |key| conf[pos][key] ||= conf[key] }
40
93
  conf[pos]['url'] ||= conf[pos + '_url']
94
+ conf[pos]['curl_opts'] = conf['curl_opts']
41
95
  end
42
- # normalize paths
43
- conf['paths'] = Config::normalize_paths(conf['paths'])
44
96
 
45
- conf.select {|k,v| %w[before after paths].include? k}
97
+ # Normalize paths.
98
+ conf['paths'] = Config.normalize_paths(conf['paths'])
99
+
100
+ conf.select { |k, _v| ALLOWED_CONFIG_KEYS.include? k }
46
101
  end
47
102
 
48
103
  # Merges two normalized Hashes according to the following rules:
@@ -59,77 +114,369 @@ class SiteDiff
59
114
  # (h2) before: {selector: bar, sanitization: [pattern: bar]}
60
115
  # (h3) before: {selector: foo, sanitization: [pattern: foo, pattern: bar]}
61
116
  def self.merge(first, second)
62
- result = { 'paths' => {}, 'before' => {}, 'after' => {} }
63
- result['paths'] = (first['paths'] || []) + (second['paths'] || []) # rule 1
117
+ result = {
118
+ 'before' => {},
119
+ 'after' => {},
120
+ 'settings' => {}
121
+ }
122
+
123
+ # Merge sanitization rules.
124
+ Sanitizer::TOOLS.values.flatten(1).each do |key|
125
+ result[key] = second[key] || first[key]
126
+ result.delete(key) unless result[key]
127
+ end
128
+
129
+ # Rule 1.
64
130
  %w[before after].each do |pos|
131
+ first[pos] ||= {}
132
+ second[pos] ||= {}
133
+
134
+ # If only the second hash has the value.
65
135
  unless first[pos]
66
136
  result[pos] = second[pos] || {}
67
137
  next
68
138
  end
139
+
69
140
  result[pos] = first[pos].merge!(second[pos]) do |key, a, b|
70
- if Sanitize::TOOLS[:array].include? key # rule 2a
71
- result[pos][key] = (a || []) + (b|| [])
72
- else
73
- result[pos][key] = a || b # rule 2b
74
- end
141
+ # Rule 2a.
142
+ result[pos][key] = if Sanitizer::TOOLS[:array].include? key
143
+ (a || []) + (b || [])
144
+ elsif key == 'settings'
145
+ b
146
+ else
147
+ a || b # Rule 2b.
148
+ end
149
+ end
150
+ end
151
+
152
+ # Merge settings.
153
+ result['settings'] = merge_deep(
154
+ first['settings'] || {},
155
+ second['settings'] || {}
156
+ )
157
+
158
+ result
159
+ end
160
+
161
+ ##
162
+ # Merges 2 iterable objects deeply.
163
##
# Recursively merges two Hashes into a new Hash.
#
# For keys present in both Hashes: nested Hashes are merged recursively,
# Arrays are concatenated, and any other value is taken from the second
# Hash. A nil on the second side leaves a Hash or Array from the first
# side unchanged.
#
# @param [Hash] first
#   Base Hash.
# @param [Hash] second
#   Hash merged on top of the base.
#
# @return [Hash]
#   The merged result; neither input is modified.
def self.merge_deep(first, second)
  first.merge(second) do |_key, val1, val2|
    case val1
    when Hash
      # Recurse on this same receiver. Inside a class method,
      # self.class is Class, which has no merge_deep.
      merge_deep(val1, val2 || {})
    when Array
      val1 + (val2 || [])
    else
      val2
    end
  end
end
174
+
175
+ ##
176
+ # Gets all loaded configuration except defaults.
177
+ #
178
+ # @return [Hash]
179
+ # Config data.
180
##
# Returns the loaded configuration with default values stripped out.
#
# A Marshal round-trip produces a deep copy, so the stored config is not
# touched while the defaults are removed.
#
# @return [Hash]
#   Config data.
def all
  snapshot = Marshal.load(Marshal.dump(@config))
  self.class.remove_defaults(snapshot)
end
184
+
185
+ ##
186
+ # Removes default parameters from a config hash.
187
+ #
188
+ # I know this is weird, but it'll be fixed. The config management needs to
189
+ # be streamlined further.
190
##
# Removes default parameters from a config Hash.
#
# The Hash is modified in place and returned; no copy is made here, so
# callers that need to keep the original must pass in a copy.
#
# @param [Hash] data
#   Config data.
#
# @return [Hash]
#   The same Hash, minus settings that are unset or equal to the defaults.
def self.remove_defaults(data)
  result = data
  settings = result['settings']

  # Nothing to strip when there is no settings section.
  return result unless settings

  # Exclude unset and default settings.
  settings.delete_if do |key, value|
    !value || value == DEFAULT_CONFIG['settings'][key]
  end

  # Exclude default curl opts, then drop the key if nothing is left.
  curl_opts = settings['curl_opts'] || {}
  curl_opts.delete_if do |key, value|
    value == UriWrapper::DEFAULT_CURL_OPTS[key.to_sym]
  end
  settings.delete('curl_opts') if curl_opts.empty?

  result
end
79
212
 
80
- def initialize(files)
81
- @config = {'paths' => [], 'before' => {}, 'after' => {} }
82
- files.each do |file|
83
- @config = Config::merge(@config, Config::load_conf(file))
213
+ # Creates a SiteDiff Config object.
214
##
# Creates a SiteDiff Config object.
#
# @param [String|Nil] file
#   Path to a config file. When nil, DEFAULT_FILENAME inside directory is
#   used.
# @param [String] directory
#   SiteDiff working directory.
#
# @raise [InvalidConfig]
#   If the config file does not exist.
def initialize(file, directory)
  # Fallback to default config filename, if none is specified.
  file = File.join(directory, DEFAULT_FILENAME) if file.nil?
  raise InvalidConfig, "Missing config file #{File.expand_path(file)}." unless File.exist?(file)

  loaded = Config.load_conf(file)
  @config = Config.merge(DEFAULT_CONFIG, loaded)
  @file = file
  @directory = directory

  # Validate configurations.
  validate
end
86
228
 
87
- def before
88
- @config['before']
229
+ # Get "before" site configuration.
230
##
# Gets the "before" site configuration.
#
# @param [Boolean] apply_preset
#   Passed through to the section lookup as its preset flag.
def before(apply_preset = false)
  section(:before, apply_preset)
end
90
- def after
91
- @config['after']
233
+
234
+ # Get "before" site URL.
235
##
# Gets the "before" site URL.
#
# @return [String|Nil]
#   The "url" entry of the "before" section, or nil without a section.
def before_url
  site = before
  site ? site['url'] : nil
end
93
239
 
240
+ # Get "after" site configuration.
241
##
# Gets the "after" site configuration.
#
# @param [Boolean] apply_preset
#   Passed through to the section lookup as its preset flag.
def after(apply_preset = false)
  section(:after, apply_preset)
end
244
+
245
+ # Get "after" site URL.
246
##
# Gets the "after" site URL.
#
# @return [String|Nil]
#   The "url" entry of the "after" section, or nil without a section.
def after_url
  site = after
  site ? site['url'] : nil
end
250
+
251
+ # Get paths.
94
252
  def paths
95
253
  @config['paths']
96
254
  end
255
+
256
+ # Set paths.
97
257
  def paths=(paths)
98
- @config['paths'] = Config::normalize_paths(paths)
258
+ raise 'Paths must be an Array' unless paths.is_a? Array
259
+
260
+ @config['paths'] = Config.normalize_paths(paths)
261
+ end
262
+
263
+ # Get ignore_whitespace option
264
##
# Gets the ignore_whitespace option.
#
# @return [*]
#   The value stored under the "ignore_whitespace" key.
def ignore_whitespace
  @config['ignore_whitespace']
end
267
+
268
+ # Set ignore_whitespace option
269
##
# Sets the ignore_whitespace option.
#
# @param [*] ignore_whitespace
#   Value to store under the "ignore_whitespace" key.
def ignore_whitespace=(ignore_whitespace)
  @config['ignore_whitespace'] = ignore_whitespace
end
272
+
273
+ # Get export option
274
##
# Gets the export option.
#
# @return [*]
#   The value stored under the "export" key.
def export
  @config['export']
end
277
+
278
+ # Set export option
279
##
# Sets the export option.
#
# @param [*] export
#   Value to store under the "export" key.
def export=(export)
  @config['export'] = export
end
282
+
283
+ ##
284
+ # Writes an array of paths to a file.
285
+ #
286
+ # @param [Array] paths
287
+ # An array of paths.
288
+ # @param [String] file
289
+ # Optional path to a file.
290
##
# Writes an array of paths to a file, one path per line.
#
# @param [Array] paths
#   A non-empty array of paths.
# @param [String] file
#   Optional path to a file. Defaults to DEFAULT_PATHS_FILENAME inside the
#   config directory.
#
# @raise [SiteDiffException]
#   If paths is not an Array or is empty.
def paths_file_write(paths, file = nil)
  if !paths.is_a?(Array) || paths.empty?
    raise SiteDiffException, 'Write failed. Invalid paths.'
  end

  file ||= File.join(@directory, DEFAULT_PATHS_FILENAME)
  File.open(file, 'w+') do |handle|
    handle.puts(paths)
  end
end
298
+
299
+ ##
300
+ # Reads a collection of paths from a file.
301
+ #
302
+ # @param [String] file
303
+ # A file containing one path per line.
304
+ #
305
+ # @return [Integer]
306
+ # Number of paths read.
307
##
# Reads a collection of paths from a file and stores them as the paths.
#
# @param [String] file
#   A file containing one path per line. Defaults to
#   DEFAULT_PATHS_FILENAME inside the config directory.
#
# @return [Integer]
#   Number of paths read.
#
# @raise [Config::InvalidConfig]
#   If the file does not exist.
def paths_file_read(file = nil)
  file ||= File.join(@directory, DEFAULT_PATHS_FILENAME)
  raise Config::InvalidConfig, "File not found: #{file}" unless File.exist?(file)

  lines = File.readlines(file)
  self.paths = lines

  # Return the number of paths.
  paths.length
end
319
+
320
+ ##
321
+ # Get roots.
322
+ #
323
+ # Example: If the config has a "before" and "after" sections, then roots
324
+ # will be ["before", "after"].
325
##
# Gets the root URLs, keyed by section name.
#
# The "after" entry is always present; the "before" entry is added only
# when a "before" section exists.
#
# @return [Hash]
#   For example { 'after' => after_url, 'before' => before_url }.
def roots
  @roots = { 'after' => after_url }.tap do |urls|
    urls['before'] = before_url if before
  end
end
330
+
331
+ ##
332
+ # Gets a setting.
333
+ #
334
+ # @param [String] key
335
+ # A key.
336
+ #
337
+ # @return [*]
338
+ # A value, if exists.
339
##
# Gets a single setting.
#
# @param [String|Symbol] key
#   Setting name; a Symbol is converted to a String first.
#
# @return [*]
#   The stored value, or nil when the setting is not present.
def setting(key)
  name = key.is_a?(Symbol) ? key.to_s : key
  stored = @config['settings']
  stored.key?(name) ? stored[name] : nil
end
343
+
344
+ ##
345
+ # Gets all settings.
346
+ #
347
+ # TODO: Make sure the settings are not writable.
348
+ #
349
+ # @return [Hash]
350
+ # All settings.
351
##
# Gets all settings.
#
# The stored Hash itself is returned, not a copy.
#
# TODO: Make sure the settings are not writable.
#
# @return [Hash]
#   All settings.
def settings
  @config['settings']
end
100
354
 
101
355
  # Checks if the configuration is usable for diff-ing.
102
- def validate
103
- raise InvalidConfig, "Undefined 'before' base URL." unless before['url']
356
+ # TODO: Do we actually need the opts argument?
357
##
# Checks if the configuration is usable for diff-ing.
#
# TODO: Do we actually need the opts argument?
#
# @param [Hash] opts
#   Options. :need_before (default true) controls whether a missing
#   "before" URL is an error.
#
# @raise [InvalidConfig]
#   If a required URL is missing, or if an interval is set while
#   concurrency is not 1.
def validate(opts = {})
  need_before = { need_before: true }.merge(opts)[:need_before]

  raise InvalidConfig, "Undefined 'before' base URL." if need_before && !before['url']
  raise InvalidConfig, "Undefined 'after' base URL." unless after['url']

  # Validate interval and concurrency.
  interval = setting(:interval)
  concurrency = setting(:concurrency)
  unless interval.to_i.zero? || concurrency == 1
    raise InvalidConfig, 'Concurrency must be 1 when an interval is set.'
  end

  # Validate preset.
  preset = setting(:preset)
  Preset.exist?(preset, true) if preset
end
376
+
377
+ ##
378
+ # Returns object clone with stringified keys.
379
+ # TODO: Make this method available globally, if required.
380
##
# Builds a copy of a Hash-like object in which Symbol keys are replaced by
# Strings, descending into nested values.
#
# Anything that does not respond to each_key is returned unchanged.
#
# TODO: Make this method available globally, if required.
def self.stringify_keys(object)
  return object unless object.respond_to?('each_key')

  object.enum_for(:each_key).each_with_object({}) do |key, converted|
    name = key.is_a?(Symbol) ? key.to_s : key
    converted[name] = stringify_keys(object[key])
  end
end
394
+
395
+ ##
396
+ # Creates a RegExp from a string.
397
##
# Creates a RegExp from a string.
#
# @param [String] string_param
#   Pattern source.
#
# @return [Regexp|Nil]
#   The compiled pattern, or nil when the string is empty or is not a
#   valid pattern (in which case the error is logged).
def self.create_regexp(string_param)
  return nil if string_param == ''

  Regexp.new(string_param)
rescue RegexpError => e
  # Regexp.new reports a malformed pattern with RegexpError.
  SiteDiff.log "Invalid RegExp: #{string_param}", :error
  SiteDiff.log e.message, :error
  # TODO: Use SiteDiff.log type :debug
  # SiteDiff.log e.backtrace, :error if options[:verbose]
  nil
end
107
409
 
108
410
  private
109
411
 
412
+ ##
413
+ # Returns one of the "before" or "after" sections.
414
+ #
415
+ # @param [String|Symbol]
416
+ # Section name. Example: before, after.
417
+ # @param [Boolean] with_preset
418
+ # Whether to merge with preset config (if any).
419
+ #
420
+ # @return [Hash|Nil]
421
+ # Section data or Nil.
422
##
# Returns one of the "before" or "after" sections.
#
# @param [String|Symbol] name
#   Section name. Example: before, after.
# @param [Boolean] with_preset
#   Whether to merge with preset config (if any).
#
# @return [Hash|Nil]
#   Section data or Nil. Without preset merging this is the stored section
#   itself; with preset merging it is a copy, so the stored config is never
#   altered and repeated calls do not pile up preset rules.
#
# @raise [SiteDiffException]
#   If name is neither "before" nor "after".
def section(name, with_preset = false)
  name = name.to_s if name.is_a? Symbol

  # Validate section.
  unless %w[before after].include? name
    raise SiteDiffException, '"name" must be one of "before" or "after".'
  end

  # Return nil if section is not defined.
  result = @config[name]
  return nil unless result

  # Merge preset rules, if required.
  preset = setting(:preset)
  return result unless with_preset && !preset.nil?

  preset_config = Preset.read preset

  # A shallow copy is enough: merged arrays are built with +, which
  # creates new Array objects instead of changing the stored ones.
  result = result.dup

  # Merge plugins with array values.
  # TODO: This won't be required after plugin declarations are improved.
  # See https://rm.ewdev.ca/issues/18301
  Sanitizer::TOOLS[:array].each do |key|
    next unless preset_config[key]

    result[key] = (result[key] || []) + preset_config[key]
  end

  result
end
452
+
110
453
  def self.normalize_paths(paths)
111
454
  paths ||= []
112
- return paths.map { |p| (p[0] == '/' ? p : "/#{p}").chomp }
455
+ paths.map { |p| (p[0] == '/' ? p : "/#{p}").chomp }
113
456
  end
114
457
 
115
458
  # reads a YAML file and raises an InvalidConfig if the file is not valid.
116
459
  def self.load_raw_yaml(file)
117
- SiteDiff::log "Reading config file: #{file}"
460
+ # TODO: Only show this in verbose mode.
461
+ SiteDiff.log "Reading config file: #{Pathname.new(file).expand_path}"
118
462
  conf = YAML.load_file(file) || {}
463
+
119
464
  unless conf.is_a? Hash
120
465
  raise InvalidConfig, "Invalid configuration file: '#{file}'"
121
466
  end
122
- conf.each do |k,v|
123
- unless CONF_KEYS.include? k
467
+
468
+ conf.each_key do |k, _v|
469
+ unless ALLOWED_CONFIG_KEYS.include? k
124
470
  raise InvalidConfig, "Unknown configuration key (#{file}): '#{k}'"
125
471
  end
126
472
  end
473
+
127
474
  conf
128
475
  end
129
476
 
130
477
  # loads a single YAML configuration file, merges all its 'included' files
131
478
  # and returns a normalized Hash.
132
- def self.load_conf(file, visited=[])
479
+ def self.load_conf(file, visited = [])
133
480
  # don't get fooled by a/../a/ or symlinks
134
481
  file = File.realpath(file)
135
482
  if visited.include? file
@@ -141,14 +488,13 @@ class SiteDiff
141
488
 
142
489
  # normalize and merge includes
143
490
  includes = conf['includes'] || []
144
- conf = Config::normalize(conf)
491
+ conf = Config.normalize(conf)
145
492
  includes.each do |dep|
146
493
  # include paths are relative to the including file.
147
494
  dep = File.join(File.dirname(file), dep)
148
- conf = Config::merge(conf, load_conf(dep, visited))
495
+ conf = Config.merge(conf, load_conf(dep, visited))
149
496
  end
150
497
  conf
151
498
  end
152
-
153
499
  end
154
500
  end