sitediff 0.0.6 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,17 +1,63 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'sitediff/config/preset'
3
4
  require 'sitediff/exception'
4
5
  require 'sitediff/sanitize'
5
6
  require 'pathname'
6
7
  require 'yaml'
7
8
 
8
9
  class SiteDiff
10
+ # SiteDiff Configuration.
9
11
  class Config
12
+ # Default config file.
10
13
  DEFAULT_FILENAME = 'sitediff.yaml'
11
14
 
12
- # keys allowed in configuration files
13
- CONF_KEYS = Sanitizer::TOOLS.values.flatten(1) +
14
- %w[paths before after before_url after_url includes curl_opts]
15
+ # Default paths file.
16
+ DEFAULT_PATHS_FILENAME = 'paths.txt'
17
+
18
+ # Default SiteDiff config.
19
+ DEFAULT_CONFIG = {
20
+ 'settings' => {
21
+ 'depth' => 3,
22
+ 'interval' => 0,
23
+ 'whitelist' => '',
24
+ 'blacklist' => '',
25
+ 'concurrency' => 3,
26
+ 'preset' => nil
27
+ },
28
+ 'before' => {},
29
+ 'after' => {},
30
+ 'paths' => []
31
+ }.freeze
32
+
33
# Keys allowed at the top level of a config file: every sanitizer tool key
# plus the structural keys listed below. Frozen, like ALLOWED_SETTINGS_KEYS,
# so the list cannot be altered at runtime.
# TODO: Deprecate repeated params before_url and after_url.
# TODO: Create a method self.supports
# TODO: Deprecate in favor of self.supports key, subkey, subkey...
ALLOWED_CONFIG_KEYS = (Sanitizer::TOOLS.values.flatten(1) + %w[
  includes
  settings
  before
  after
  before_url
  after_url
  ignore_whitespace
  export
]).freeze
47
+
48
+ ##
49
+ # Keys allowed in the "settings" key.
50
+ # TODO: Create a method self.supports
51
+ # TODO: Deprecate in favor of self.supports key, subkey, subkey...
52
+ ALLOWED_SETTINGS_KEYS = %w[
53
+ preset
54
+ depth
55
+ whitelist
56
+ blacklist
57
+ concurrency
58
+ interval
59
+ curl_opts
60
+ ].freeze
15
61
 
16
62
  class InvalidConfig < SiteDiffException; end
17
63
  class ConfigNotFound < SiteDiffException; end
@@ -36,7 +82,7 @@ class SiteDiff
36
82
  def self.normalize(conf)
37
83
  tools = Sanitizer::TOOLS
38
84
 
39
- # merge globals
85
+ # Merge globals
40
86
  %w[before after].each do |pos|
41
87
  conf[pos] ||= {}
42
88
  tools[:array].each do |key|
@@ -47,10 +93,11 @@ class SiteDiff
47
93
  conf[pos]['url'] ||= conf[pos + '_url']
48
94
  conf[pos]['curl_opts'] = conf['curl_opts']
49
95
  end
50
- # normalize paths
96
+
97
+ # Normalize paths.
51
98
  conf['paths'] = Config.normalize_paths(conf['paths'])
52
99
 
53
- conf.select { |k, _v| %w[before after paths curl_opts].include? k }
100
+ conf.select { |k, _v| ALLOWED_CONFIG_KEYS.include? k }
54
101
  end
55
102
 
56
103
  # Merges two normalized Hashes according to the following rules:
@@ -67,65 +114,342 @@ class SiteDiff
67
114
  # (h2) before: {selector: bar, sanitization: [pattern: bar]}
68
115
  # (h3) before: {selector: foo, sanitization: [pattern: foo, pattern: bar]}
69
116
  def self.merge(first, second)
70
- result = { 'paths' => {}, 'before' => {}, 'after' => {} }
71
- result['paths'] = (first['paths'] || []) + (second['paths'] || []) # rule 1
117
+ result = {
118
+ 'before' => {},
119
+ 'after' => {},
120
+ 'settings' => {}
121
+ }
122
+
123
+ # Merge sanitization rules.
124
+ Sanitizer::TOOLS.values.flatten(1).each do |key|
125
+ result[key] = second[key] || first[key]
126
+ result.delete(key) unless result[key]
127
+ end
128
+
129
+ # Rule 1.
72
130
  %w[before after].each do |pos|
131
+ first[pos] ||= {}
132
+ second[pos] ||= {}
133
+
134
+ # If only the second hash has the value.
73
135
  unless first[pos]
74
136
  result[pos] = second[pos] || {}
75
137
  next
76
138
  end
139
+
77
140
  result[pos] = first[pos].merge!(second[pos]) do |key, a, b|
78
- result[pos][key] = if Sanitizer::TOOLS[:array].include? key # rule 2a
141
+ # Rule 2a.
142
+ result[pos][key] = if Sanitizer::TOOLS[:array].include? key
79
143
  (a || []) + (b || [])
144
+ elsif key == 'settings'
145
+ b
80
146
  else
81
- a || b # rule 2b
147
+ a || b # Rule 2b.
82
148
  end
83
149
  end
84
150
  end
151
+
152
+ # Merge settings.
153
+ result['settings'] = merge_deep(
154
+ first['settings'] || {},
155
+ second['settings'] || {}
156
+ )
157
+
85
158
  result
86
159
  end
87
160
 
88
- def initialize(files, dir)
89
- @config = { 'paths' => [], 'before' => {}, 'after' => {} }
90
-
91
- files = [File.join(dir, DEFAULT_FILENAME)] if files.empty?
92
- files.each do |file|
93
- unless File.exist?(file)
94
- raise InvalidConfig,
95
- format('Missing config file %s.', File.expand_path(file))
161
##
# Merges two Hashes deeply and returns the result as a new Hash.
#
# Keys present in only one Hash are kept as they are. For keys present in
# both: nested Hashes are merged recursively, Arrays are concatenated (first,
# then second), and any other value is taken from the second Hash.
#
# @param [Hash] first
#   The base Hash.
# @param [Hash] second
#   The Hash whose values take precedence.
#
# @return [Hash]
#   The merged Hash.
def self.merge_deep(first, second)
  first.merge(second) do |_key, val1, val2|
    if val1.is_a? Hash
      # Recurse on the current receiver. Inside a "def self." method,
      # self.class is Class, which has no merge_deep.
      merge_deep(val1, val2 || {})
    elsif val1.is_a? Array
      val1 + (val2 || [])
    else
      val2
    end
  end
end
100
174
 
101
- def before
102
- @config['before']
175
##
# Returns the loaded configuration with default values stripped out.
#
# A Marshal round-trip produces a deep copy of @config, and that copy is what
# gets handed to remove_defaults.
#
# @return [Hash]
#   Config data.
def all
  self.class.remove_defaults(Marshal.load(Marshal.dump(@config)))
end
184
+
185
##
# Returns a copy of a config hash with default parameters removed.
#
# Settings equal to the matching DEFAULT_CONFIG setting, and settings that
# are nil or false, are dropped. Curl options equal to the matching
# UriWrapper::DEFAULT_CURL_OPTS entry are dropped, and the "curl_opts" key is
# removed entirely when nothing remains in it.
#
# @param [Hash] data
#   Config data. It is not modified.
#
# @return [Hash]
#   A deep copy of the data without the defaults.
def self.remove_defaults(data)
  # Work on a deep copy so the caller's hash and its nested hashes stay intact.
  result = Marshal.load(Marshal.dump(data))

  # Nothing to strip when there is no "settings" section.
  settings = result['settings']
  return result unless settings

  # Exclude default settings.
  settings.delete_if do |key, value|
    value == DEFAULT_CONFIG['settings'][key] || !value
  end

  # Exclude default curl opts.
  curl_opts = settings['curl_opts'] || {}
  curl_opts.delete_if do |key, value|
    value == UriWrapper::DEFAULT_CURL_OPTS[key.to_sym]
  end

  # Keep curl opts only when something non-default is left.
  if curl_opts.empty?
    settings.delete('curl_opts')
  else
    settings['curl_opts'] = curl_opts
  end

  result
end
212
+
213
##
# Creates a SiteDiff Config object.
#
# @param [String|Nil] file
#   Path to a config file. When nil, DEFAULT_FILENAME inside the directory
#   is used.
# @param [String] directory
#   The directory used to locate the default config file.
#
# @raise [InvalidConfig]
#   If the config file does not exist.
def initialize(file, directory)
  # Fallback to default config filename, if none is specified.
  file = File.join(directory, DEFAULT_FILENAME) if file.nil?
  unless File.exist?(file)
    path = File.expand_path(file)
    raise InvalidConfig, "Missing config file #{path}."
  end

  # DEFAULT_CONFIG is only frozen at its top level. Merge into a private deep
  # copy so merging can never write into the shared defaults.
  defaults = Marshal.load(Marshal.dump(DEFAULT_CONFIG))
  @config = Config.merge(defaults, Config.load_conf(file))
  @file = file
  @directory = directory

  # Validate configurations.
  validate
end
228
+
229
##
# Returns the "before" site configuration.
#
# @param [Boolean] apply_preset
#   Passed on to section as its with_preset argument.
def before(apply_preset = false)
  section(:before, apply_preset)
end
233
+
234
##
# Returns the "url" entry of the "before" section, or nil when there is no
# such section.
def before_url
  site = before
  return nil unless site

  site['url']
end
104
239
 
105
- def after
106
- @config['after']
240
##
# Returns the "after" site configuration.
#
# @param [Boolean] apply_preset
#   Passed on to section as its with_preset argument.
def after(apply_preset = false)
  section(:after, apply_preset)
end
108
244
 
245
##
# Returns the "url" entry of the "after" section, or nil when there is no
# such section.
def after_url
  site = after
  return nil unless site

  site['url']
end
250
+
251
##
# Returns the value stored under the "paths" key of the config.
def paths
  @config['paths']
end
112
255
 
256
##
# Stores a list of paths in the config after passing it through
# Config.normalize_paths.
#
# @param [Array] paths
#   The paths to store. Anything other than an Array is rejected.
def paths=(paths)
  unless paths.is_a?(Array)
    raise 'Paths must be an Array'
  end

  @config['paths'] = Config.normalize_paths(paths)
end
116
262
 
263
##
# Returns the value of the "ignore_whitespace" config option.
def ignore_whitespace
  @config['ignore_whitespace']
end
267
+
268
##
# Stores a new value for the "ignore_whitespace" config option.
def ignore_whitespace=(ignore_whitespace)
  @config.store('ignore_whitespace', ignore_whitespace)
end
272
+
273
##
# Returns the value of the "export" config option.
def export
  @config['export']
end
277
+
278
##
# Stores a new value for the "export" config option.
def export=(export)
  @config.store('export', export)
end
282
+
283
##
# Writes a list of paths to a file, one path per line.
#
# @param [Array] paths
#   A non-empty array of paths.
# @param [String] file
#   Optional target file. Defaults to DEFAULT_PATHS_FILENAME inside the
#   config directory.
#
# @raise [SiteDiffException]
#   If paths is not an Array or is empty.
def paths_file_write(paths, file = nil)
  writable = paths.is_a?(Array) && !paths.empty?
  raise SiteDiffException, 'Write failed. Invalid paths.' unless writable

  target = file || File.join(@directory, DEFAULT_PATHS_FILENAME)
  File.open(target, 'w+') { |handle| handle.puts(paths) }
end
298
+
299
##
# Loads paths from a file and stores them through the paths= setter.
#
# @param [String] file
#   A file containing one path per line. Defaults to DEFAULT_PATHS_FILENAME
#   inside the config directory.
#
# @return [Integer]
#   Number of paths read.
#
# @raise [Config::InvalidConfig]
#   If the file does not exist.
def paths_file_read(file = nil)
  source = file || File.join(@directory, DEFAULT_PATHS_FILENAME)
  raise Config::InvalidConfig, "File not found: #{source}" unless File.exist?(source)

  self.paths = File.readlines(source)

  # Report how many paths are now stored.
  paths.length
end
319
+
320
##
# Builds a Hash mapping section names to their base URLs.
#
# The "after" entry is always present. The "before" entry is added when the
# "before" section exists. The Hash is also kept in @roots.
#
# @return [Hash]
#   Example: { 'after' => after_url, 'before' => before_url }.
def roots
  @roots = { 'after' => after_url }
  before_section = before
  @roots['before'] = before_url if before_section
  @roots
end
330
+
331
##
# Looks up a single setting.
#
# @param [String|Symbol] key
#   Setting name. Symbols are converted to Strings first.
#
# @return [*]
#   The stored value, or nil when the setting is not present.
def setting(key)
  name = key.is_a?(Symbol) ? key.to_s : key
  store = @config['settings']
  store.key?(name) ? store[name] : nil
end
343
+
344
##
# Returns the whole "settings" Hash of the config.
#
# The Hash is handed out as stored, not as a copy.
# TODO: Make sure the settings are not writable.
#
# @return [Hash]
#   All settings.
def settings
  @config['settings']
end
354
+
117
355
  # Checks if the configuration is usable for diff-ing.
356
+ # TODO: Do we actually need the opts argument?
118
357
  def validate(opts = {})
119
358
  opts = { need_before: true }.merge(opts)
120
359
 
121
- raise InvalidConfig, "Undefined 'before' base URL." if \
122
- opts[:need_before] && !before['url']
360
+ if opts[:need_before] && !before['url']
361
+ raise InvalidConfig, "Undefined 'before' base URL."
362
+ end
363
+
123
364
  raise InvalidConfig, "Undefined 'after' base URL." unless after['url']
124
- raise InvalidConfig, "Undefined 'paths'." unless paths && !paths.empty?
365
+
366
+ # Validate interval and concurrency.
367
+ interval = setting(:interval)
368
+ concurrency = setting(:concurrency)
369
+ if interval.to_i != 0 && concurrency != 1
370
+ raise InvalidConfig, 'Concurrency must be 1 when an interval is set.'
371
+ end
372
+
373
+ # Validate preset.
374
+ Preset.exist? setting(:preset), true if setting(:preset)
375
+ end
376
+
377
##
# Builds a copy of a hash-like object in which Symbol keys become Strings.
#
# Values are converted recursively. Anything that does not respond to
# each_key is returned unchanged.
# TODO: Make this method available globally, if required.
def self.stringify_keys(object)
  return object unless object.respond_to?('each_key')

  converted = {}
  object.each_key do |key|
    name = key.is_a?(Symbol) ? key.to_s : key
    converted[name] = stringify_keys(object[key])
  end
  converted
end
394
+
395
##
# Creates a RegExp from a string.
#
# @param [String] string_param
#   A regular expression source.
#
# @return [Regexp|Nil]
#   The compiled expression, or nil when the string is empty or is not a
#   valid regular expression. In the latter case the problem is logged.
def self.create_regexp(string_param)
  return nil if string_param == ''

  Regexp.new(string_param)
rescue RegexpError => e
  # Regexp.new reports a malformed pattern with RegexpError, so that is the
  # error that has to be caught to reach the logging below.
  SiteDiff.log "Invalid RegExp: #{string_param}", :error
  SiteDiff.log e.message, :error
  # TODO: Use SiteDiff.log type :debug
  # SiteDiff.log e.backtrace, :error if options[:verbose]
  nil
end
126
409
 
127
410
  private
128
411
 
412
##
# Returns one of the "before" or "after" sections.
#
# Without a preset the stored section Hash itself is returned. With a preset
# a copy is returned, so the preset rules are never written back into the
# stored config and cannot pile up over repeated calls.
#
# @param [String|Symbol] name
#   Section name. Example: before, after.
# @param [Boolean] with_preset
#   Whether to merge with preset config (if any).
#
# @return [Hash|Nil]
#   Section data or Nil.
#
# @raise [SiteDiffException]
#   If the name is neither "before" nor "after".
def section(name, with_preset = false)
  name = name.to_s if name.is_a? Symbol

  # Validate section.
  unless %w[before after].include? name
    raise SiteDiffException, '"name" must be one of "before" or "after".'
  end

  # Return nil if section is not defined.
  result = @config[name]
  return nil unless result

  # Nothing to merge unless a preset is both requested and configured.
  preset = setting(:preset)
  return result unless with_preset && !preset.nil?

  preset_config = Preset.read preset

  # A shallow copy is enough: each merged list below is a new Array assigned
  # to the copy, so the stored section keeps its own lists.
  result = result.dup

  # Merge plugins with array values.
  # TODO: This won't be required after plugin declarations are improved.
  # See https://rm.ewdev.ca/issues/18301
  Sanitizer::TOOLS[:array].each do |key|
    next unless preset_config[key]

    result[key] = (result[key] || []) + preset_config[key]
  end

  result
end
452
+
129
453
  def self.normalize_paths(paths)
130
454
  paths ||= []
131
455
  paths.map { |p| (p[0] == '/' ? p : "/#{p}").chomp }
@@ -133,13 +457,20 @@ class SiteDiff
133
457
 
134
458
  # reads a YAML file and raises an InvalidConfig if the file is not valid.
135
459
  def self.load_raw_yaml(file)
460
+ # TODO: Only show this in verbose mode.
136
461
  SiteDiff.log "Reading config file: #{Pathname.new(file).expand_path}"
137
462
  conf = YAML.load_file(file) || {}
138
- raise InvalidConfig, "Invalid configuration file: '#{file}'" unless conf.is_a? Hash
463
+
464
+ unless conf.is_a? Hash
465
+ raise InvalidConfig, "Invalid configuration file: '#{file}'"
466
+ end
139
467
 
140
468
  conf.each_key do |k, _v|
141
- raise InvalidConfig, "Unknown configuration key (#{file}): '#{k}'" unless CONF_KEYS.include? k
469
+ unless ALLOWED_CONFIG_KEYS.include? k
470
+ raise InvalidConfig, "Unknown configuration key (#{file}): '#{k}'"
471
+ end
142
472
  end
473
+
143
474
  conf
144
475
  end
145
476
 
@@ -148,7 +479,9 @@ class SiteDiff
148
479
  def self.load_conf(file, visited = [])
149
480
  # don't get fooled by a/../a/ or symlinks
150
481
  file = File.realpath(file)
151
- raise InvalidConfig, "Circular dependency: #{file}" if visited.include? file
482
+ if visited.include? file
483
+ raise InvalidConfig, "Circular dependency: #{file}"
484
+ end
152
485
 
153
486
  conf = load_raw_yaml(file) # not normalized yet
154
487
  visited << file