sitediff 0.0.6 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55):
  1. checksums.yaml +5 -5
  2. data/.eslintignore +1 -0
  3. data/.eslintrc.json +28 -0
  4. data/.project +11 -0
  5. data/.rubocop.yml +179 -0
  6. data/.rubocop_todo.yml +51 -0
  7. data/CHANGELOG.md +28 -0
  8. data/Dockerfile +33 -0
  9. data/Gemfile +11 -0
  10. data/Gemfile.lock +85 -0
  11. data/INSTALLATION.md +146 -0
  12. data/LICENSE +339 -0
  13. data/README.md +810 -0
  14. data/Rakefile +12 -0
  15. data/Thorfile +135 -0
  16. data/bin/sitediff +9 -2
  17. data/config/.gitkeep +0 -0
  18. data/config/sanitize_domains.example.yaml +8 -0
  19. data/config/sitediff.example.yaml +81 -0
  20. data/docker-compose.test.yml +3 -0
  21. data/lib/sitediff/api.rb +276 -0
  22. data/lib/sitediff/cache.rb +57 -8
  23. data/lib/sitediff/cli.rb +156 -176
  24. data/lib/sitediff/config/creator.rb +61 -77
  25. data/lib/sitediff/config/preset.rb +75 -0
  26. data/lib/sitediff/config.rb +436 -31
  27. data/lib/sitediff/crawler.rb +27 -21
  28. data/lib/sitediff/diff.rb +32 -9
  29. data/lib/sitediff/fetch.rb +10 -3
  30. data/lib/sitediff/files/diff.html.erb +20 -2
  31. data/lib/sitediff/files/jquery.min.js +2 -0
  32. data/lib/sitediff/files/normalize.css +349 -0
  33. data/lib/sitediff/files/report.html.erb +171 -0
  34. data/lib/sitediff/files/sidebyside.html.erb +5 -2
  35. data/lib/sitediff/files/sitediff.css +303 -30
  36. data/lib/sitediff/files/sitediff.js +367 -0
  37. data/lib/sitediff/presets/drupal.yaml +63 -0
  38. data/lib/sitediff/report.rb +254 -0
  39. data/lib/sitediff/result.rb +50 -20
  40. data/lib/sitediff/sanitize/dom_transform.rb +47 -8
  41. data/lib/sitediff/sanitize/regexp.rb +24 -3
  42. data/lib/sitediff/sanitize.rb +81 -12
  43. data/lib/sitediff/uriwrapper.rb +65 -23
  44. data/lib/sitediff/webserver/resultserver.rb +30 -33
  45. data/lib/sitediff/webserver.rb +15 -3
  46. data/lib/sitediff.rb +130 -83
  47. data/misc/sitediff - overview report.png +0 -0
  48. data/misc/sitediff - page report.png +0 -0
  49. data/package-lock.json +878 -0
  50. data/package.json +25 -0
  51. data/sitediff.gemspec +51 -0
  52. metadata +91 -29
  53. data/lib/sitediff/files/html_report.html.erb +0 -66
  54. data/lib/sitediff/files/rules/drupal.yaml +0 -63
  55. data/lib/sitediff/rules.rb +0 -65
data/lib/sitediff.rb CHANGED
@@ -2,63 +2,85 @@
2
2
  # frozen_string_literal: true
3
3
 
4
4
  require 'sitediff/config'
5
+ require 'sitediff/diff'
5
6
  require 'sitediff/fetch'
6
7
  require 'sitediff/result'
8
+ require 'sitediff/report'
7
9
  require 'pathname'
8
10
  require 'rainbow'
11
+ require 'rubygems'
9
12
  require 'yaml'
10
13
 
14
+ # SiteDiff Object.
11
15
  class SiteDiff
12
- # path to misc. static files (e.g. erb, css files)
16
+ attr_reader :config, :results
17
+
18
+ # SiteDiff installation directory.
19
+ ROOT_DIR = File.dirname(File.dirname(__FILE__))
20
+
21
+ # Path to misc files. Ex: *.erb, *.css.
13
22
  FILES_DIR = File.join(File.dirname(__FILE__), 'sitediff', 'files')
14
23
 
15
- # subdirectory containing all failing diffs
16
- DIFFS_DIR = 'diffs'
17
-
18
- # files in output
19
- FAILURES_FILE = 'failures.txt'
20
- REPORT_FILE = 'report.html'
21
- SETTINGS_FILE = 'settings.yaml'
22
-
23
- # label will be colorized and str will not be.
24
- # type dictates the color: can be :success, :error, or :failure
25
- def self.log(str, type = :info, label = nil)
26
- label = label ? "[sitediff] #{label}" : '[sitediff]'
27
- bg = fg = nil
28
- case type
29
- when :info
30
- bg = fg = nil
31
- when :diff_success
32
- bg = :green
24
+ # Logs a message.
25
+ #
26
+ # Label will be colorized and message will not.
27
+ # Type dictates the color: can be :success, :error, or :failure.
28
+ #
29
+ # TODO: Only print :debug messages in debug mode.
30
+ def self.log(message, type = :info, label = nil)
31
+ # Prepare label.
32
+ label ||= type unless type == :info
33
+ label = label.to_s
34
+ unless label.empty?
35
+ # Colorize label.
33
36
  fg = :black
34
- when :diff_failure
35
- bg = :red
36
- when :warn
37
- bg = :yellow
38
- fg = :black
39
- when :error
40
- bg = :red
37
+ bg = :blue
38
+
39
+ case type
40
+ when :info
41
+ bg = :cyan
42
+ when :success
43
+ bg = :green
44
+ when :error
45
+ bg = :red
46
+ when :warning
47
+ bg = :yellow
48
+ end
49
+
50
+ label = "[#{label}]"
51
+ label = Rainbow(label)
52
+ label = label.bg(bg) if bg
53
+ label = label.fg(fg) if fg
54
+
55
+ # Add a space after the label.
56
+ label += ' '
41
57
  end
42
- label = Rainbow(label)
43
- label = label.bg(bg) if bg
44
- label = label.fg(fg) if fg
45
- puts label + ' ' + str
58
+
59
+ puts label + message
46
60
  end
47
61
 
48
- attr_reader :config, :results
62
+ ##
63
+ # Returns the "before" site's URL.
64
+ #
65
+ # TODO: Remove in favor of config.before_url.
49
66
  def before
50
67
  @config.before['url']
51
68
  end
52
69
 
70
+ ##
71
+ # Returns the "after" site's URL.
72
+ #
73
+ # TODO: Remove in favor of config.after_url.
53
74
  def after
54
75
  @config.after['url']
55
76
  end
56
77
 
57
- def initialize(config, cache, concurrency, interval, verbose = true, debug = false)
78
+ # Initialize SiteDiff.
79
+ def initialize(config, cache, verbose: true, debug: false)
58
80
  @cache = cache
59
81
  @verbose = verbose
60
82
  @debug = debug
61
- @interval = interval
83
+
62
84
  # Check for single-site mode
63
85
  validate_opts = {}
64
86
  if !config.before['url'] && @cache.tag?(:before)
@@ -69,37 +91,50 @@ class SiteDiff
69
91
  validate_opts[:need_before] = false
70
92
  end
71
93
  config.validate(validate_opts)
72
-
73
- @concurrency = concurrency
94
+ # Configure diff.
95
+ Diff.diff_config(config)
74
96
  @config = config
75
97
  end
76
98
 
77
- # Sanitize HTML
78
- def sanitize(path, read_results)
99
+ # Sanitize HTML.
100
+ def sanitize(path_passed, read_results)
79
101
  %i[before after].map do |tag|
80
102
  html = read_results[tag].content
103
+ # TODO: See why encoding is empty while running tests.
104
+ #
105
+ # The presence of an "encoding" value used to be used to determine
106
+ # if the sanitizer would be called. However, encoding turns up blank
107
+ # during rspec tests for some reason.
81
108
  encoding = read_results[tag].encoding
82
- if encoding
83
- config = @config.send(tag)
84
- Sanitizer.new(html, config, path: path).sanitize
109
+ if encoding || html.length.positive?
110
+ section = @config.send(tag, apply_preset: true)
111
+ opts = { path: path_passed }
112
+ opts[:output] = @config.output if @config.output
113
+ Sanitizer.new(html, section, opts).sanitize
85
114
  else
86
115
  html
87
116
  end
88
117
  end
89
118
  end
90
119
 
91
- # Process a set of read results
120
+ ##
121
+ # Process a set of read results.
122
+ #
123
+ # This is the callback that processes items fetched by the Fetcher.
92
124
  def process_results(path, read_results)
93
- if (error = (read_results[:before].error || read_results[:after].error))
125
+ error = (read_results[:before].error || read_results[:after].error)
126
+ if error
94
127
  diff = Result.new(path, nil, nil, nil, nil, error)
95
128
  else
96
129
  begin
97
- diff = Result.new(path,
98
- *sanitize(path, read_results),
99
- read_results[:before].encoding,
100
- read_results[:after].encoding,
101
- nil)
102
- rescue => e
130
+ diff = Result.new(
131
+ path,
132
+ *sanitize(path, read_results),
133
+ read_results[:before].encoding,
134
+ read_results[:after].encoding,
135
+ nil
136
+ )
137
+ rescue StandardError => e
103
138
  raise if @debug
104
139
 
105
140
  Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e}")
@@ -109,65 +144,77 @@ class SiteDiff
109
144
 
110
145
  # Print results in order!
111
146
  while (next_diff = @results[@ordered.first])
112
- next_diff.log(@verbose)
147
+ next_diff.log(verbose: @verbose)
113
148
  @ordered.shift
114
149
  end
115
150
  end
116
151
 
117
- # Perform the comparison, populate @results and return the number of failing
118
- # paths (paths with non-zero diff).
119
- def run(curl_opts = {}, debug = true)
152
+ ##
153
+ # Compute diff as per config.
154
+ #
155
+ # @return [Integer]
156
+ # Number of paths which have diffs.
157
+ def run
120
158
  # Map of path -> Result object, populated by process_results
121
159
  @results = {}
122
160
  @ordered = @config.paths.dup
123
161
 
124
162
  unless @cache.read_tags.empty?
125
- SiteDiff.log('Using sites from cache: ' +
126
- @cache.read_tags.sort.join(', '))
163
+ SiteDiff.log("Using sites from cache: #{@cache.read_tags.sort.join(', ')}")
127
164
  end
128
165
 
129
166
  # TODO: Fix this after config merge refactor!
130
167
  # Not quite right. We are not passing @config.before or @config.after
131
168
  # so passing this instead but @config.after['curl_opts'] is ignored.
169
+ curl_opts = @config.setting :curl_opts
132
170
  config_curl_opts = @config.before['curl_opts']
133
171
  curl_opts = config_curl_opts.clone.merge(curl_opts) if config_curl_opts
134
- fetcher = Fetch.new(@cache, @config.paths, @interval, @concurrency, curl_opts, debug,
135
- before: before, after: after)
172
+ fetcher = Fetch.new(
173
+ @cache,
174
+ @config.paths,
175
+ @config.setting(:interval),
176
+ @config.setting(:concurrency),
177
+ curl_opts,
178
+ debug: @debug,
179
+ before: @config.before_url,
180
+ after: @config.after_url
181
+ )
182
+
183
+ # Run the Fetcher with "process results" as a callback.
136
184
  fetcher.run(&method(:process_results))
137
185
 
138
186
  # Order by original path order
139
- @results = @config.paths.map { |p| @results[p] }
187
+ @results = @config.paths.map { |path| @results[path] }
140
188
  results.map { |r| r unless r.success? }.compact.length
141
189
  end
142
190
 
143
- # Dump results to disk
144
- def dump(dir, report_before, report_after)
145
- report_before ||= before
146
- report_after ||= after
147
- dir = Pathname.new(dir)
148
- dir.mkpath unless dir.directory?
149
-
150
- # store diffs of each failing case, first wipe out existing diffs
151
- diff_dir = dir + DIFFS_DIR
152
- diff_dir.rmtree if diff_dir.exist?
153
- results.each { |r| r.dump(dir) if r.status == Result::STATUS_FAILURE }
154
- SiteDiff.log "All diff files were dumped inside #{dir.expand_path}"
155
-
156
- # store failing paths
157
- failures = dir + FAILURES_FILE
158
- SiteDiff.log "Writing failures to #{failures.expand_path}"
159
- failures.open('w') do |f|
160
- results.each { |r| f.puts r.path unless r.success? }
191
+ ##
192
+ # Get a reporter object to help with report generation.
193
+ def report
194
+ if @results.nil?
195
+ raise SiteDiffException(
196
+ 'No results detected. Run SiteDiff.run before SiteDiff.report.'
197
+ )
161
198
  end
162
199
 
163
- # create report of results
164
- report = Diff.generate_html_report(results, report_before, report_after,
165
- @cache)
166
- dir.+(REPORT_FILE).open('w') { |f| f.write(report) }
200
+ Report.new(@config, @cache, @results)
201
+ end
202
+
203
+ ##
204
+ # Get SiteDiff gemspec.
205
+ def self.gemspec
206
+ file = "#{ROOT_DIR}/sitediff.gemspec"
207
+ Gem::Specification.load(file)
208
+ end
167
209
 
168
- # serve some settings
169
- settings = { 'before' => report_before, 'after' => report_after,
170
- 'cached' => %w[before after] }
171
- dir.+(SETTINGS_FILE).open('w') { |f| YAML.dump(settings, f) }
210
+ ##
211
+ # Ensures that a directory exists and returns a Pathname for it.
212
+ #
213
+ # @param [String] dir
214
+ # path/to/directory
215
+ def self.ensure_dir(dir)
216
+ dir = Pathname.new(dir) unless dir.is_a? Pathname
217
+ dir.mkpath unless dir.directory?
218
+ dir
172
219
  end
173
220
  end
Binary file
Binary file