sitediff 0.0.2 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: f75892f718764c8fd2c18d7f3f7e7cf8908d60ea07c2a765510c8ef409b9f0c1
4
+ data.tar.gz: 3b3744eca0dda04821152aab596fb67891204a1599b4db72e13b4af484693e65
5
+ SHA512:
6
+ metadata.gz: 97e9098b290742f1b3efe3c284e9392be95ffd0f7576df413a6ec612142b0573acf8b8b4d43369961c154d801db6284fcc1a8d69cea7da8ed99b64a0a1f1af75
7
+ data.tar.gz: c4b0e93bc4e0acb3d675c8d675d8f6235035aae72421794495f25223cb086eaa4c87d2cde63caa0eda257b0d91f374a0efbbb416ef8ee88c2f0ffde89a608831
@@ -1,10 +1,16 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
 
3
4
  # when run as gem, $0 is /usr/local/bin/sitediff not this file
4
- if $0 == __FILE__
5
- $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
5
+ if $PROGRAM_NAME == __FILE__
6
+ $LOAD_PATH.unshift File.expand_path('../lib', __dir__)
6
7
  end
7
8
 
8
9
  require 'sitediff/cli'
9
10
 
10
- SiteDiff::Cli.start
11
+ begin
12
+ SiteDiff::Cli.start
13
+ rescue Interrupt
14
+ puts("\n")
15
+ SiteDiff.log('Stopping. Interrupted by user.')
16
+ end
@@ -1,146 +1,220 @@
1
1
  #!/bin/env ruby
2
+ # frozen_string_literal: true
3
+
2
4
  require 'sitediff/config'
5
+ require 'sitediff/diff'
3
6
  require 'sitediff/fetch'
4
7
  require 'sitediff/result'
8
+ require 'sitediff/report'
5
9
  require 'pathname'
6
10
  require 'rainbow'
11
+ require 'rubygems'
7
12
  require 'yaml'
8
13
 
14
+ # SiteDiff Object.
9
15
  class SiteDiff
10
- # path to misc. static files (e.g. erb, css files)
16
+ attr_reader :config, :results
17
+
18
+ # SiteDiff installation directory.
19
+ ROOT_DIR = File.dirname(File.dirname(__FILE__))
20
+
21
+ # Path to misc files. Ex: *.erb, *.css.
11
22
  FILES_DIR = File.join(File.dirname(__FILE__), 'sitediff', 'files')
12
23
 
13
- # subdirectory containing all failing diffs
14
- DIFFS_DIR = 'diffs'
15
-
16
- # files in output
17
- FAILURES_FILE = 'failures.txt'
18
- REPORT_FILE = 'report.html'
19
- SETTINGS_FILE = 'settings.yaml'
20
-
21
- # label will be colorized and str will not be.
22
- # type dictates the color: can be :success, :error, or :failure
23
- def self.log(str, type=:info, label=nil)
24
- label = label ? "[sitediff] #{label}" : '[sitediff]'
25
- bg = fg = nil
26
- case type
27
- when :info
28
- when :diff_success
29
- bg = :green
24
+ # Logs a message.
25
+ #
26
+ # Label will be colorized and message will not.
27
+ # Type dictates the color: can be :info, :success, :error, or :warning.
28
+ #
29
+ # TODO: Only print :debug messages in debug mode.
30
+ def self.log(message, type = :info, label = nil)
31
+ # Prepare label.
32
+ label ||= type unless type == :info
33
+ label = label.to_s
34
+ unless label.empty?
35
+ # Colorize label.
30
36
  fg = :black
31
- when :diff_failure
32
- bg = :red
33
- when :warn
34
- bg = :yellow
35
- fg = :black
36
- when :error
37
- bg = :red
37
+ bg = :blue
38
+
39
+ case type
40
+ when :info
41
+ bg = :cyan
42
+ when :success
43
+ bg = :green
44
+ when :error
45
+ bg = :red
46
+ when :warning
47
+ bg = :yellow
48
+ end
49
+
50
+ label = '[' + label.to_s + ']'
51
+ label = Rainbow(label)
52
+ label = label.bg(bg) if bg
53
+ label = label.fg(fg) if fg
54
+
55
+ # Add a space after the label.
56
+ label += ' '
38
57
  end
39
- label = Rainbow(label)
40
- label = label.bg(bg) if bg
41
- label = label.fg(fg) if fg
42
- puts label + ' ' + str
58
+
59
+ puts label + message
43
60
  end
44
61
 
45
- attr_reader :config, :results
62
+ ##
63
+ # Returns the "before" site's URL.
64
+ #
65
+ # TODO: Remove in favor of config.before_url.
46
66
  def before
47
67
  @config.before['url']
48
68
  end
69
+
70
+ ##
71
+ # Returns the "after" site's URL.
72
+ #
73
+ # TODO: Remove in favor of config.after_url.
49
74
  def after
50
75
  @config.after['url']
51
76
  end
52
77
 
53
- def initialize(config, cache, verbose=true)
78
+ # Initialize SiteDiff.
79
+ def initialize(config, cache, verbose = true, debug = false)
54
80
  @cache = cache
55
81
  @verbose = verbose
82
+ @debug = debug
56
83
 
57
84
  # Check for single-site mode
58
85
  validate_opts = {}
59
86
  if !config.before['url'] && @cache.tag?(:before)
60
- raise SiteDiffException,
61
- "A cached 'before' is required for single-site mode" \
62
- unless @cache.read_tags.include?(:before)
87
+ unless @cache.read_tags.include?(:before)
88
+ raise SiteDiffException,
89
+ "A cached 'before' is required for single-site mode"
90
+ end
63
91
  validate_opts[:need_before] = false
64
92
  end
65
93
  config.validate(validate_opts)
66
-
94
+ # Configure diff.
95
+ Diff.diff_config(config)
67
96
  @config = config
68
97
  end
69
98
 
70
- # Sanitize HTML
99
+ # Sanitize HTML.
71
100
  def sanitize(path, read_results)
72
- [:before, :after].map do |tag|
101
+ %i[before after].map do |tag|
73
102
  html = read_results[tag].content
74
- config = @config.send(tag)
75
- Sanitizer.new(html, config, :path => path).sanitize
103
+ # TODO: See why encoding is empty while running tests.
104
+ #
105
+ # The presence of an "encoding" value used to be used to determine
106
+ # if the sanitizer would be called. However, encoding turns up blank
107
+ # during rspec tests for some reason.
108
+ encoding = read_results[tag].encoding
109
+ if encoding || html.length.positive?
110
+ section = @config.send(tag, true)
111
+ opts = { path: path }
112
+ opts[:output] = @config.output if @config.output
113
+ Sanitizer.new(html, section, opts).sanitize
114
+ else
115
+ html
116
+ end
76
117
  end
77
118
  end
78
119
 
79
- # Process a set of read results
120
+ ##
121
+ # Process a set of read results.
122
+ #
123
+ # This is the callback that processes items fetched by the Fetcher.
80
124
  def process_results(path, read_results)
81
- if error = read_results[:before].error || read_results[:after].error
82
- diff = Result.new(path, nil, nil, error)
125
+ error = (read_results[:before].error || read_results[:after].error)
126
+ if error
127
+ diff = Result.new(path, nil, nil, nil, nil, error)
83
128
  else
84
- diff = Result.new(path, *sanitize(path, read_results), nil)
129
+ begin
130
+ diff = Result.new(
131
+ path,
132
+ *sanitize(path, read_results),
133
+ read_results[:before].encoding,
134
+ read_results[:after].encoding,
135
+ nil
136
+ )
137
+ rescue StandardError => e
138
+ raise if @debug
139
+
140
+ Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e}")
141
+ end
85
142
  end
86
143
  @results[path] = diff
87
144
 
88
145
  # Print results in order!
89
- while next_diff = @results[@ordered.first]
146
+ while (next_diff = @results[@ordered.first])
90
147
  next_diff.log(@verbose)
91
148
  @ordered.shift
92
149
  end
93
150
  end
94
151
 
95
- # Perform the comparison, populate @results and return the number of failing
96
- # paths (paths with non-zero diff).
152
+ ##
153
+ # Compute diff as per config.
154
+ #
155
+ # @return [Integer]
156
+ # Number of paths which have diffs.
97
157
  def run
98
158
  # Map of path -> Result object, populated by process_results
99
159
  @results = {}
100
160
  @ordered = @config.paths.dup
101
161
 
102
162
  unless @cache.read_tags.empty?
103
- SiteDiff.log("Using sites from cache: " +
104
- @cache.read_tags.sort.join(', '))
163
+ SiteDiff.log('Using sites from cache: ' + @cache.read_tags.sort.join(', '))
105
164
  end
106
165
 
107
- fetcher = Fetch.new(@cache, @config.paths,
108
- :before => before, :after => after)
109
- fetcher.run(&self.method(:process_results))
166
+ # TODO: Fix this after config merge refactor!
167
+ # Not quite right. Fetch is not given @config.before or @config.after, so
168
+ # merged curl_opts are passed instead; @config.after['curl_opts'] is ignored.
169
+ curl_opts = @config.setting :curl_opts
170
+ config_curl_opts = @config.before['curl_opts']
171
+ curl_opts = config_curl_opts.clone.merge(curl_opts) if config_curl_opts
172
+ fetcher = Fetch.new(
173
+ @cache,
174
+ @config.paths,
175
+ @config.setting(:interval),
176
+ @config.setting(:concurrency),
177
+ curl_opts,
178
+ @debug,
179
+ before: @config.before_url,
180
+ after: @config.after_url
181
+ )
182
+
183
+ # Run the Fetcher with "process results" as a callback.
184
+ fetcher.run(&method(:process_results))
110
185
 
111
186
  # Order by original path order
112
- @results = @config.paths.map { |p| @results[p] }
113
- return results.map{ |r| r unless r.success? }.compact.length
187
+ @results = @config.paths.map { |path| @results[path] }
188
+ results.map { |r| r unless r.success? }.compact.length
114
189
  end
115
190
 
116
- # Dump results to disk
117
- def dump(dir, report_before, report_after)
118
- report_before ||= before
119
- report_after ||= after
120
- dir = Pathname.new(dir)
121
- dir.mkpath unless dir.directory?
122
-
123
- # store diffs of each failing case, first wipe out existing diffs
124
- diff_dir = dir + DIFFS_DIR
125
- diff_dir.rmtree if diff_dir.exist?
126
- results.each { |r| r.dump(dir) if r.status == Result::STATUS_FAILURE }
127
- SiteDiff::log "All diff files were dumped inside #{dir.expand_path}"
128
-
129
- # store failing paths
130
- failures = dir + FAILURES_FILE
131
- SiteDiff::log "Writing failures to #{failures.expand_path}"
132
- failures.open('w') do |f|
133
- results.each { |r| f.puts r.path unless r.success? }
191
+ ##
192
+ # Get a reporter object to help with report generation.
193
+ def report
194
+ if @results.nil?
195
+ raise SiteDiffException(
196
+ 'No results detected. Run SiteDiff.run before SiteDiff.report.'
197
+ )
134
198
  end
135
199
 
136
- # create report of results
137
- report = Diff::generate_html_report(results, report_before, report_after,
138
- @cache)
139
- dir.+(REPORT_FILE).open('w') { |f| f.write(report) }
200
+ Report.new(@config, @cache, @results)
201
+ end
140
202
 
141
- # serve some settings
142
- settings = { 'before' => report_before, 'after' => report_after,
143
- 'cached' => @cache.read_tags.map { |t| t.to_s } }
144
- dir.+(SETTINGS_FILE).open('w') { |f| YAML.dump(settings, f) }
203
+ ##
204
+ # Get SiteDiff gemspec.
205
+ def self.gemspec
206
+ file = ROOT_DIR + '/sitediff.gemspec'
207
+ Gem::Specification.load(file)
208
+ end
209
+
210
+ ##
211
+ # Ensures that a directory exists and returns a Pathname for it.
212
+ #
213
+ # @param [String] dir
214
+ # path/to/directory
215
+ def self.ensure_dir(dir)
216
+ dir = Pathname.new(dir) unless dir.is_a? Pathname
217
+ dir.mkpath unless dir.directory?
218
+ dir
145
219
  end
146
220
  end
@@ -0,0 +1,265 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sitediff'
4
+ require 'sitediff/cache'
5
+ require 'sitediff/config'
6
+ require 'sitediff/config/creator'
7
+ require 'sitediff/config/preset'
8
+ require 'sitediff/fetch'
9
+ require 'sitediff/webserver/resultserver'
10
+
11
+ class SiteDiff
12
+ ##
13
+ # Sitediff API interface.
14
+ class Api
15
+ ##
16
+ # Initializes new Api object.
17
+ def initialize(directory, config_file = nil)
18
+ @dir = get_dir(directory)
19
+ @config = SiteDiff::Config.new(config_file, @dir)
20
+ end
21
+
22
+ ##
23
+ # Initialize a SiteDiff project.
24
+ #
25
+ # Calling:
26
+ # SiteDiff::Api.init(
27
+ # depth: 3,
28
+ # directory: 'sitediff',
29
+ # concurrency: 3,
30
+ # interval: 0,
31
+ # include: nil,
32
+ # exclude: '*.pdf',
33
+ # preset: 'drupal',
34
+ # curl_opts: {timeout: 60},
35
+ # crawl: false
36
+ # )
37
+ def self.init(options)
38
+ # Prepare a config object and write it to the file system.
39
+ creator = SiteDiff::Config::Creator.new(options[:debug], options[:before_url], options[:after_url])
40
+ include_regex = Config.create_regexp(options[:include])
41
+ exclude_regex = Config.create_regexp(options[:exclude])
42
+ creator.create(
43
+ depth: options[:depth],
44
+ directory: options[:directory],
45
+ concurrency: options[:concurrency],
46
+ interval: options[:interval],
47
+ include: include_regex,
48
+ exclude: exclude_regex,
49
+ preset: options[:preset],
50
+ curl_opts: options[:curl_opts]
51
+ )
52
+ SiteDiff.log "Created #{creator.config_file.expand_path}", :success
53
+
54
+ # TODO: implement crawl ^^^
55
+ # Discover paths, if enabled.
56
+ # if options[:crawl]
57
+ # crawl(creator.config_file)
58
+ # SiteDiff.log 'You can now run "sitediff diff".', :success
59
+ # else
60
+ # SiteDiff.log 'Run "sitediff crawl" to discover paths. You should then be able to run "sitediff diff".', :info
61
+ # end
62
+ end
63
+
64
+ ##
65
+ # Diff the `before` and `after`.
66
+ #
67
+ # Calling:
68
+ # Api.diff(
69
+ # paths: options['paths'],
70
+ # paths_file: options['paths-file'],
71
+ # ignore_whitespace: options['ignore-whitespace'],
72
+ # export: options['export'],
73
+ # before: options['before'],
74
+ # after: options['after'],
75
+ # cached: options['cached'],
76
+ # verbose: options['verbose'],
77
+ # report_format: options['report-format'],
78
+ # before_report: options['before-report'],
79
+ # after_report: options['after-report'],
80
+ # cli_mode: false
81
+ # )
82
+ def diff(options)
83
+ @config.ignore_whitespace = options[:ignore_whitespace]
84
+ @config.export = options[:export]
85
+ # Apply "paths" override, if any.
86
+ if options[:paths]
87
+ @config.paths = options[:paths]
88
+ else
89
+ paths_file = options[:paths_file]
90
+ paths_file ||= File.join(@dir, Config::DEFAULT_PATHS_FILENAME)
91
+ paths_file = File.expand_path(paths_file)
92
+
93
+ paths_count = @config.paths_file_read(paths_file)
94
+ SiteDiff.log "Read #{paths_count} paths from: #{paths_file}"
95
+ end
96
+
97
+ # TODO: Why do we allow before and after override during diff?
98
+ @config.before['url'] = options[:before] if options[:before]
99
+ @config.after['url'] = options[:after] if options[:after]
100
+
101
+ # Prepare cache.
102
+ cache = SiteDiff::Cache.new(
103
+ create: options[:cached] != 'none',
104
+ directory: @dir
105
+ )
106
+ cache.read_tags << :before if %w[before all].include?(options[:cached])
107
+ cache.read_tags << :after if %w[after all].include?(options[:cached])
108
+ cache.write_tags << :before << :after
109
+
110
+ # Run sitediff.
111
+ sitediff = SiteDiff.new(
112
+ @config,
113
+ cache,
114
+ options[:verbose],
115
+ options[:debug]
116
+ )
117
+ num_failing = sitediff.run
118
+ exit_code = num_failing.positive? ? 2 : 0
119
+
120
+ # Generate HTML report.
121
+ if options[:report_format] == 'html' || @config.export
122
+ sitediff.report.generate_html(
123
+ @dir,
124
+ options[:before_report],
125
+ options[:after_report]
126
+ )
127
+ end
128
+
129
+ # Generate JSON report.
130
+ if options[:report_format] == 'json' && @config.export == false
131
+ sitediff.report.generate_json @dir
132
+ end
133
+
134
+ SiteDiff.log 'Run "sitediff serve" to see a report.' unless options[:export]
135
+ rescue Config::InvalidConfig => e
136
+ SiteDiff.log "Invalid configuration: #{e.message}", :error
137
+ SiteDiff.log e.backtrace, :error if options[:verbose]
138
+ rescue Config::ConfigNotFound => e
139
+ SiteDiff.log "Invalid configuration: #{e.message}", :error
140
+ SiteDiff.log e.backtrace, :error if options[:verbose]
141
+ else # no exception was raised
142
+ # Thor::Error --> exit(1), guaranteed by exit_on_failure?
143
+ # Failing diff --> exit(2), populated above
144
+ exit(exit_code) if options[:cli_mode]
145
+ end
146
+
147
+ ##
148
+ # Crawl the `before` site to determine `paths`.
149
+ def crawl
150
+ # Prepare cache.
151
+ @cache = SiteDiff::Cache.new(
152
+ create: true,
153
+ directory: @dir
154
+ )
155
+ @cache.write_tags << :before << :after
156
+
157
+ # Crawl with Hydra to discover paths.
158
+ hydra = Typhoeus::Hydra.new(
159
+ max_concurrency: @config.setting(:concurrency)
160
+ )
161
+ @paths = {}
162
+ @config.roots.each do |tag, url|
163
+ Crawler.new(
164
+ hydra,
165
+ url,
166
+ @config.setting(:interval),
167
+ @config.setting(:include),
168
+ @config.setting(:exclude),
169
+ @config.setting(:depth),
170
+ @config.curl_opts,
171
+ @debug
172
+ ) do |info|
173
+ SiteDiff.log "Visited #{info.uri}, cached."
174
+ after_crawl(tag, info)
175
+ end
176
+ end
177
+ hydra.run
178
+
179
+ # Write paths to a file.
180
+ @paths = @paths.values.reduce(&:|).to_a.sort
181
+ @config.paths_file_write(@paths)
182
+
183
+ # Log output.
184
+ file = Pathname.new(@dir) + Config::DEFAULT_PATHS_FILENAME
185
+ SiteDiff.log ''
186
+ SiteDiff.log "#{@paths.length} page(s) found."
187
+ SiteDiff.log "Created #{file.expand_path}.", :success, 'done'
188
+ end
189
+
190
+ ##
191
+ # Serves SiteDiff report for accessing in the browser.
192
+ #
193
+ # Calling:
194
+ # api.serve(browse: true, port: 13080)
195
+ def serve(options)
196
+ @cache = Cache.new(directory: @dir)
197
+ @cache.read_tags << :before << :after
198
+
199
+ SiteDiff::Webserver::ResultServer.new(
200
+ options[:port],
201
+ @dir,
202
+ browse: options[:browse],
203
+ cache: @cache,
204
+ config: @config
205
+ ).wait
206
+ rescue SiteDiffException => e
207
+ SiteDiff.log e.message, :error
208
+ SiteDiff.log e.backtrace, :error if options[:verbose]
209
+ end
210
+
211
+ ##
212
+ # Fetches the configured paths from options[:url] (or the "after" URL) for caching as "before".
213
+ def store(options)
214
+ # TODO: Figure out how to remove this config.validate call.
215
+ @config.validate(need_before: false)
216
+ @config.paths_file_read
217
+
218
+ @cache = SiteDiff::Cache.new(directory: @dir, create: true)
219
+ @cache.write_tags << :before
220
+
221
+ base = options[:url] || @config.after['url']
222
+ fetcher = SiteDiff::Fetch.new(@cache,
223
+ @config.paths,
224
+ @config.setting(:interval),
225
+ @config.setting(:concurrency),
226
+ get_curl_opts(@config.settings),
227
+ options[:debug],
228
+ before: base)
229
+ fetcher.run do |path, _res|
230
+ SiteDiff.log "Visited #{path}, cached"
231
+ end
232
+ end
233
+
234
+ private
235
+
236
+ ##
237
+ # Ensures that the given directory exists.
238
+ def get_dir(directory)
239
+ # Create the dir. Must go before cache initialization!
240
+ @dir = Pathname.new(directory || '.')
241
+ @dir.mkpath unless @dir.directory?
242
+ @dir.to_s
243
+ end
244
+
245
+ ##
246
+ # Processes a crawled path.
247
+ def after_crawl(tag, info)
248
+ path = UriWrapper.canonicalize(info.relative)
249
+
250
+ # Register the path.
251
+ @paths[tag] = [] unless @paths[tag]
252
+ @paths[tag] << path
253
+
254
+ result = info.read_result
255
+
256
+ # Write result to applicable cache.
257
+ @cache.set(tag, path, result)
258
+ # If single-site, cache "after" as "before".
259
+ @cache.set(:before, path, result) unless @config.roots[:before]
260
+
261
+ # TODO: Restore application of rules.
262
+ # @rules.handle_page(tag, res.content, info.document) if @rules && !res.error
263
+ end
264
+ end
265
+ end