sitediff 0.0.2 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: f75892f718764c8fd2c18d7f3f7e7cf8908d60ea07c2a765510c8ef409b9f0c1
4
+ data.tar.gz: 3b3744eca0dda04821152aab596fb67891204a1599b4db72e13b4af484693e65
5
+ SHA512:
6
+ metadata.gz: 97e9098b290742f1b3efe3c284e9392be95ffd0f7576df413a6ec612142b0573acf8b8b4d43369961c154d801db6284fcc1a8d69cea7da8ed99b64a0a1f1af75
7
+ data.tar.gz: c4b0e93bc4e0acb3d675c8d675d8f6235035aae72421794495f25223cb086eaa4c87d2cde63caa0eda257b0d91f374a0efbbb416ef8ee88c2f0ffde89a608831
@@ -1,10 +1,16 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
 
3
4
  # when run as gem, $0 is /usr/local/bin/sitediff not this file
4
- if $0 == __FILE__
5
- $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
5
+ if $PROGRAM_NAME == __FILE__
6
+ $LOAD_PATH.unshift File.expand_path('../lib', __dir__)
6
7
  end
7
8
 
8
9
  require 'sitediff/cli'
9
10
 
10
- SiteDiff::Cli.start
11
+ begin
12
+ SiteDiff::Cli.start
13
+ rescue Interrupt
14
+ puts("\n")
15
+ SiteDiff.log('Stopping. Interrupted by user.')
16
+ end
@@ -1,146 +1,220 @@
1
1
  #!/bin/env ruby
2
+ # frozen_string_literal: true
3
+
2
4
  require 'sitediff/config'
5
+ require 'sitediff/diff'
3
6
  require 'sitediff/fetch'
4
7
  require 'sitediff/result'
8
+ require 'sitediff/report'
5
9
  require 'pathname'
6
10
  require 'rainbow'
11
+ require 'rubygems'
7
12
  require 'yaml'
8
13
 
14
+ # SiteDiff Object.
9
15
  class SiteDiff
10
- # path to misc. static files (e.g. erb, css files)
16
+ attr_reader :config, :results
17
+
18
+ # SiteDiff installation directory.
19
+ ROOT_DIR = File.dirname(File.dirname(__FILE__))
20
+
21
+ # Path to misc files. Ex: *.erb, *.css.
11
22
  FILES_DIR = File.join(File.dirname(__FILE__), 'sitediff', 'files')
12
23
 
13
- # subdirectory containing all failing diffs
14
- DIFFS_DIR = 'diffs'
15
-
16
- # files in output
17
- FAILURES_FILE = 'failures.txt'
18
- REPORT_FILE = 'report.html'
19
- SETTINGS_FILE = 'settings.yaml'
20
-
21
- # label will be colorized and str will not be.
22
- # type dictates the color: can be :success, :error, or :failure
23
- def self.log(str, type=:info, label=nil)
24
- label = label ? "[sitediff] #{label}" : '[sitediff]'
25
- bg = fg = nil
26
- case type
27
- when :info
28
- when :diff_success
29
- bg = :green
24
+ # Logs a message.
25
+ #
26
+ # Label will be colorized and message will not.
27
+ # Type dictates the color: can be :info, :success, :error, or :warning.
28
+ #
29
+ # TODO: Only print :debug messages in debug mode.
30
+ def self.log(message, type = :info, label = nil)
31
+ # Prepare label.
32
+ label ||= type unless type == :info
33
+ label = label.to_s
34
+ unless label.empty?
35
+ # Colorize label.
30
36
  fg = :black
31
- when :diff_failure
32
- bg = :red
33
- when :warn
34
- bg = :yellow
35
- fg = :black
36
- when :error
37
- bg = :red
37
+ bg = :blue
38
+
39
+ case type
40
+ when :info
41
+ bg = :cyan
42
+ when :success
43
+ bg = :green
44
+ when :error
45
+ bg = :red
46
+ when :warning
47
+ bg = :yellow
48
+ end
49
+
50
+ label = '[' + label.to_s + ']'
51
+ label = Rainbow(label)
52
+ label = label.bg(bg) if bg
53
+ label = label.fg(fg) if fg
54
+
55
+ # Add a space after the label.
56
+ label += ' '
38
57
  end
39
- label = Rainbow(label)
40
- label = label.bg(bg) if bg
41
- label = label.fg(fg) if fg
42
- puts label + ' ' + str
58
+
59
+ puts label + message
43
60
  end
44
61
 
45
- attr_reader :config, :results
62
+ ##
63
+ # Returns the "before" site's URL.
64
+ #
65
+ # TODO: Remove in favor of config.before_url.
46
66
  def before
47
67
  @config.before['url']
48
68
  end
69
+
70
+ ##
71
+ # Returns the "after" site's URL.
72
+ #
73
+ # TODO: Remove in favor of config.after_url.
49
74
  def after
50
75
  @config.after['url']
51
76
  end
52
77
 
53
- def initialize(config, cache, verbose=true)
78
+ # Initialize SiteDiff.
79
+ def initialize(config, cache, verbose = true, debug = false)
54
80
  @cache = cache
55
81
  @verbose = verbose
82
+ @debug = debug
56
83
 
57
84
  # Check for single-site mode
58
85
  validate_opts = {}
59
86
  if !config.before['url'] && @cache.tag?(:before)
60
- raise SiteDiffException,
61
- "A cached 'before' is required for single-site mode" \
62
- unless @cache.read_tags.include?(:before)
87
+ unless @cache.read_tags.include?(:before)
88
+ raise SiteDiffException,
89
+ "A cached 'before' is required for single-site mode"
90
+ end
63
91
  validate_opts[:need_before] = false
64
92
  end
65
93
  config.validate(validate_opts)
66
-
94
+ # Configure diff.
95
+ Diff.diff_config(config)
67
96
  @config = config
68
97
  end
69
98
 
70
- # Sanitize HTML
99
+ # Sanitize HTML.
71
100
  def sanitize(path, read_results)
72
- [:before, :after].map do |tag|
101
+ %i[before after].map do |tag|
73
102
  html = read_results[tag].content
74
- config = @config.send(tag)
75
- Sanitizer.new(html, config, :path => path).sanitize
103
+ # TODO: See why encoding is empty while running tests.
104
+ #
105
+ # The presence of an "encoding" value used to be used to determine
106
+ # if the sanitizer would be called. However, encoding turns up blank
107
+ # during rspec tests for some reason.
108
+ encoding = read_results[tag].encoding
109
+ if encoding || html.length.positive?
110
+ section = @config.send(tag, true)
111
+ opts = { path: path }
112
+ opts[:output] = @config.output if @config.output
113
+ Sanitizer.new(html, section, opts).sanitize
114
+ else
115
+ html
116
+ end
76
117
  end
77
118
  end
78
119
 
79
- # Process a set of read results
120
+ ##
121
+ # Process a set of read results.
122
+ #
123
+ # This is the callback that processes items fetched by the Fetcher.
80
124
  def process_results(path, read_results)
81
- if error = read_results[:before].error || read_results[:after].error
82
- diff = Result.new(path, nil, nil, error)
125
+ error = (read_results[:before].error || read_results[:after].error)
126
+ if error
127
+ diff = Result.new(path, nil, nil, nil, nil, error)
83
128
  else
84
- diff = Result.new(path, *sanitize(path, read_results), nil)
129
+ begin
130
+ diff = Result.new(
131
+ path,
132
+ *sanitize(path, read_results),
133
+ read_results[:before].encoding,
134
+ read_results[:after].encoding,
135
+ nil
136
+ )
137
+ rescue StandardError => e
138
+ raise if @debug
139
+
140
+ Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e}")
141
+ end
85
142
  end
86
143
  @results[path] = diff
87
144
 
88
145
  # Print results in order!
89
- while next_diff = @results[@ordered.first]
146
+ while (next_diff = @results[@ordered.first])
90
147
  next_diff.log(@verbose)
91
148
  @ordered.shift
92
149
  end
93
150
  end
94
151
 
95
- # Perform the comparison, populate @results and return the number of failing
96
- # paths (paths with non-zero diff).
152
+ ##
153
+ # Compute diff as per config.
154
+ #
155
+ # @return [Integer]
156
+ # Number of paths which have diffs.
97
157
  def run
98
158
  # Map of path -> Result object, populated by process_results
99
159
  @results = {}
100
160
  @ordered = @config.paths.dup
101
161
 
102
162
  unless @cache.read_tags.empty?
103
- SiteDiff.log("Using sites from cache: " +
104
- @cache.read_tags.sort.join(', '))
163
+ SiteDiff.log('Using sites from cache: ' + @cache.read_tags.sort.join(', '))
105
164
  end
106
165
 
107
- fetcher = Fetch.new(@cache, @config.paths,
108
- :before => before, :after => after)
109
- fetcher.run(&self.method(:process_results))
166
+ # TODO: Fix this after config merge refactor!
167
+ # Not quite right. We are not passing @config.before or @config.after
168
+ # so passing this instead but @config.after['curl_opts'] is ignored.
169
+ curl_opts = @config.setting :curl_opts
170
+ config_curl_opts = @config.before['curl_opts']
171
+ curl_opts = config_curl_opts.clone.merge(curl_opts) if config_curl_opts
172
+ fetcher = Fetch.new(
173
+ @cache,
174
+ @config.paths,
175
+ @config.setting(:interval),
176
+ @config.setting(:concurrency),
177
+ curl_opts,
178
+ @debug,
179
+ before: @config.before_url,
180
+ after: @config.after_url
181
+ )
182
+
183
+ # Run the Fetcher with "process results" as a callback.
184
+ fetcher.run(&method(:process_results))
110
185
 
111
186
  # Order by original path order
112
- @results = @config.paths.map { |p| @results[p] }
113
- return results.map{ |r| r unless r.success? }.compact.length
187
+ @results = @config.paths.map { |path| @results[path] }
188
+ results.map { |r| r unless r.success? }.compact.length
114
189
  end
115
190
 
116
- # Dump results to disk
117
- def dump(dir, report_before, report_after)
118
- report_before ||= before
119
- report_after ||= after
120
- dir = Pathname.new(dir)
121
- dir.mkpath unless dir.directory?
122
-
123
- # store diffs of each failing case, first wipe out existing diffs
124
- diff_dir = dir + DIFFS_DIR
125
- diff_dir.rmtree if diff_dir.exist?
126
- results.each { |r| r.dump(dir) if r.status == Result::STATUS_FAILURE }
127
- SiteDiff::log "All diff files were dumped inside #{dir.expand_path}"
128
-
129
- # store failing paths
130
- failures = dir + FAILURES_FILE
131
- SiteDiff::log "Writing failures to #{failures.expand_path}"
132
- failures.open('w') do |f|
133
- results.each { |r| f.puts r.path unless r.success? }
191
+ ##
192
+ # Get a reporter object to help with report generation.
193
+ def report
194
+ if @results.nil?
195
+ raise SiteDiffException(
196
+ 'No results detected. Run SiteDiff.run before SiteDiff.report.'
197
+ )
134
198
  end
135
199
 
136
- # create report of results
137
- report = Diff::generate_html_report(results, report_before, report_after,
138
- @cache)
139
- dir.+(REPORT_FILE).open('w') { |f| f.write(report) }
200
+ Report.new(@config, @cache, @results)
201
+ end
140
202
 
141
- # serve some settings
142
- settings = { 'before' => report_before, 'after' => report_after,
143
- 'cached' => @cache.read_tags.map { |t| t.to_s } }
144
- dir.+(SETTINGS_FILE).open('w') { |f| YAML.dump(settings, f) }
203
+ ##
204
+ # Get SiteDiff gemspec.
205
+ def self.gemspec
206
+ file = ROOT_DIR + '/sitediff.gemspec'
207
+ Gem::Specification.load(file)
208
+ end
209
+
210
+ ##
211
+ # Ensures that a directory exists and returns a Pathname for it.
212
+ #
213
+ # @param [String] dir
214
+ # path/to/directory
215
+ def self.ensure_dir(dir)
216
+ dir = Pathname.new(dir) unless dir.is_a? Pathname
217
+ dir.mkpath unless dir.directory?
218
+ dir
145
219
  end
146
220
  end
@@ -0,0 +1,265 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sitediff'
4
+ require 'sitediff/cache'
5
+ require 'sitediff/config'
6
+ require 'sitediff/config/creator'
7
+ require 'sitediff/config/preset'
8
+ require 'sitediff/fetch'
9
+ require 'sitediff/webserver/resultserver'
10
+
11
+ class SiteDiff
12
+ ##
13
+ # Sitediff API interface.
14
+ class Api
15
+ ##
16
+ # Initializes new Api object.
17
+ def initialize(directory, config_file = nil)
18
+ @dir = get_dir(directory)
19
+ @config = SiteDiff::Config.new(config_file, @dir)
20
+ end
21
+
22
+ ##
23
+ # Initialize a SiteDiff project.
24
+ #
25
+ # Calling:
26
+ # SiteDiff::Api.init(
27
+ # depth: 3,
28
+ # directory: 'sitediff',
29
+ # concurrency: 3,
30
+ # interval: 0,
31
+ # include: nil,
32
+ # exclude: '*.pdf',
33
+ # preset: 'drupal',
34
+ # curl_opts: {timeout: 60},
35
+ # crawl: false
36
+ # )
37
+ def self.init(options)
38
+ # Prepare a config object and write it to the file system.
39
+ creator = SiteDiff::Config::Creator.new(options[:debug], options[:before_url], options[:after_url])
40
+ include_regex = Config.create_regexp(options[:include])
41
+ exclude_regex = Config.create_regexp(options[:exclude])
42
+ creator.create(
43
+ depth: options[:depth],
44
+ directory: options[:directory],
45
+ concurrency: options[:concurrency],
46
+ interval: options[:interval],
47
+ include: include_regex,
48
+ exclude: exclude_regex,
49
+ preset: options[:preset],
50
+ curl_opts: options[:curl_opts]
51
+ )
52
+ SiteDiff.log "Created #{creator.config_file.expand_path}", :success
53
+
54
+ # TODO: implement crawl ^^^
55
+ # Discover paths, if enabled.
56
+ # if options[:crawl]
57
+ # crawl(creator.config_file)
58
+ # SiteDiff.log 'You can now run "sitediff diff".', :success
59
+ # else
60
+ # SiteDiff.log 'Run "sitediff crawl" to discover paths. You should then be able to run "sitediff diff".', :info
61
+ # end
62
+ end
63
+
64
+ ##
65
+ # Diff the `before` and `after`.
66
+ #
67
+ # Calling:
68
+ # Api.diff(
69
+ # paths: options['paths'],
70
+ # paths_file: options['paths-file'],
71
+ # ignore_whitespace: options['ignore-whitespace'],
72
+ # export: options['export'],
73
+ # before: options['before'],
74
+ # after: options['after'],
75
+ # cached: options['cached'],
76
+ # verbose: options['verbose'],
77
+ # report_format: options['report-format'],
78
+ # before_report: options['before-report'],
79
+ # after_report: options['after-report'],
80
+ # cli_mode: false
81
+ # )
82
+ def diff(options)
83
+ @config.ignore_whitespace = options[:ignore_whitespace]
84
+ @config.export = options[:export]
85
+ # Apply "paths" override, if any.
86
+ if options[:paths]
87
+ @config.paths = options[:paths]
88
+ else
89
+ paths_file = options[:paths_file]
90
+ paths_file ||= File.join(@dir, Config::DEFAULT_PATHS_FILENAME)
91
+ paths_file = File.expand_path(paths_file)
92
+
93
+ paths_count = @config.paths_file_read(paths_file)
94
+ SiteDiff.log "Read #{paths_count} paths from: #{paths_file}"
95
+ end
96
+
97
+ # TODO: Why do we allow before and after override during diff?
98
+ @config.before['url'] = options[:before] if options[:before]
99
+ @config.after['url'] = options[:after] if options[:after]
100
+
101
+ # Prepare cache.
102
+ cache = SiteDiff::Cache.new(
103
+ create: options[:cached] != 'none',
104
+ directory: @dir
105
+ )
106
+ cache.read_tags << :before if %w[before all].include?(options[:cached])
107
+ cache.read_tags << :after if %w[after all].include?(options[:cached])
108
+ cache.write_tags << :before << :after
109
+
110
+ # Run sitediff.
111
+ sitediff = SiteDiff.new(
112
+ @config,
113
+ cache,
114
+ options[:verbose],
115
+ options[:debug]
116
+ )
117
+ num_failing = sitediff.run
118
+ exit_code = num_failing.positive? ? 2 : 0
119
+
120
+ # Generate HTML report.
121
+ if options[:report_format] == 'html' || @config.export
122
+ sitediff.report.generate_html(
123
+ @dir,
124
+ options[:before_report],
125
+ options[:after_report]
126
+ )
127
+ end
128
+
129
+ # Generate JSON report.
130
+ if options[:report_format] == 'json' && @config.export == false
131
+ sitediff.report.generate_json @dir
132
+ end
133
+
134
+ SiteDiff.log 'Run "sitediff serve" to see a report.' unless options[:export]
135
+ rescue Config::InvalidConfig => e
136
+ SiteDiff.log "Invalid configuration: #{e.message}", :error
137
+ SiteDiff.log e.backtrace, :error if options[:verbose]
138
+ rescue Config::ConfigNotFound => e
139
+ SiteDiff.log "Invalid configuration: #{e.message}", :error
140
+ SiteDiff.log e.backtrace, :error if options[:verbose]
141
+ else # no exception was raised
142
+ # Thor::Error --> exit(1), guaranteed by exit_on_failure?
143
+ # Failing diff --> exit(2), populated above
144
+ exit(exit_code) if options[:cli_mode]
145
+ end
146
+
147
+ ##
148
+ # Crawl the `before` site to determine `paths`.
149
+ def crawl
150
+ # Prepare cache.
151
+ @cache = SiteDiff::Cache.new(
152
+ create: true,
153
+ directory: @dir
154
+ )
155
+ @cache.write_tags << :before << :after
156
+
157
+ # Crawl with Hydra to discover paths.
158
+ hydra = Typhoeus::Hydra.new(
159
+ max_concurrency: @config.setting(:concurrency)
160
+ )
161
+ @paths = {}
162
+ @config.roots.each do |tag, url|
163
+ Crawler.new(
164
+ hydra,
165
+ url,
166
+ @config.setting(:interval),
167
+ @config.setting(:include),
168
+ @config.setting(:exclude),
169
+ @config.setting(:depth),
170
+ @config.curl_opts,
171
+ @debug
172
+ ) do |info|
173
+ SiteDiff.log "Visited #{info.uri}, cached."
174
+ after_crawl(tag, info)
175
+ end
176
+ end
177
+ hydra.run
178
+
179
+ # Write paths to a file.
180
+ @paths = @paths.values.reduce(&:|).to_a.sort
181
+ @config.paths_file_write(@paths)
182
+
183
+ # Log output.
184
+ file = Pathname.new(@dir) + Config::DEFAULT_PATHS_FILENAME
185
+ SiteDiff.log ''
186
+ SiteDiff.log "#{@paths.length} page(s) found."
187
+ SiteDiff.log "Created #{file.expand_path}.", :success, 'done'
188
+ end
189
+
190
+ ##
191
+ # Serves SiteDiff report for accessing in the browser.
192
+ #
193
+ # Calling:
194
+ # api.serve(browse: true, port: 13080)
195
+ def serve(options)
196
+ @cache = Cache.new(directory: @dir)
197
+ @cache.read_tags << :before << :after
198
+
199
+ SiteDiff::Webserver::ResultServer.new(
200
+ options[:port],
201
+ @dir,
202
+ browse: options[:browse],
203
+ cache: @cache,
204
+ config: @config
205
+ ).wait
206
+ rescue SiteDiffException => e
207
+ SiteDiff.log e.message, :error
208
+ SiteDiff.log e.backtrace, :error if options[:verbose]
209
+ end
210
+
211
+ ##
212
+ # Fetches every configured path from options[:url] (or the "after" URL) and caches it as "before".
213
+ def store(options)
214
+ # TODO: Figure out how to remove this config.validate call.
215
+ @config.validate(need_before: false)
216
+ @config.paths_file_read
217
+
218
+ @cache = SiteDiff::Cache.new(directory: @dir, create: true)
219
+ @cache.write_tags << :before
220
+
221
+ base = options[:url] || @config.after['url']
222
+ fetcher = SiteDiff::Fetch.new(@cache,
223
+ @config.paths,
224
+ @config.setting(:interval),
225
+ @config.setting(:concurrency),
226
+ get_curl_opts(@config.settings),
227
+ options[:debug],
228
+ before: base)
229
+ fetcher.run do |path, _res|
230
+ SiteDiff.log "Visited #{path}, cached"
231
+ end
232
+ end
233
+
234
+ private
235
+
236
+ ##
237
+ # Ensures that the given directory exists.
238
+ def get_dir(directory)
239
+ # Create the dir. Must go before cache initialization!
240
+ @dir = Pathname.new(directory || '.')
241
+ @dir.mkpath unless @dir.directory?
242
+ @dir.to_s
243
+ end
244
+
245
+ ##
246
+ # Processes a crawled path.
247
+ def after_crawl(tag, info)
248
+ path = UriWrapper.canonicalize(info.relative)
249
+
250
+ # Register the path.
251
+ @paths[tag] = [] unless @paths[tag]
252
+ @paths[tag] << path
253
+
254
+ result = info.read_result
255
+
256
+ # Write result to applicable cache.
257
+ @cache.set(tag, path, result)
258
+ # If single-site, cache "after" as "before".
259
+ @cache.set(:before, path, result) unless @config.roots[:before]
260
+
261
+ # TODO: Restore application of rules.
262
+ # @rules.handle_page(tag, res.content, info.document) if @rules && !res.error
263
+ end
264
+ end
265
+ end