sitediff 0.0.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 1dc3a624b91cd4b7ef1c926116630cd795532024
4
- data.tar.gz: e49f227ae303f574b704ffe3a226f79a120ae30f
2
+ SHA256:
3
+ metadata.gz: 1b7854497b5e81f48d810acec8106cbc66e33492d046e032e5516f76db26f142
4
+ data.tar.gz: a9349a79953237dd017600d49d38b8e734afc561f0ce09a1f8732e0e933530c9
5
5
  SHA512:
6
- metadata.gz: 90ca5508b834d32ac7c96aa6a94a6aa8488921e978e76890e142b1249da20bc620ddcfa237f3defc1e6928d83dd0a22583c9dded150855c320f94140e1bffdf1
7
- data.tar.gz: 24bf7969b6f17c269bb407d1ff1684f6556318d0cfa7c6c92a8327ddd0d86ee4f153778affa4d1ef115e47a3b69b31b4dac5f01ed8b7f464a05fd98f9f98212b
6
+ metadata.gz: 0e91f665f3c59b8a65f16c6942ec49b9cc9ee7fd12b0a777eadb844a0b9819ab1fd9485495bf2c757ca7342a6198dcccb5ae546c4ddf2682f234d015b64309b2
7
+ data.tar.gz: 64b7980bbbade8710b6069af19a67083678c2bd5fa99674df3360c1c6a3ddf8a15de7c5be4e8349ec298fc1c0dc27535b816089cd4f8852b8c8633861d72a178
@@ -1,10 +1,16 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
 
3
4
  # when run as gem, $0 is /usr/local/bin/sitediff not this file
4
- if $0 == __FILE__
5
- $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
5
+ if $PROGRAM_NAME == __FILE__
6
+ $LOAD_PATH.unshift File.expand_path('../lib', __dir__)
6
7
  end
7
8
 
8
- require 'sitediff'
9
+ require 'sitediff/cli'
9
10
 
10
- SiteDiff::Cli.start
11
+ begin
12
+ SiteDiff::Cli.start
13
+ rescue Interrupt
14
+ puts("\n")
15
+ SiteDiff.log('Stopping. Interrupted by user.')
16
+ end
@@ -1,130 +1,218 @@
1
1
  #!/bin/env ruby
2
- require 'sitediff/cli.rb'
3
- require 'sitediff/config.rb'
4
- require 'sitediff/result.rb'
5
- require 'sitediff/uriwrapper'
6
- require 'sitediff/util/cache'
7
- require 'typhoeus'
2
+ # frozen_string_literal: true
3
+
4
+ require 'sitediff/config'
5
+ require 'sitediff/diff'
6
+ require 'sitediff/fetch'
7
+ require 'sitediff/result'
8
+ require 'sitediff/report'
9
+ require 'pathname'
8
10
  require 'rainbow'
11
+ require 'rubygems'
12
+ require 'yaml'
9
13
 
14
+ # SiteDiff Object.
10
15
  class SiteDiff
11
- # path to misc. static files (e.g. erb, css files)
16
+ attr_reader :config, :results
17
+
18
+ # SiteDiff installation directory.
19
+ ROOT_DIR = File.dirname(File.dirname(__FILE__))
20
+
21
+ # Path to misc files. Ex: *.erb, *.css.
12
22
  FILES_DIR = File.join(File.dirname(__FILE__), 'sitediff', 'files')
13
23
 
14
- # subdirectory containing all failing diffs
15
- DIFFS_DIR = 'diffs'
16
-
17
- # label will be colorized and str will not be.
18
- # type dictates the color: can be :success, :error, or :failure
19
- def self.log(str, type=nil, label=nil)
20
- label = label ? "[sitediff] #{label}" : '[sitediff]'
21
- bg = fg = nil
22
- case type
23
- when :success
24
- bg = :green
25
- fg = :black
26
- when :failure
27
- bg = :red
28
- when :error
29
- bg = :yellow
24
+ # Logs a message.
25
+ #
26
+ # Label will be colorized and message will not.
27
+ # Type dictates the color: can be :success, :error, or :failure.
28
+ #
29
+ # TODO: Only print :debug messages in debug mode.
30
+ def self.log(message, type = :info, label = nil)
31
+ # Prepare label.
32
+ label ||= type unless type == :info
33
+ label = label.to_s
34
+ unless label.empty?
35
+ # Colorize label.
30
36
  fg = :black
37
+ bg = :blue
38
+
39
+ case type
40
+ when :info
41
+ bg = :cyan
42
+ when :success
43
+ bg = :green
44
+ when :error
45
+ bg = :red
46
+ when :warning
47
+ bg = :yellow
48
+ end
49
+
50
+ label = '[' + label.to_s + ']'
51
+ label = Rainbow(label)
52
+ label = label.bg(bg) if bg
53
+ label = label.fg(fg) if fg
54
+
55
+ # Add a space after the label.
56
+ label += ' '
31
57
  end
32
- label = Rainbow(label)
33
- label = label.bg(bg) if bg
34
- label = label.fg(fg) if fg
35
- puts label + ' ' + str
58
+
59
+ puts label + message
36
60
  end
37
61
 
38
- attr_reader :config, :results
62
+ ##
63
+ # Returns the "before" site's URL.
64
+ #
65
+ # TODO: Remove in favor of config.before_url.
39
66
  def before
40
67
  @config.before['url']
41
68
  end
69
+
70
+ ##
71
+ # Returns the "after" site's URL.
72
+ #
73
+ # TODO: Remove in favor of config.after_url.
42
74
  def after
43
75
  @config.after['url']
44
76
  end
45
77
 
46
- def cache=(file)
47
- # FIXME: Non-global cache would be nice
48
- return unless file
49
- if Gem::Version.new(Typhoeus::VERSION) >= Gem::Version.new('0.6.4')
50
- Typhoeus::Config.cache = SiteDiff::Util::Cache.new(file)
51
- else
52
- # Bug, see: https://github.com/typhoeus/typhoeus/pull/296
53
- SiteDiff::log("Cache unsupported on Typhoeus version < 0.6.4", :failure)
78
+ # Initialize SiteDiff.
79
+ def initialize(config, cache, verbose = true, debug = false)
80
+ @cache = cache
81
+ @verbose = verbose
82
+ @debug = debug
83
+
84
+ # Check for single-site mode
85
+ validate_opts = {}
86
+ if !config.before['url'] && @cache.tag?(:before)
87
+ unless @cache.read_tags.include?(:before)
88
+ raise SiteDiffException,
89
+ "A cached 'before' is required for single-site mode"
90
+ end
91
+ validate_opts[:need_before] = false
54
92
  end
55
- end
56
-
57
- def initialize(config, cache)
58
- config.validate
93
+ config.validate(validate_opts)
94
+ # Configure diff.
95
+ Diff.diff_config(config)
59
96
  @config = config
60
- self.cache = cache
61
97
  end
62
98
 
63
- # Sanitize an HTML string based on configuration for either before or after
64
- def sanitize(html, pos)
65
- Sanitize::sanitize(html, @config.send(pos))
99
# Sanitize the fetched HTML of both sites for one path.
#
# @param [String] path
#   Path being compared; passed through to the Sanitizer.
# @param [Hash] read_results
#   Read results keyed by :before and :after. Each value must respond to
#   #content and #encoding.
# @return [Array]
#   Two elements: the "before" HTML followed by the "after" HTML.
def sanitize(path, read_results)
  %i[before after].map do |tag|
    read = read_results[tag]
    html = read.content
    # TODO: See why encoding is empty while running tests.
    #
    # The presence of an "encoding" value used to be used to determine
    # if the sanitizer would be called. However, encoding turns up blank
    # during rspec tests for some reason.
    #
    # Content is returned untouched only when there is neither an encoding
    # nor any content; `to_s` keeps a nil body from raising here.
    next html unless read.encoding || !html.to_s.empty?

    # The config exposes its sections through the methods named after the tag.
    section = @config.public_send(tag, true)
    Sanitizer.new(html, section, path: path).sanitize
  end
end
67
117
 
68
- # Queues fetching before and after URLs with a Typhoeus::Hydra instance
118
+ ##
119
+ # Process a set of read results.
69
120
  #
70
- # Upon completion of both before and after, prints and saves the diff to
71
- # @results.
72
- def queue_read(hydra, path)
73
- # ( :before | after ) => ReadResult object
74
- reads = {}
75
- [:before, :after].each do |pos|
76
- uri = UriWrapper.new(send(pos) + path)
77
-
78
- uri.queue(hydra) do |res|
79
- reads[pos] = res
80
- next unless reads.size == 2
81
-
82
- # we have read both before and after; calculate diff
83
- if error = reads[:before].error || reads[:after].error
84
- diff = Result.new(path, nil, nil, error)
85
- else
86
- diff = Result.new(path, sanitize(reads[:before].content, :before),
87
- sanitize(reads[:after].content,:after), nil)
88
- end
89
- diff.log
90
- @results[path] = diff
121
# This is the callback that processes items fetched by the Fetcher.
#
# Builds a Result for the path and stores it in @results. The Result is an
# error Result when either side reports a read error or when sanitization
# raises. Afterwards every result that is ready is logged, following the
# order of @ordered.
#
# @param [String] path
#   Path that was fetched.
# @param [Hash] read_results
#   Read results keyed by :before and :after. Each value must respond to
#   #error and #encoding.
# @raise [StandardError]
#   Re-raises sanitization errors when @debug is set.
def process_results(path, read_results)
  error = read_results[:before].error || read_results[:after].error
  diff =
    if error
      Result.new(path, nil, nil, nil, nil, error)
    else
      begin
        Result.new(
          path,
          *sanitize(path, read_results),
          read_results[:before].encoding,
          read_results[:after].encoding,
          nil
        )
      rescue StandardError => e
        raise if @debug

        # The error Result must be the value of this branch; otherwise the
        # path ends up with a nil entry in @results.
        Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e}")
      end
    end
  @results[path] = diff

  # Print results in order! Stop at the first path whose result has not
  # arrived yet.
  while (next_diff = @results[@ordered.first])
    next_diff.log(@verbose)
    @ordered.shift
  end
end
94
149
 
95
- # Perform the comparison
150
+ ##
151
+ # Compute diff as per config.
152
+ #
153
+ # @return [Integer]
154
+ # Number of paths which have diffs.
96
155
  def run
97
- # Map of path -> Result object, queue_read sets callbacks to populate this
156
+ # Map of path -> Result object, populated by process_results
98
157
  @results = {}
158
+ @ordered = @config.paths.dup
99
159
 
100
- hydra = Typhoeus::Hydra.new(max_concurrency: 3)
101
- @config.paths.each { |path| queue_read(hydra, path) }
102
- hydra.run
160
+ unless @cache.read_tags.empty?
161
+ SiteDiff.log('Using sites from cache: ' + @cache.read_tags.sort.join(', '))
162
+ end
163
+
164
+ # TODO: Fix this after config merge refactor!
165
+ # Not quite right. We are not passing @config.before or @config.after
166
+ # so passing this instead but @config.after['curl_opts'] is ignored.
167
+ curl_opts = @config.setting :curl_opts
168
+ config_curl_opts = @config.before['curl_opts']
169
+ curl_opts = config_curl_opts.clone.merge(curl_opts) if config_curl_opts
170
+ fetcher = Fetch.new(
171
+ @cache,
172
+ @config.paths,
173
+ @config.setting(:interval),
174
+ @config.setting(:concurrency),
175
+ curl_opts,
176
+ @debug,
177
+ before: @config.before_url,
178
+ after: @config.after_url
179
+ )
180
+
181
+ # Run the Fetcher with "process results" as a callback.
182
+ fetcher.run(&method(:process_results))
103
183
 
104
184
  # Order by original path order
105
- @results = @config.paths.map { |p| @results[p] }
185
+ @results = @config.paths.map { |path| @results[path] }
186
+ results.map { |r| r unless r.success? }.compact.length
106
187
  end
107
188
 
108
- # Dump results to disk
109
- def dump(dir, report_before, report_after, failing_paths)
110
- report_before ||= before
111
- report_after ||= after
112
- FileUtils.mkdir_p(dir)
113
-
114
- # store diffs of each failing case, first wipe out existing diffs
115
- diff_dir = File.join(dir, DIFFS_DIR)
116
- FileUtils.rm_rf(diff_dir)
117
- results.each { |r| r.dump(dir) if r.status == Result::STATUS_FAILURE }
118
- SiteDiff::log "All diff files were dumped inside #{dir}"
119
-
120
- # store failing paths
121
- SiteDiff::log "Writing failures to #{failing_paths}"
122
- File.open(failing_paths, 'w') do |f|
123
- results.each { |r| f.puts r.path unless r.success? }
189
##
# Get a reporter object to help with report generation.
#
# @raise [SiteDiffException]
#   When @results is nil, i.e. no results have been computed yet.
# @return [Report]
#   Reporter built from @config, @cache and @results.
def report
  if @results.nil?
    # `raise Klass, message` raises the exception; `Klass(message)` would be
    # a call to a method named like the class.
    raise SiteDiffException,
          'No results detected. Run SiteDiff.run before SiteDiff.report.'
  end

  Report.new(@config, @cache, @results)
end
200
+
201
##
# Get SiteDiff gemspec.
#
# @return [Gem::Specification, nil]
#   Whatever Gem::Specification.load returns for sitediff.gemspec in ROOT_DIR.
def self.gemspec
  Gem::Specification.load(File.join(ROOT_DIR, 'sitediff.gemspec'))
end
207
+
208
##
# Ensures that a directory exists and returns a Pathname for it.
#
# @param [String, Pathname] dir
#   path/to/directory
# @return [Pathname]
#   The given Pathname itself, or a new Pathname built from the String.
def self.ensure_dir(dir)
  # Kernel#Pathname returns a Pathname argument unchanged.
  dir = Pathname(dir)
  # mkpath is a no-op for an existing directory, so no prior check is needed.
  dir.mkpath
  dir
end
130
218
  end
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
require 'fileutils'
require 'pathname'
require 'set'
5
+
6
class SiteDiff
  # SiteDiff Cache Handler.
  #
  # Stores results on disk below "<dir>/snapshot/<tag>/<path>", one
  # Marshal-serialized file per path.
  class Cache
    attr_accessor :read_tags, :write_tags

    ##
    # Creates a Cache object.
    #
    # @param [Hash] opts
    #   :directory is the storage directory (defaults to '.'); :create is
    #   stored as given.
    def initialize(opts = {})
      @create = opts[:create]

      # Read and Write tags are sets that can contain :before and :after.
      # They indicate whether we should use the cache for reading or writing.
      @read_tags = Set.new
      @write_tags = Set.new

      # The directory used by the cache for storage.
      @dir = opts[:directory] || '.'
    end

    ##
    # Is a tag cached?
    # TODO: Rename it to is_cached? as it makes more sense.
    #
    # @return [Boolean]
    #   Whether the snapshot directory for the tag exists.
    def tag?(tag)
      File.directory?(File.join(@dir, 'snapshot', tag.to_s))
    end

    ##
    # Get data from cache.
    #
    # @return [Object, nil]
    #   The stored object, or nil when the tag is not enabled for reading or
    #   no file exists for the path.
    def get(tag, path)
      return nil unless @read_tags.include? tag

      filename = snapshot_filename(tag, path)
      return nil unless File.file? filename

      # NOTE(review): Marshal.load can instantiate arbitrary objects; this is
      # only safe for cache directories whose contents are trusted.
      # Marshal data is binary, so it must not be read in text mode.
      Marshal.load(File.binread(filename))
    end

    ##
    # Set data to cache.
    #
    # Does nothing unless the tag is enabled for writing.
    def set(tag, path, result)
      return unless @write_tags.include? tag

      filename = snapshot_filename(tag, path)
      ensure_parent_dir(Pathname.new(filename))
      # Marshal data is binary, so it must not be written in text mode.
      File.binwrite(filename, Marshal.dump(result))
    end

    ##
    # TODO: Document this or remove it if unused.
    #
    # Returns the Marshal dump of the tag and the UTF-8 encoded path.
    def key(tag, path)
      # Ensure encoding stays the same!
      Marshal.dump([tag, path.encode('UTF-8')])
    end

    ##
    # Ensures that a directory exists.
    #
    # Also makes it the storage directory of this cache.
    #
    # @param [String, nil] directory
    #   Directory to use; '.' when nil.
    # @return [String]
    #   The directory path.
    def get_dir(directory)
      # Create the dir. Must go before cache initialization!
      @dir = Pathname.new(directory || '.')
      @dir.mkpath unless @dir.directory?
      @dir.to_s
    end

    private

    # Maps a tag and path to the file that stores the result; a path that is
    # a directory on disk is stored in its index.html.
    def snapshot_filename(tag, path)
      filename = File.join(
        @dir,
        'snapshot',
        tag.to_s,
        *path.split(File::SEPARATOR)
      )
      filename = File.join(filename, 'index.html') if File.directory?(filename)
      filename
    end

    # Creates the parent directory of filepath. When a regular file sits
    # where a directory is needed, that file is moved to index.html inside
    # the newly created directory.
    def ensure_parent_dir(filepath)
      parent = filepath.dirname
      return if parent.directory?

      parent.mkpath
    rescue Errno::EEXIST
      # Walk up to the first existing entry: the file that is in the way.
      blocker = filepath
      blocker = blocker.parent until blocker.exist?
      tempname = blocker.dirname + "#{blocker.basename}.temporary"
      # May cause problems if action is not atomic!
      # Move existing file to dir/index.html first
      # Not robust! Should generate an UUID or something.
      SiteDiff.log "Overwriting file #{tempname}", :warning if tempname.exist?
      blocker.rename(tempname)
      parent.mkpath
      # Should only happen in strange situations such as when the path
      # is foo/index.html/bar (i.e., index.html is a directory)
      index = blocker + 'index.html'
      SiteDiff.log "Overwriting file #{index}", :warning if index.exist?
      tempname.rename(index)
    end
  end
end