sitediff 0.0.1 → 1.0.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 1dc3a624b91cd4b7ef1c926116630cd795532024
4
- data.tar.gz: e49f227ae303f574b704ffe3a226f79a120ae30f
2
+ SHA256:
3
+ metadata.gz: 1b7854497b5e81f48d810acec8106cbc66e33492d046e032e5516f76db26f142
4
+ data.tar.gz: a9349a79953237dd017600d49d38b8e734afc561f0ce09a1f8732e0e933530c9
5
5
  SHA512:
6
- metadata.gz: 90ca5508b834d32ac7c96aa6a94a6aa8488921e978e76890e142b1249da20bc620ddcfa237f3defc1e6928d83dd0a22583c9dded150855c320f94140e1bffdf1
7
- data.tar.gz: 24bf7969b6f17c269bb407d1ff1684f6556318d0cfa7c6c92a8327ddd0d86ee4f153778affa4d1ef115e47a3b69b31b4dac5f01ed8b7f464a05fd98f9f98212b
6
+ metadata.gz: 0e91f665f3c59b8a65f16c6942ec49b9cc9ee7fd12b0a777eadb844a0b9819ab1fd9485495bf2c757ca7342a6198dcccb5ae546c4ddf2682f234d015b64309b2
7
+ data.tar.gz: 64b7980bbbade8710b6069af19a67083678c2bd5fa99674df3360c1c6a3ddf8a15de7c5be4e8349ec298fc1c0dc27535b816089cd4f8852b8c8633861d72a178
@@ -1,10 +1,16 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
 
3
4
  # when run as gem, $0 is /usr/local/bin/sitediff not this file
4
- if $0 == __FILE__
5
- $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
5
+ if $PROGRAM_NAME == __FILE__
6
+ $LOAD_PATH.unshift File.expand_path('../lib', __dir__)
6
7
  end
7
8
 
8
- require 'sitediff'
9
+ require 'sitediff/cli'
9
10
 
10
- SiteDiff::Cli.start
11
+ begin
12
+ SiteDiff::Cli.start
13
+ rescue Interrupt
14
+ puts("\n")
15
+ SiteDiff.log('Stopping. Interrupted by user.')
16
+ end
@@ -1,130 +1,218 @@
1
1
  #!/bin/env ruby
2
- require 'sitediff/cli.rb'
3
- require 'sitediff/config.rb'
4
- require 'sitediff/result.rb'
5
- require 'sitediff/uriwrapper'
6
- require 'sitediff/util/cache'
7
- require 'typhoeus'
2
+ # frozen_string_literal: true
3
+
4
+ require 'sitediff/config'
5
+ require 'sitediff/diff'
6
+ require 'sitediff/fetch'
7
+ require 'sitediff/result'
8
+ require 'sitediff/report'
9
+ require 'pathname'
8
10
  require 'rainbow'
11
+ require 'rubygems'
12
+ require 'yaml'
9
13
 
14
+ # SiteDiff Object.
10
15
  class SiteDiff
11
- # path to misc. static files (e.g. erb, css files)
16
+ attr_reader :config, :results
17
+
18
+ # SiteDiff installation directory.
19
+ ROOT_DIR = File.dirname(File.dirname(__FILE__))
20
+
21
+ # Path to misc files. Ex: *.erb, *.css.
12
22
  FILES_DIR = File.join(File.dirname(__FILE__), 'sitediff', 'files')
13
23
 
14
- # subdirectory containing all failing diffs
15
- DIFFS_DIR = 'diffs'
16
-
17
- # label will be colorized and str will not be.
18
- # type dictates the color: can be :success, :error, or :failure
19
- def self.log(str, type=nil, label=nil)
20
- label = label ? "[sitediff] #{label}" : '[sitediff]'
21
- bg = fg = nil
22
- case type
23
- when :success
24
- bg = :green
25
- fg = :black
26
- when :failure
27
- bg = :red
28
- when :error
29
- bg = :yellow
24
+ # Logs a message.
25
+ #
26
+ # Label will be colorized and message will not.
27
+ # Type dictates the color: can be :success, :error, or :failure.
28
+ #
29
+ # TODO: Only print :debug messages in debug mode.
30
+ def self.log(message, type = :info, label = nil)
31
+ # Prepare label.
32
+ label ||= type unless type == :info
33
+ label = label.to_s
34
+ unless label.empty?
35
+ # Colorize label.
30
36
  fg = :black
37
+ bg = :blue
38
+
39
+ case type
40
+ when :info
41
+ bg = :cyan
42
+ when :success
43
+ bg = :green
44
+ when :error
45
+ bg = :red
46
+ when :warning
47
+ bg = :yellow
48
+ end
49
+
50
+ label = '[' + label.to_s + ']'
51
+ label = Rainbow(label)
52
+ label = label.bg(bg) if bg
53
+ label = label.fg(fg) if fg
54
+
55
+ # Add a space after the label.
56
+ label += ' '
31
57
  end
32
- label = Rainbow(label)
33
- label = label.bg(bg) if bg
34
- label = label.fg(fg) if fg
35
- puts label + ' ' + str
58
+
59
+ puts label + message
36
60
  end
37
61
 
38
- attr_reader :config, :results
62
+ ##
63
+ # Returns the "before" site's URL.
64
+ #
65
+ # TODO: Remove in favor of config.before_url.
39
66
  def before
40
67
  @config.before['url']
41
68
  end
69
+
70
+ ##
71
+ # Returns the "after" site's URL.
72
+ #
73
+ # TODO: Remove in favor of config.after_url.
42
74
  def after
43
75
  @config.after['url']
44
76
  end
45
77
 
46
- def cache=(file)
47
- # FIXME: Non-global cache would be nice
48
- return unless file
49
- if Gem::Version.new(Typhoeus::VERSION) >= Gem::Version.new('0.6.4')
50
- Typhoeus::Config.cache = SiteDiff::Util::Cache.new(file)
51
- else
52
- # Bug, see: https://github.com/typhoeus/typhoeus/pull/296
53
- SiteDiff::log("Cache unsupported on Typhoeus version < 0.6.4", :failure)
78
+ # Initialize SiteDiff.
79
+ def initialize(config, cache, verbose = true, debug = false)
80
+ @cache = cache
81
+ @verbose = verbose
82
+ @debug = debug
83
+
84
+ # Check for single-site mode
85
+ validate_opts = {}
86
+ if !config.before['url'] && @cache.tag?(:before)
87
+ unless @cache.read_tags.include?(:before)
88
+ raise SiteDiffException,
89
+ "A cached 'before' is required for single-site mode"
90
+ end
91
+ validate_opts[:need_before] = false
54
92
  end
55
- end
56
-
57
- def initialize(config, cache)
58
- config.validate
93
+ config.validate(validate_opts)
94
+ # Configure diff.
95
+ Diff.diff_config(config)
59
96
  @config = config
60
- self.cache = cache
61
97
  end
62
98
 
63
- # Sanitize an HTML string based on configuration for either before or after
64
- def sanitize(html, pos)
65
- Sanitize::sanitize(html, @config.send(pos))
99
+ # Sanitize HTML.
100
+ def sanitize(path, read_results)
101
+ %i[before after].map do |tag|
102
+ html = read_results[tag].content
103
+ # TODO: See why encoding is empty while running tests.
104
+ #
105
+ # The presence of an "encoding" value used to be used to determine
106
+ # if the sanitizer would be called. However, encoding turns up blank
107
+ # during rspec tests for some reason.
108
+ encoding = read_results[tag].encoding
109
+ if encoding || html.length.positive?
110
+ section = @config.send(tag, true)
111
+ Sanitizer.new(html, section, path: path).sanitize
112
+ else
113
+ html
114
+ end
115
+ end
66
116
  end
67
117
 
68
- # Queues fetching before and after URLs with a Typhoeus::Hydra instance
118
+ ##
119
+ # Process a set of read results.
69
120
  #
70
- # Upon completion of both before and after, prints and saves the diff to
71
- # @results.
72
- def queue_read(hydra, path)
73
- # ( :before | after ) => ReadResult object
74
- reads = {}
75
- [:before, :after].each do |pos|
76
- uri = UriWrapper.new(send(pos) + path)
77
-
78
- uri.queue(hydra) do |res|
79
- reads[pos] = res
80
- next unless reads.size == 2
81
-
82
- # we have read both before and after; calculate diff
83
- if error = reads[:before].error || reads[:after].error
84
- diff = Result.new(path, nil, nil, error)
85
- else
86
- diff = Result.new(path, sanitize(reads[:before].content, :before),
87
- sanitize(reads[:after].content,:after), nil)
88
- end
89
- diff.log
90
- @results[path] = diff
121
+ # This is the callback that processes items fetched by the Fetcher.
122
+ def process_results(path, read_results)
123
+ error = (read_results[:before].error || read_results[:after].error)
124
+ if error
125
+ diff = Result.new(path, nil, nil, nil, nil, error)
126
+ else
127
+ begin
128
+ diff = Result.new(
129
+ path,
130
+ *sanitize(path, read_results),
131
+ read_results[:before].encoding,
132
+ read_results[:after].encoding,
133
+ nil
134
+ )
135
+ rescue StandardError => e
136
+ raise if @debug
137
+
138
+ Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e}")
91
139
  end
92
140
  end
141
+ @results[path] = diff
142
+
143
+ # Print results in order!
144
+ while (next_diff = @results[@ordered.first])
145
+ next_diff.log(@verbose)
146
+ @ordered.shift
147
+ end
93
148
  end
94
149
 
95
- # Perform the comparison
150
+ ##
151
+ # Compute diff as per config.
152
+ #
153
+ # @return [Integer]
154
+ # Number of paths which have diffs.
96
155
  def run
97
- # Map of path -> Result object, queue_read sets callbacks to populate this
156
+ # Map of path -> Result object, populated by process_results
98
157
  @results = {}
158
+ @ordered = @config.paths.dup
99
159
 
100
- hydra = Typhoeus::Hydra.new(max_concurrency: 3)
101
- @config.paths.each { |path| queue_read(hydra, path) }
102
- hydra.run
160
+ unless @cache.read_tags.empty?
161
+ SiteDiff.log('Using sites from cache: ' + @cache.read_tags.sort.join(', '))
162
+ end
163
+
164
+ # TODO: Fix this after config merge refactor!
165
+ # Not quite right. We are not passing @config.before or @config.after
166
+ # so passing this instead but @config.after['curl_opts'] is ignored.
167
+ curl_opts = @config.setting :curl_opts
168
+ config_curl_opts = @config.before['curl_opts']
169
+ curl_opts = config_curl_opts.clone.merge(curl_opts) if config_curl_opts
170
+ fetcher = Fetch.new(
171
+ @cache,
172
+ @config.paths,
173
+ @config.setting(:interval),
174
+ @config.setting(:concurrency),
175
+ curl_opts,
176
+ @debug,
177
+ before: @config.before_url,
178
+ after: @config.after_url
179
+ )
180
+
181
+ # Run the Fetcher with "process results" as a callback.
182
+ fetcher.run(&method(:process_results))
103
183
 
104
184
  # Order by original path order
105
- @results = @config.paths.map { |p| @results[p] }
185
+ @results = @config.paths.map { |path| @results[path] }
186
+ results.map { |r| r unless r.success? }.compact.length
106
187
  end
107
188
 
108
- # Dump results to disk
109
- def dump(dir, report_before, report_after, failing_paths)
110
- report_before ||= before
111
- report_after ||= after
112
- FileUtils.mkdir_p(dir)
113
-
114
- # store diffs of each failing case, first wipe out existing diffs
115
- diff_dir = File.join(dir, DIFFS_DIR)
116
- FileUtils.rm_rf(diff_dir)
117
- results.each { |r| r.dump(dir) if r.status == Result::STATUS_FAILURE }
118
- SiteDiff::log "All diff files were dumped inside #{dir}"
119
-
120
- # store failing paths
121
- SiteDiff::log "Writing failures to #{failing_paths}"
122
- File.open(failing_paths, 'w') do |f|
123
- results.each { |r| f.puts r.path unless r.success? }
189
+ ##
190
+ # Get a reporter object to help with report generation.
191
+ def report
192
+ if @results.nil?
193
+ raise SiteDiffException(
194
+ 'No results detected. Run SiteDiff.run before SiteDiff.report.'
195
+ )
124
196
  end
125
197
 
126
- # create report of results
127
- report = Diff::generate_html_report(results, report_before, report_after)
128
- File.open(File.join(dir, "/report.html") , 'w') { |f| f.write(report) }
198
+ Report.new(@config, @cache, @results)
199
+ end
200
+
201
+ ##
202
+ # Get SiteDiff gemspec.
203
+ def self.gemspec
204
+ file = ROOT_DIR + '/sitediff.gemspec'
205
+ Gem::Specification.load(file)
206
+ end
207
+
208
+ ##
209
+ # Ensures that a directory exists and returns a Pathname for it.
210
+ #
211
+ # @param [String] dir
212
+ # path/to/directory
213
+ def self.ensure_dir(dir)
214
+ dir = Pathname.new(dir) unless dir.is_a? Pathname
215
+ dir.mkpath unless dir.directory?
216
+ dir
129
217
  end
130
218
  end
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'set'
4
+ require 'fileutils'
5
+
6
+ class SiteDiff
7
+ # SiteDiff Cache Handler.
8
+ class Cache
9
+ attr_accessor :read_tags, :write_tags
10
+
11
+ ##
12
+ # Creates a Cache object.
13
+ def initialize(opts = {})
14
+ @create = opts[:create]
15
+
16
+ # Read and Write tags are sets that can contain :before and :after.
17
+ # They indicate whether we should use the cache for reading or writing.
18
+ @read_tags = Set.new
19
+ @write_tags = Set.new
20
+
21
+ # The directory used by the cache for storage.
22
+ @dir = opts[:directory] || '.'
23
+ end
24
+
25
+ ##
26
+ # Is a tag cached?
27
+ # TODO: Rename it to is_cached? as it makes more sense.
28
+ def tag?(tag)
29
+ File.directory?(File.join(@dir, 'snapshot', tag.to_s))
30
+ end
31
+
32
+ ##
33
+ # Get data from cache.
34
+ def get(tag, path)
35
+ return nil unless @read_tags.include? tag
36
+
37
+ filename = File.join(
38
+ @dir,
39
+ 'snapshot',
40
+ tag.to_s,
41
+ *path.split(File::SEPARATOR)
42
+ )
43
+
44
+ filename = File.join(filename, 'index.html') if File.directory?(filename)
45
+ return nil unless File.file? filename
46
+
47
+ Marshal.load(File.read(filename))
48
+ end
49
+
50
+ ##
51
+ # Set data to cache.
52
+ def set(tag, path, result)
53
+ return unless @write_tags.include? tag
54
+
55
+ filename = File.join(
56
+ @dir,
57
+ 'snapshot',
58
+ tag.to_s,
59
+ *path.split(File::SEPARATOR)
60
+ )
61
+
62
+ filename = File.join(filename, 'index.html') if File.directory?(filename)
63
+ filepath = Pathname.new(filename)
64
+ unless filepath.dirname.directory?
65
+ begin
66
+ filepath.dirname.mkpath
67
+ rescue Errno::EEXIST
68
+ curdir = filepath
69
+ curdir = curdir.parent until curdir.exist?
70
+ tempname = curdir.dirname + (curdir.basename.to_s + '.temporary')
71
+ # May cause problems if action is not atomic!
72
+ # Move existing file to dir/index.html first
73
+ # Not robust! Should generate an UUID or something.
74
+ if File.exist?(tempname)
75
+ SiteDiff.log "Overwriting file #{tempname}", :warning
76
+ end
77
+ curdir.rename(tempname)
78
+ filepath.dirname.mkpath
79
+ # Should only happen in strange situations such as when the path
80
+ # is foo/index.html/bar (i.e., index.html is a directory)
81
+ if (curdir + 'index.html').exist?
82
+ SiteDiff.log "Overwriting file #{tempname}", :warning
83
+ end
84
+ tempname.rename(curdir + 'index.html')
85
+ end
86
+ end
87
+ File.open(filename, 'w') { |file| file.write(Marshal.dump(result)) }
88
+ end
89
+
90
+ ##
91
+ # TODO: Document this or remove it if unused.
92
+ def key(tag, path)
93
+ # Ensure encoding stays the same!
94
+ Marshal.dump([tag, path.encode('UTF-8')])
95
+ end
96
+
97
+ ##
98
+ # Ensures that a directory exists.
99
+ def get_dir(directory)
100
+ # Create the dir. Must go before cache initialization!
101
+ @dir = Pathname.new(directory || '.')
102
+ @dir.mkpath unless @dir.directory?
103
+ @dir.to_s
104
+ end
105
+ end
106
+ end