sitediff 0.0.6 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +5 -5
  2. data/.eslintignore +1 -0
  3. data/.eslintrc.json +28 -0
  4. data/.project +11 -0
  5. data/.rubocop.yml +179 -0
  6. data/.rubocop_todo.yml +51 -0
  7. data/CHANGELOG.md +28 -0
  8. data/Dockerfile +33 -0
  9. data/Gemfile +11 -0
  10. data/Gemfile.lock +85 -0
  11. data/INSTALLATION.md +146 -0
  12. data/LICENSE +339 -0
  13. data/README.md +810 -0
  14. data/Rakefile +12 -0
  15. data/Thorfile +135 -0
  16. data/bin/sitediff +9 -2
  17. data/config/.gitkeep +0 -0
  18. data/config/sanitize_domains.example.yaml +8 -0
  19. data/config/sitediff.example.yaml +81 -0
  20. data/docker-compose.test.yml +3 -0
  21. data/lib/sitediff/api.rb +276 -0
  22. data/lib/sitediff/cache.rb +57 -8
  23. data/lib/sitediff/cli.rb +156 -176
  24. data/lib/sitediff/config/creator.rb +61 -77
  25. data/lib/sitediff/config/preset.rb +75 -0
  26. data/lib/sitediff/config.rb +436 -31
  27. data/lib/sitediff/crawler.rb +27 -21
  28. data/lib/sitediff/diff.rb +32 -9
  29. data/lib/sitediff/fetch.rb +10 -3
  30. data/lib/sitediff/files/diff.html.erb +20 -2
  31. data/lib/sitediff/files/jquery.min.js +2 -0
  32. data/lib/sitediff/files/normalize.css +349 -0
  33. data/lib/sitediff/files/report.html.erb +171 -0
  34. data/lib/sitediff/files/sidebyside.html.erb +5 -2
  35. data/lib/sitediff/files/sitediff.css +303 -30
  36. data/lib/sitediff/files/sitediff.js +367 -0
  37. data/lib/sitediff/presets/drupal.yaml +63 -0
  38. data/lib/sitediff/report.rb +254 -0
  39. data/lib/sitediff/result.rb +50 -20
  40. data/lib/sitediff/sanitize/dom_transform.rb +47 -8
  41. data/lib/sitediff/sanitize/regexp.rb +24 -3
  42. data/lib/sitediff/sanitize.rb +81 -12
  43. data/lib/sitediff/uriwrapper.rb +65 -23
  44. data/lib/sitediff/webserver/resultserver.rb +30 -33
  45. data/lib/sitediff/webserver.rb +15 -3
  46. data/lib/sitediff.rb +130 -83
  47. data/misc/sitediff - overview report.png +0 -0
  48. data/misc/sitediff - page report.png +0 -0
  49. data/package-lock.json +878 -0
  50. data/package.json +25 -0
  51. data/sitediff.gemspec +51 -0
  52. metadata +91 -29
  53. data/lib/sitediff/files/html_report.html.erb +0 -66
  54. data/lib/sitediff/files/rules/drupal.yaml +0 -63
  55. data/lib/sitediff/rules.rb +0 -65
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ require 'rubocop/rake_task'
9
+
10
+ RuboCop::RakeTask.new
11
+
12
+ task default: %i[spec rubocop]
data/Thorfile ADDED
@@ -0,0 +1,135 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # TODO: Determine the utility of this file.
5
+
6
+ LIB_DIR = File.join(File.dirname(__FILE__), 'lib')
7
+ $LOAD_PATH << LIB_DIR
8
+ require 'sitediff/webserver'
9
+ require 'sitediff/webserver/resultserver'
10
+
11
+ # Thor Base class.
12
+ class Base < Thor
13
+ method_options local: true
14
+ # Adds the option to all Base subclasses.
15
+ # method_options() takes different arguments than option().
16
+ def initialize(*args)
17
+ super(*args)
18
+ @local = options['local']
19
+ end
20
+
21
+ # gives us run()
22
+ include Thor::Actions
23
+
24
+ # Thor, by default, exits with 0 no matter what!
25
+ def self.exit_on_failure?
26
+ true
27
+ end
28
+
29
+ protected
30
+
31
+ def executable(gem)
32
+ gem = './bin/sitediff' if (gem == 'sitediff') && @local
33
+ "#{'bundle exec' if @local} #{gem}"
34
+ end
35
+ end
36
+
37
+ # Thor for Docker.
38
+ class Docker < Base
39
+ IMAGE = 'evolvingweb/sitediff'
40
+
41
+ desc 'build', 'Build a docker image for sitediff'
42
+ # Make a build image for docker.
43
+ def build
44
+ run "docker build -t #{IMAGE} . "
45
+ end
46
+
47
+ desc 'run', 'Run a rake task (or a login shell if none given) inside docker'
48
+ # NOTE: We can't override run() (which is reserved by Thor). Luckily, Thor only
49
+ # checks for the first N necessary characters to match a command with a
50
+ # method. Cf. Thor::normalize_command_name()
51
+ def run_(task = 'bash')
52
+ docker_opts = ['-t', "-v #{File.dirname(__FILE__)}:/sitediff"]
53
+ finish_exec(task, docker_opts)
54
+ end
55
+
56
+ desc 'compose', 'Run a task inside docker without volume mounting (not supported with compose)'
57
+ # Run a task inside docker without volume mounting.
58
+ def compose(task = 'bash')
59
+ docker_opts = ['-t']
60
+ finish_exec(task, docker_opts)
61
+ end
62
+
63
+ no_commands do
64
+ # Builds the `docker run` command for the given task, prints it, and runs it.
65
+ def finish_exec(task, docker_opts)
66
+ if task == 'bash'
67
+ cmd = 'bash'
68
+ docker_opts << '-i'
69
+ else
70
+ # pass down the local flag to docker command
71
+ cmd = "#{executable('thor')} #{task} #{@local ? '--local' : '--no-local'}"
72
+ end
73
+ puts "docker run #{docker_opts.join(' ')} #{IMAGE} #{cmd}"
74
+ run "docker run #{docker_opts.join(' ')} #{IMAGE} #{cmd}"
75
+ end
76
+ end
77
+ end
78
+
79
+ # Thor for Spec.
80
+ class Spec < Base
81
+ desc 'unit', 'run RSpec unit tests'
82
+ # Run RSpec unit tests.
83
+ def unit
84
+ puts "#{executable('rspec')} spec/unit"
85
+ run "#{executable('rspec')} spec/unit"
86
+ end
87
+
88
+ desc 'fixture', 'run RSpec integration tests'
89
+ # Run RSpec integration tests.
90
+ def fixture
91
+ puts "#{executable('rspec')} spec/unit"
92
+ run "#{executable('rspec')} spec/fixtures"
93
+ end
94
+
95
+ desc 'all', 'runs both unit and fixture tests', hide: true
96
+ # hidden task to lump together multiple tasks
97
+ def all
98
+ unit
99
+ fixture
100
+ end
101
+ default_task :all
102
+ end
103
+
104
+ # Thor for fixtures.
105
+ class Fixture < Base
106
+ desc 'local', 'Run a sitediff test case'
107
+ # Run a sitediff test case.
108
+ def local
109
+ run "#{executable('sitediff')} diff --cached=none spec/fixtures/cli/config.yaml"
110
+ end
111
+
112
+ desc 'http', 'Run a sitediff test case, using web servers'
113
+ # Run a sitediff test case, using web servers.
114
+ def http
115
+ cmd = "#{executable('sitediff')} diff --cached=none spec/fixtures/cli/config.yaml"
116
+ http_fixtures(cmd).kill
117
+ end
118
+
119
+ desc 'serve', 'Serve the result of the fixture test'
120
+ # Serve the result of the fixture test.
121
+ def serve
122
+ cmd = "#{executable('sitediff')} diff --cached=none --paths-file=spec/sites/ruby-doc.org/paths.txt spec/unit/cli/config.yaml"
123
+ http_fixtures(cmd)
124
+ SiteDiff::Webserver::ResultServer.new(nil, 'sitediff', quiet: true).wait
125
+ end
126
+
127
+ private
128
+
129
+ # HTTP Fixtures.
130
+ def http_fixtures(cmd)
131
+ serv = SiteDiff::Webserver::FixtureServer.new
132
+ run "#{cmd} --before #{serv.before} --after #{serv.after}"
133
+ serv
134
+ end
135
+ end
data/bin/sitediff CHANGED
@@ -2,8 +2,15 @@
2
2
  # frozen_string_literal: true
3
3
 
4
4
  # when run as gem, $0 is /usr/local/bin/sitediff not this file
5
- $LOAD_PATH.unshift File.expand_path('../lib', __dir__) if $PROGRAM_NAME == __FILE__
5
+ if $PROGRAM_NAME == __FILE__
6
+ $LOAD_PATH.unshift File.expand_path('../lib', __dir__)
7
+ end
6
8
 
7
9
  require 'sitediff/cli'
8
10
 
9
- SiteDiff::Cli.start
11
+ begin
12
+ SiteDiff::Cli.start
13
+ rescue Interrupt
14
+ puts("\n")
15
+ SiteDiff.log('Stopping. Interrupted by user.')
16
+ end
data/config/.gitkeep ADDED
File without changes
@@ -0,0 +1,8 @@
1
+ sanitization:
2
+ -
3
+ title:
4
+ 'strip_domains_from_absolute_urls'
5
+ pattern:
6
+ 'http:\/\/[a-zA-Z0-9.:-]+'
7
+ substitute:
8
+ '__domain__'
@@ -0,0 +1,81 @@
1
+ # Include other configuration files, merging them with this file.
2
+ includes:
3
+ - extra-rules.yaml
4
+
5
+ # Settings.
6
+ #
7
+ # If you use "sitediff init" with the right parameters, it will generate
8
+ # this section for you.
9
+ settings:
10
+ # Crawl 2 levels deep.
11
+ depth: 2
12
+ # Wait for 250ms between requests.
13
+ interval: 250
14
+ # Make only 1 request at a time - no simultaneous requests.
15
+ # Concurrency has to be one when an interval is set.
16
+ concurrency: 1
17
+ # Don't follow links to PDF files.
18
+ exclude: '.*\.pdf'
19
+ # Curl options, if any.
20
+ curl_opts:
21
+ max_recv_speed_large: 10000
22
+
23
+ # Rules under this element apply only to the 'before' site.
24
+ before:
25
+ # URL of the 'before' version of the site.
26
+ url: http://localhost/old
27
+
28
+ # Sanitizations and DOM transformations, just like the general ones
29
+ # demonstrated below, but applied only to the 'before' site.
30
+ dom_transform:
31
+ - title: Example
32
+ type: remove
33
+ selector: div.updates-required
34
+
35
+ # Rules under this element apply only to the 'after' site.
36
+ after:
37
+ # URL of the 'after' version of the site.
38
+ url: http://localhost/new
39
+
40
+ # The root element to compare.
41
+ #
42
+ # Usually, sitediff compares the HTML of the entire page. If you'd rather
43
+ # check just a subset of the page, specify a selector here. For example, the
44
+ # line below causes only the body to be compared, ignoring the HTML head.
45
+ selector: 'body'
46
+
47
+ # General regular expression rules, applied to both versions of the site.
48
+ sanitization:
49
+ # Normalize input tags containing random tokens.
50
+ - title: Remove form-build-id
51
+ pattern: '<input type="hidden" name="form_build_id" value="form-[a-zA-Z0-9_-]+" *\/?>'
52
+ substitute: '<input type="hidden" name="form_build_id" value="__form_build_id__">'
53
+
54
+ # Replace meta property="twitter:*" with meta name="twitter:*".
55
+ - title: Meta 'property' changed to 'name'
56
+ pattern: 'property="twitter:'
57
+ substitute: 'name="twitter:'
58
+ # 'selector' limits this rule to only within the selected elements.
59
+ selector: meta
60
+ # 'path' limits this rule to only certain pages.
61
+ path: /user
62
+
63
+ # General DOM transforms, applied to both versions of the site.
64
+ dom_transform:
65
+ # Remove article elements, replacing them with their content
66
+ - title: Unwrap article elements
67
+ type: unwrap
68
+ selector: article
69
+
70
+ # Remove classes from divs
71
+ - title: Remove classes bar and baz from divs
72
+ type: remove_class
73
+ selector: div
74
+ class:
75
+ - class-bar
76
+ - class-baz
77
+
78
+ # Remove a div, selected by its ID.
79
+ - title: Remove block containing current time.
80
+ type: remove
81
+ selector: div#block-time
@@ -0,0 +1,3 @@
1
+ sut:
2
+ build: .
3
+ command: bundle exec rspec
@@ -0,0 +1,276 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sitediff'
4
+ require 'sitediff/cache'
5
+ require 'sitediff/config'
6
+ require 'sitediff/config/creator'
7
+ require 'sitediff/config/preset'
8
+ require 'sitediff/fetch'
9
+ require 'sitediff/webserver/resultserver'
10
+
11
+ class SiteDiff
12
+ ##
13
+ # Sitediff API interface.
14
+ class Api
15
+ ##
16
+ # Initializes new Api object.
17
+ def initialize(directory, config_file = nil)
18
+ @dir = get_dir(directory)
19
+ @config = SiteDiff::Config.new(config_file, @dir)
20
+ end
21
+
22
+ ##
23
+ # Initialize a SiteDiff project.
24
+ #
25
+ # Calling:
26
+ # SiteDiff::Api.init(
27
+ # depth: 3,
28
+ # directory: 'sitediff',
29
+ # concurrency: 3,
30
+ # interval: 0,
31
+ # include: nil,
32
+ # exclude: '*.pdf',
33
+ # preset: 'drupal',
34
+ # curl_opts: {timeout: 60},
35
+ # crawl: false
36
+ # )
37
+ def self.init(options)
38
+ # Prepare a config object and write it to the file system.
39
+ creator = SiteDiff::Config::Creator.new(options[:debug], options[:before_url], options[:after_url])
40
+ include_regex = Config.create_regexp(options[:include])
41
+ exclude_regex = Config.create_regexp(options[:exclude])
42
+ creator.create(
43
+ depth: options[:depth],
44
+ directory: options[:directory],
45
+ concurrency: options[:concurrency],
46
+ interval: options[:interval],
47
+ include: include_regex,
48
+ exclude: exclude_regex,
49
+ preset: options[:preset],
50
+ curl_opts: options[:curl_opts]
51
+ )
52
+ SiteDiff.log "Created #{creator.config_file.expand_path}", :success
53
+
54
+ # TODO: implement crawl ^^^
55
+ # Discover paths, if enabled.
56
+ # if options[:crawl]
57
+ # crawl(creator.config_file)
58
+ # SiteDiff.log 'You can now run "sitediff diff".', :success
59
+ # else
60
+ # SiteDiff.log 'Run "sitediff crawl" to discover paths. You should then be able to run "sitediff diff".', :info
61
+ # end
62
+ end
63
+
64
+ ##
65
+ # Diff the `before` and `after`.
66
+ #
67
+ # Calling:
68
+ # Api.diff(
69
+ # paths: options['paths'],
70
+ # paths_file: options['paths-file'],
71
+ # ignore_whitespace: options['ignore-whitespace'],
72
+ # export: options['export'],
73
+ # before: options['before'],
74
+ # after: options['after'],
75
+ # cached: options['cached'],
76
+ # verbose: options['verbose'],
77
+ # report_format: options['report-format'],
78
+ # before_report: options['before-report'],
79
+ # after_report: options['after-report'],
80
+ # cli_mode: false
81
+ # )
82
+ def diff(options)
83
+ @config.ignore_whitespace = options[:ignore_whitespace]
84
+ @config.export = options[:export]
85
+ # Apply "paths" override, if any.
86
+ if options[:paths]
87
+ @config.paths = options[:paths]
88
+ else
89
+ paths_file = options[:paths_file]
90
+ paths_file ||= File.join(@dir, Config::DEFAULT_PATHS_FILENAME)
91
+ paths_file = File.expand_path(paths_file)
92
+
93
+ paths_count = @config.paths_file_read(paths_file)
94
+ SiteDiff.log "Read #{paths_count} paths from: #{paths_file}"
95
+ end
96
+
97
+ # TODO: Why do we allow before and after override during diff?
98
+ @config.before['url'] = options[:before] if options[:before]
99
+ @config.after['url'] = options[:after] if options[:after]
100
+
101
+ # Prepare cache.
102
+ cache = SiteDiff::Cache.new(
103
+ create: options[:cached] != 'none',
104
+ directory: @dir
105
+ )
106
+ cache.read_tags << :before if %w[before all].include?(options[:cached])
107
+ cache.read_tags << :after if %w[after all].include?(options[:cached])
108
+ cache.write_tags << :before << :after
109
+
110
+ # Run sitediff.
111
+ sitediff = SiteDiff.new(
112
+ @config,
113
+ cache,
114
+ verbose: options[:verbose],
115
+ debug: options[:debug]
116
+ )
117
+ num_failing = sitediff.run
118
+ exit_code = num_failing.positive? ? 2 : 0
119
+
120
+ # Generate HTML report.
121
+ if options[:report_format] == 'html' || @config.export
122
+ sitediff.report.generate_html(
123
+ @dir,
124
+ options[:before_report],
125
+ options[:after_report]
126
+ )
127
+ end
128
+
129
+ # Generate JSON report.
130
+ if options[:report_format] == 'json' && @config.export == false
131
+ sitediff.report.generate_json @dir
132
+ end
133
+
134
+ SiteDiff.log 'Run "sitediff serve" to see a report.' unless options[:export]
135
+ rescue Config::InvalidConfig => e
136
+ SiteDiff.log "Invalid configuration: #{e.message}", :error
137
+ SiteDiff.log e.backtrace, :error if options[:verbose]
138
+ rescue Config::ConfigNotFound => e
139
+ SiteDiff.log "Invalid configuration: #{e.message}", :error
140
+ SiteDiff.log e.backtrace, :error if options[:verbose]
141
+ else # no exception was raised
142
+ # Thor::Error --> exit(1), guaranteed by exit_on_failure?
143
+ # Failing diff --> exit(2), populated above
144
+ exit(exit_code) if options[:cli_mode]
145
+ end
146
+
147
+ ##
148
+ # Crawl the `before` site to determine `paths`.
149
+ def crawl
150
+ # Prepare cache.
151
+ @cache = SiteDiff::Cache.new(
152
+ create: true,
153
+ directory: @dir
154
+ )
155
+ @cache.write_tags << :before << :after
156
+
157
+ # Crawl with Hydra to discover paths.
158
+ hydra = Typhoeus::Hydra.new(
159
+ max_concurrency: @config.setting(:concurrency)
160
+ )
161
+ @paths = {}
162
+ @config.roots.each do |tag, url|
163
+ Crawler.new(
164
+ hydra,
165
+ url,
166
+ @config.setting(:interval),
167
+ @config.setting(:include),
168
+ @config.setting(:exclude),
169
+ @config.setting(:depth),
170
+ @config.curl_opts,
171
+ debug: @debug
172
+ ) do |info|
173
+ SiteDiff.log "Visited #{info.uri}, cached."
174
+ after_crawl(tag, info)
175
+ end
176
+ end
177
+ hydra.run
178
+
179
+ # Write paths to a file.
180
+ @paths = @paths.values.reduce(&:|).to_a.sort
181
+ @config.paths_file_write(@paths)
182
+
183
+ # Log output.
184
+ file = Pathname.new(@dir) + Config::DEFAULT_PATHS_FILENAME
185
+ SiteDiff.log ''
186
+ SiteDiff.log "#{@paths.length} page(s) found."
187
+ SiteDiff.log "Created #{file.expand_path}.", :success, 'done'
188
+ end
189
+
190
+ ##
191
+ # Serves SiteDiff report for accessing in the browser.
192
+ #
193
+ # Calling:
194
+ # api.serve(browse: true, port: 13080)
195
+ def serve(options)
196
+ @cache = Cache.new(directory: @dir)
197
+ @cache.read_tags << :before << :after
198
+
199
+ SiteDiff::Webserver::ResultServer.new(
200
+ options[:port],
201
+ @dir,
202
+ browse: options[:browse],
203
+ cache: @cache,
204
+ config: @config
205
+ ).wait
206
+ rescue SiteDiffException => e
207
+ SiteDiff.log e.message, :error
208
+ SiteDiff.log e.backtrace, :error if options[:verbose]
209
+ end
210
+
211
+ ##
212
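+ # Fetches the configured paths from options[:url] (falling back to the
+ # `after` URL) and caches the results under the `before` tag.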
213
+ def store(options)
214
+ # TODO: Figure out how to remove this config.validate call.
215
+ @config.validate(need_before: false)
216
+ @config.paths_file_read
217
+
218
+ @cache = SiteDiff::Cache.new(directory: @dir, create: true)
219
+ @cache.write_tags << :before
220
+
221
+ base = options[:url] || @config.after['url']
222
+ fetcher = SiteDiff::Fetch.new(@cache,
223
+ @config.paths,
224
+ @config.setting(:interval),
225
+ @config.setting(:concurrency),
226
+ get_curl_opts(@config.settings),
227
+ options[:debug],
228
+ before: base)
229
+ fetcher.run do |path, _res|
230
+ SiteDiff.log "Visited #{path}, cached"
231
+ end
232
+ end
233
+
234
+ private
235
+
236
+ ##
237
+ # Ensures that the given directory exists.
238
+ def get_dir(directory)
239
+ # Create the dir. Must go before cache initialization!
240
+ @dir = Pathname.new(directory || '.')
241
+ @dir.mkpath unless @dir.directory?
242
+ @dir.to_s
243
+ end
244
+
245
+ ##
246
+ # Processes a crawled path.
247
+ def after_crawl(tag, info)
248
+ path = UriWrapper.canonicalize(info.relative)
249
+
250
+ # Register the path.
251
+ @paths[tag] = [] unless @paths[tag]
252
+ @paths[tag] << path
253
+
254
+ result = info.read_result
255
+
256
+ # Write result to applicable cache.
257
+ # @cache.set(tag, path, result)
258
+ @cache.set(:before, path, result) if tag == 'before'
259
+ @cache.set(:after, path, result) if tag == 'after'
260
+
261
+ # TODO: Restore application of rules.
262
+ # @rules.handle_page(tag, res.content, info.document) if @rules && !res.error
263
+ end
264
+
265
+ def get_curl_opts(options)
266
+ # We do want string keys here
267
+ bool_hash = { 'true' => true, 'false' => false }
268
+ curl_opts = UriWrapper::DEFAULT_CURL_OPTS
269
+ .clone
270
+ .merge(options['curl_options'] || {})
271
+ .merge(options['curl_opts'] || {})
272
+ curl_opts.each { |k, v| curl_opts[k] = bool_hash.fetch(v, v) }
273
+ curl_opts
274
+ end
275
+ end
276
+ end
@@ -4,28 +4,45 @@ require 'set'
4
4
  require 'fileutils'
5
5
 
6
6
  class SiteDiff
7
+ # SiteDiff Cache Handler.
7
8
  class Cache
9
+ TIMESTAMP_FILE = 'timestamp'
10
+
8
11
  attr_accessor :read_tags, :write_tags
9
12
 
13
+ ##
14
+ # Creates a Cache object.
10
15
  def initialize(opts = {})
11
16
  @create = opts[:create]
12
17
 
13
- # Read and Write tags are sets that can contain :before and :after
14
- # They indicate whether we should use the cache for reading or writing
18
+ # Read and Write tags are sets that can contain :before and :after.
19
+ # They indicate whether we should use the cache for reading or writing.
15
20
  @read_tags = Set.new
16
21
  @write_tags = Set.new
22
+ @timestamp_flag = { before: false, after: false }
23
+
24
+ # The directory used by the cache for storage.
17
25
  @dir = opts[:directory] || '.'
18
26
  end
19
27
 
28
+ ##
20
29
  # Is a tag cached?
30
+ # TODO: Rename it to is_cached? as it makes more sense.
21
31
  def tag?(tag)
22
32
  File.directory?(File.join(@dir, 'snapshot', tag.to_s))
23
33
  end
24
34
 
35
+ ##
36
+ # Get data from cache.
25
37
  def get(tag, path)
26
38
  return nil unless @read_tags.include? tag
27
39
 
28
- filename = File.join(@dir, 'snapshot', tag.to_s, *path.split(File::SEPARATOR))
40
+ filename = File.join(
41
+ @dir,
42
+ 'snapshot',
43
+ tag.to_s,
44
+ *path.split(File::SEPARATOR)
45
+ )
29
46
 
30
47
  filename = File.join(filename, 'index.html') if File.directory?(filename)
31
48
  return nil unless File.file? filename
@@ -33,10 +50,18 @@ class SiteDiff
33
50
  Marshal.load(File.read(filename))
34
51
  end
35
52
 
53
+ ##
54
+ # Set data to cache.
36
55
  def set(tag, path, result)
37
56
  return unless @write_tags.include? tag
38
57
 
39
- filename = File.join(@dir, 'snapshot', tag.to_s, *path.split(File::SEPARATOR))
58
+ save_timestamp(tag)
59
+ filename = File.join(
60
+ @dir,
61
+ 'snapshot',
62
+ tag.to_s,
63
+ *path.split(File::SEPARATOR)
64
+ )
40
65
 
41
66
  filename = File.join(filename, 'index.html') if File.directory?(filename)
42
67
  filepath = Pathname.new(filename)
@@ -46,32 +71,56 @@ class SiteDiff
46
71
  rescue Errno::EEXIST
47
72
  curdir = filepath
48
73
  curdir = curdir.parent until curdir.exist?
49
- tempname = curdir.dirname + (curdir.basename.to_s + '.temporary')
74
+ tempname = "#{curdir.dirname}/#{curdir.basename}.temporary"
75
+ # tempname = curdir.dirname + (curdir.basename.to_s + '.temporary')
50
76
  # May cause problems if action is not atomic!
51
77
  # Move existing file to dir/index.html first
52
78
  # Not robust! Should generate an UUID or something.
53
- SiteDiff.log "Overwriting file #{tempname}", :warn if File.exist?(tempname)
79
+ if File.exist?(tempname)
80
+ SiteDiff.log "Overwriting file #{tempname}", :warning
81
+ end
54
82
  curdir.rename(tempname)
55
83
  filepath.dirname.mkpath
56
84
  # Should only happen in strange situations such as when the path
57
85
  # is foo/index.html/bar (i.e., index.html is a directory)
58
- SiteDiff.log "Overwriting file #{tempname}", :warn if (curdir + 'index.html').exist?
59
- tempname.rename(curdir + 'index.html')
86
+ if File.exist?("#{curdir}/index.html")
87
+ SiteDiff.log "Overwriting file #{tempname}", :warning
88
+ end
89
+ File.rename(tempname, "#{curdir}/index.html")
90
+ # tempname.rename(curdir + 'index.html')
60
91
  end
61
92
  end
62
93
  File.open(filename, 'w') { |file| file.write(Marshal.dump(result)) }
63
94
  end
64
95
 
96
+ ##
97
+ # TODO: Document this or remove it if unused.
65
98
  def key(tag, path)
66
99
  # Ensure encoding stays the same!
67
100
  Marshal.dump([tag, path.encode('UTF-8')])
68
101
  end
69
102
 
103
+ ##
104
+ # Ensures that a directory exists.
70
105
  def get_dir(directory)
71
106
  # Create the dir. Must go before cache initialization!
72
107
  @dir = Pathname.new(directory || '.')
73
108
  @dir.mkpath unless @dir.directory?
74
109
  @dir.to_s
75
110
  end
111
+
112
+ private
113
+
114
+ def save_timestamp(tag)
115
+ # run once
116
+ return if @timestamp_flag[tag]
117
+
118
+ @timestamp_flag[tag] = true
119
+ cache_dir = File.join(@dir, 'snapshot', tag.to_s)
120
+ if File.exist? cache_dir
121
+ file = File.join(cache_dir, TIMESTAMP_FILE)
122
+ FileUtils.touch(file)
123
+ end
124
+ end
76
125
  end
77
126
  end