sitediff 0.0.6 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +5 -5
  2. data/.eslintignore +1 -0
  3. data/.eslintrc.json +28 -0
  4. data/.project +11 -0
  5. data/.rubocop.yml +179 -0
  6. data/.rubocop_todo.yml +51 -0
  7. data/CHANGELOG.md +28 -0
  8. data/Dockerfile +33 -0
  9. data/Gemfile +11 -0
  10. data/Gemfile.lock +85 -0
  11. data/INSTALLATION.md +146 -0
  12. data/LICENSE +339 -0
  13. data/README.md +810 -0
  14. data/Rakefile +12 -0
  15. data/Thorfile +135 -0
  16. data/bin/sitediff +9 -2
  17. data/config/.gitkeep +0 -0
  18. data/config/sanitize_domains.example.yaml +8 -0
  19. data/config/sitediff.example.yaml +81 -0
  20. data/docker-compose.test.yml +3 -0
  21. data/lib/sitediff/api.rb +276 -0
  22. data/lib/sitediff/cache.rb +57 -8
  23. data/lib/sitediff/cli.rb +156 -176
  24. data/lib/sitediff/config/creator.rb +61 -77
  25. data/lib/sitediff/config/preset.rb +75 -0
  26. data/lib/sitediff/config.rb +436 -31
  27. data/lib/sitediff/crawler.rb +27 -21
  28. data/lib/sitediff/diff.rb +32 -9
  29. data/lib/sitediff/fetch.rb +10 -3
  30. data/lib/sitediff/files/diff.html.erb +20 -2
  31. data/lib/sitediff/files/jquery.min.js +2 -0
  32. data/lib/sitediff/files/normalize.css +349 -0
  33. data/lib/sitediff/files/report.html.erb +171 -0
  34. data/lib/sitediff/files/sidebyside.html.erb +5 -2
  35. data/lib/sitediff/files/sitediff.css +303 -30
  36. data/lib/sitediff/files/sitediff.js +367 -0
  37. data/lib/sitediff/presets/drupal.yaml +63 -0
  38. data/lib/sitediff/report.rb +254 -0
  39. data/lib/sitediff/result.rb +50 -20
  40. data/lib/sitediff/sanitize/dom_transform.rb +47 -8
  41. data/lib/sitediff/sanitize/regexp.rb +24 -3
  42. data/lib/sitediff/sanitize.rb +81 -12
  43. data/lib/sitediff/uriwrapper.rb +65 -23
  44. data/lib/sitediff/webserver/resultserver.rb +30 -33
  45. data/lib/sitediff/webserver.rb +15 -3
  46. data/lib/sitediff.rb +130 -83
  47. data/misc/sitediff - overview report.png +0 -0
  48. data/misc/sitediff - page report.png +0 -0
  49. data/package-lock.json +878 -0
  50. data/package.json +25 -0
  51. data/sitediff.gemspec +51 -0
  52. metadata +91 -29
  53. data/lib/sitediff/files/html_report.html.erb +0 -66
  54. data/lib/sitediff/files/rules/drupal.yaml +0 -63
  55. data/lib/sitediff/rules.rb +0 -65
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ require 'rubocop/rake_task'
9
+
10
+ RuboCop::RakeTask.new
11
+
12
+ task default: %i[spec rubocop]
data/Thorfile ADDED
@@ -0,0 +1,135 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # TODO: Determine the utility of this file.
5
+
6
+ LIB_DIR = File.join(File.dirname(__FILE__), 'lib')
7
+ $LOAD_PATH << LIB_DIR
8
+ require 'sitediff/webserver'
9
+ require 'sitediff/webserver/resultserver'
10
+
11
+ # Thor Base class.
12
+ class Base < Thor
13
+ method_options local: true
14
+ # Adds the option to all Base subclasses.
15
+ # method_options() takes different arguments than option().
16
+ def initialize(*args)
17
+ super(*args)
18
+ @local = options['local']
19
+ end
20
+
21
+ # gives us run()
22
+ include Thor::Actions
23
+
24
+ # Thor, by default, exits with 0 no matter what!
25
+ def self.exit_on_failure?
26
+ true
27
+ end
28
+
29
+ protected
30
+
31
+ def executable(gem)
32
+ gem = './bin/sitediff' if (gem == 'sitediff') && @local
33
+ "#{'bundle exec' if @local} #{gem}"
34
+ end
35
+ end
36
+
37
+ # Thor for Docker.
38
+ class Docker < Base
39
+ IMAGE = 'evolvingweb/sitediff'
40
+
41
+ desc 'build', 'Build a docker image for sitediff'
42
+ # Make a build image for docker.
43
+ def build
44
+ run "docker build -t #{IMAGE} . "
45
+ end
46
+
47
+ desc 'run', 'Run a rake task (or a login shell if none given) inside docker'
48
+ # NOTE: We can't override run() (which is reserved by Thor). Luckily, Thor only
49
+ # checks for the first N necessary characters to match a command with a
50
+ # method. Cf. Thor::normalize_command_name()
51
+ def run_(task = 'bash')
52
+ docker_opts = ['-t', "-v #{File.dirname(__FILE__)}:/sitediff"]
53
+ finish_exec(task, docker_opts)
54
+ end
55
+
56
+ desc 'compose', 'Run a task inside docker without volume mounting (not supported with compose)'
57
+ # Run a task inside docker without volume mounting.
58
+ def compose(task = 'bash')
59
+ docker_opts = ['-t']
60
+ finish_exec(task, docker_opts)
61
+ end
62
+
63
+ no_commands do
64
+ # Builds the docker run command for the given task and executes it.
65
+ def finish_exec(task, docker_opts)
66
+ if task == 'bash'
67
+ cmd = 'bash'
68
+ docker_opts << '-i'
69
+ else
70
+ # pass down the local flag to docker command
71
+ cmd = "#{executable('thor')} #{task} #{@local ? '--local' : '--no-local'}"
72
+ end
73
+ puts "docker run #{docker_opts.join(' ')} #{IMAGE} #{cmd}"
74
+ run "docker run #{docker_opts.join(' ')} #{IMAGE} #{cmd}"
75
+ end
76
+ end
77
+ end
78
+
79
+ # Thor for Spec.
80
+ class Spec < Base
81
+ desc 'unit', 'run RSpec unit tests'
82
+ # Run RSpec unit tests.
83
+ def unit
84
+ puts "#{executable('rspec')} spec/unit"
85
+ run "#{executable('rspec')} spec/unit"
86
+ end
87
+
88
+ desc 'fixture', 'run RSpec integration tests'
89
+ # Run RSpec integration tests.
90
+ def fixture
91
+ puts "#{executable('rspec')} spec/unit"
92
+ run "#{executable('rspec')} spec/fixtures"
93
+ end
94
+
95
+ desc 'all', 'runs both unit and fixture tests', hide: true
96
+ # hidden task to lump together multiple tasks
97
+ def all
98
+ unit
99
+ fixture
100
+ end
101
+ default_task :all
102
+ end
103
+
104
+ # Thor for fixtures.
105
+ class Fixture < Base
106
+ desc 'local', 'Run a sitediff test case'
107
+ # Run a sitediff test case.
108
+ def local
109
+ run "#{executable('sitediff')} diff --cached=none spec/fixtures/cli/config.yaml"
110
+ end
111
+
112
+ desc 'http', 'Run a sitediff test case, using web servers'
113
+ # Run a sitediff test case, using web servers.
114
+ def http
115
+ cmd = "#{executable('sitediff')} diff --cached=none spec/fixtures/cli/config.yaml"
116
+ http_fixtures(cmd).kill
117
+ end
118
+
119
+ desc 'serve', 'Serve the result of the fixture test'
120
+ # Serve the result of the fixture test.
121
+ def serve
122
+ cmd = "#{executable('sitediff')} diff --cached=none --paths-file=spec/sites/ruby-doc.org/paths.txt spec/unit/cli/config.yaml"
123
+ http_fixtures(cmd)
124
+ SiteDiff::Webserver::ResultServer.new(nil, 'sitediff', quiet: true).wait
125
+ end
126
+
127
+ private
128
+
129
+ # HTTP Fixtures.
130
+ def http_fixtures(cmd)
131
+ serv = SiteDiff::Webserver::FixtureServer.new
132
+ run "#{cmd} --before #{serv.before} --after #{serv.after}"
133
+ serv
134
+ end
135
+ end
data/bin/sitediff CHANGED
@@ -2,8 +2,15 @@
2
2
  # frozen_string_literal: true
3
3
 
4
4
  # when run as gem, $0 is /usr/local/bin/sitediff not this file
5
- $LOAD_PATH.unshift File.expand_path('../lib', __dir__) if $PROGRAM_NAME == __FILE__
5
+ if $PROGRAM_NAME == __FILE__
6
+ $LOAD_PATH.unshift File.expand_path('../lib', __dir__)
7
+ end
6
8
 
7
9
  require 'sitediff/cli'
8
10
 
9
- SiteDiff::Cli.start
11
+ begin
12
+ SiteDiff::Cli.start
13
+ rescue Interrupt
14
+ puts("\n")
15
+ SiteDiff.log('Stopping. Interrupted by user.')
16
+ end
data/config/.gitkeep ADDED
File without changes
@@ -0,0 +1,8 @@
1
+ sanitization:
2
+ -
3
+ title:
4
+ 'strip_domains_from_absolute_urls'
5
+ pattern:
6
+ 'http:\/\/[a-zA-Z0-9.:-]+'
7
+ substitute:
8
+ '__domain__'
@@ -0,0 +1,81 @@
1
+ # Include other configuration files, merging them with this file.
2
+ includes:
3
+ - extra-rules.yaml
4
+
5
+ # Settings.
6
+ #
7
+ # If you use "sitediff init" with the right parameters, it will generate
8
+ # this section for you.
9
+ settings:
10
+ # Crawl 2 levels deep.
11
+ depth: 2
12
+ # Wait for 250ms between requests.
13
+ interval: 250
14
+ # Make only 1 request at a time - no simultaneous requests.
15
+ # Concurrency has to be one when an interval is set.
16
+ concurrency: 1
17
+ # Don't follow links to PDF files.
18
+ exclude: '.*\.pdf'
19
+ # Curl options, if any.
20
+ curl_opts:
21
+ max_recv_speed_large: 10000
22
+
23
+ # Rules under this element apply only to the 'before' site.
24
+ before:
25
+ # URL of the 'before' version of the site.
26
+ url: http://localhost/old
27
+
28
+ # Sanitizations and DOM transformations, just like the general ones
29
+ # demonstrated above, but applied only to the 'before' site.
30
+ dom_transform:
31
+ - title: Example
32
+ type: remove
33
+ selector: div.updates-required
34
+
35
+ # Rules under this element apply only to the 'after' site.
36
+ after:
37
+ # URL of the 'after' version of the site.
38
+ url: http://localhost/new
39
+
40
+ # The root element to compare.
41
+ #
42
+ # Usually, sitediff compares the HTML of the entire page. If you'd rather
43
+ # check just a subset of the page, specify a selector here. For example, the
44
+ # line below causes only the body to be compared, ignoring the HTML head.
45
+ selector: 'body'
46
+
47
+ # General regular expression rules, applied to both versions of the site.
48
+ sanitization:
49
+ # Normalize input tags containing random tokens.
50
+ - title: Remove form-build-id
51
+ pattern: '<input type="hidden" name="form_build_id" value="form-[a-zA-Z0-9_-]+" *\/?>'
52
+ substitute: '<input type="hidden" name="form_build_id" value="__form_build_id__">'
53
+
54
+ # Replace meta property="twitter:*" with meta name="twitter:*".
55
+ - title: Meta 'property' changed to 'name'
56
+ pattern: 'property="twitter:'
57
+ substitute: 'name="twitter:'
58
+ # 'selector' limits this rule to only within the selected elements.
59
+ selector: meta
60
+ # 'path' limits this rule to only certain pages.
61
+ path: /user
62
+
63
+ # General DOM transforms, applied to both versions of the site.
64
+ dom_transform:
65
+ # Remove article elements, replacing them with their content
66
+ - title: Unwrap article elements
67
+ type: unwrap
68
+ selector: article
69
+
70
+ # Remove classes from divs
71
+ - title: Remove classes bar and baz from divs
72
+ type: remove_class
73
+ selector: div
74
+ class:
75
+ - class-bar
76
+ - class-baz
77
+
78
+ # Remove a div ID.
79
+ - title: Remove block containing current time.
80
+ type: remove
81
+ selector: div#block-time
@@ -0,0 +1,3 @@
1
+ sut:
2
+ build: .
3
+ command: bundle exec rspec
@@ -0,0 +1,276 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sitediff'
4
+ require 'sitediff/cache'
5
+ require 'sitediff/config'
6
+ require 'sitediff/config/creator'
7
+ require 'sitediff/config/preset'
8
+ require 'sitediff/fetch'
9
+ require 'sitediff/webserver/resultserver'
10
+
11
+ class SiteDiff
12
+ ##
13
+ # Sitediff API interface.
14
+ class Api
15
+ ##
16
+ # Initializes new Api object.
17
+ def initialize(directory, config_file = nil)
18
+ @dir = get_dir(directory)
19
+ @config = SiteDiff::Config.new(config_file, @dir)
20
+ end
21
+
22
+ ##
23
+ # Initialize a SiteDiff project.
24
+ #
25
+ # Calling:
26
+ # SiteDiff::Api.init(
27
+ # depth: 3,
28
+ # directory: 'sitediff',
29
+ # concurrency: 3,
30
+ # interval: 0,
31
+ # include: nil,
32
+ # exclude: '.*\.pdf',
33
+ # preset: 'drupal',
34
+ # curl_opts: {timeout: 60},
35
+ # crawl: false
36
+ # )
37
+ def self.init(options)
38
+ # Prepare a config object and write it to the file system.
39
+ creator = SiteDiff::Config::Creator.new(options[:debug], options[:before_url], options[:after_url])
40
+ include_regex = Config.create_regexp(options[:include])
41
+ exclude_regex = Config.create_regexp(options[:exclude])
42
+ creator.create(
43
+ depth: options[:depth],
44
+ directory: options[:directory],
45
+ concurrency: options[:concurrency],
46
+ interval: options[:interval],
47
+ include: include_regex,
48
+ exclude: exclude_regex,
49
+ preset: options[:preset],
50
+ curl_opts: options[:curl_opts]
51
+ )
52
+ SiteDiff.log "Created #{creator.config_file.expand_path}", :success
53
+
54
+ # TODO: implement crawl ^^^
55
+ # Discover paths, if enabled.
56
+ # if options[:crawl]
57
+ # crawl(creator.config_file)
58
+ # SiteDiff.log 'You can now run "sitediff diff".', :success
59
+ # else
60
+ # SiteDiff.log 'Run "sitediff crawl" to discover paths. You should then be able to run "sitediff diff".', :info
61
+ # end
62
+ end
63
+
64
+ ##
65
+ # Diff the `before` and `after`.
66
+ #
67
+ # Calling:
68
+ # Api.diff(
69
+ # paths: options['paths'],
70
+ # paths_file: options['paths-file'],
71
+ # ignore_whitespace: options['ignore-whitespace'],
72
+ # export: options['export'],
73
+ # before: options['before'],
74
+ # after: options['after'],
75
+ # cached: options['cached'],
76
+ # verbose: options['verbose'],
77
+ # report_format: options['report-format'],
78
+ # before_report: options['before-report'],
79
+ # after_report: options['after-report'],
80
+ # cli_mode: false
81
+ # )
82
+ def diff(options)
83
+ @config.ignore_whitespace = options[:ignore_whitespace]
84
+ @config.export = options[:export]
85
+ # Apply "paths" override, if any.
86
+ if options[:paths]
87
+ @config.paths = options[:paths]
88
+ else
89
+ paths_file = options[:paths_file]
90
+ paths_file ||= File.join(@dir, Config::DEFAULT_PATHS_FILENAME)
91
+ paths_file = File.expand_path(paths_file)
92
+
93
+ paths_count = @config.paths_file_read(paths_file)
94
+ SiteDiff.log "Read #{paths_count} paths from: #{paths_file}"
95
+ end
96
+
97
+ # TODO: Why do we allow before and after override during diff?
98
+ @config.before['url'] = options[:before] if options[:before]
99
+ @config.after['url'] = options[:after] if options[:after]
100
+
101
+ # Prepare cache.
102
+ cache = SiteDiff::Cache.new(
103
+ create: options[:cached] != 'none',
104
+ directory: @dir
105
+ )
106
+ cache.read_tags << :before if %w[before all].include?(options[:cached])
107
+ cache.read_tags << :after if %w[after all].include?(options[:cached])
108
+ cache.write_tags << :before << :after
109
+
110
+ # Run sitediff.
111
+ sitediff = SiteDiff.new(
112
+ @config,
113
+ cache,
114
+ verbose: options[:verbose],
115
+ debug: options[:debug]
116
+ )
117
+ num_failing = sitediff.run
118
+ exit_code = num_failing.positive? ? 2 : 0
119
+
120
+ # Generate HTML report.
121
+ if options[:report_format] == 'html' || @config.export
122
+ sitediff.report.generate_html(
123
+ @dir,
124
+ options[:before_report],
125
+ options[:after_report]
126
+ )
127
+ end
128
+
129
+ # Generate JSON report.
130
+ if options[:report_format] == 'json' && @config.export == false
131
+ sitediff.report.generate_json @dir
132
+ end
133
+
134
+ SiteDiff.log 'Run "sitediff serve" to see a report.' unless options[:export]
135
+ rescue Config::InvalidConfig => e
136
+ SiteDiff.log "Invalid configuration: #{e.message}", :error
137
+ SiteDiff.log e.backtrace, :error if options[:verbose]
138
+ rescue Config::ConfigNotFound => e
139
+ SiteDiff.log "Invalid configuration: #{e.message}", :error
140
+ SiteDiff.log e.backtrace, :error if options[:verbose]
141
+ else # no exception was raised
142
+ # Thor::Error --> exit(1), guaranteed by exit_on_failure?
143
+ # Failing diff --> exit(2), populated above
144
+ exit(exit_code) if options[:cli_mode]
145
+ end
146
+
147
+ ##
148
+ # Crawl the `before` site to determine `paths`.
149
+ def crawl
150
+ # Prepare cache.
151
+ @cache = SiteDiff::Cache.new(
152
+ create: true,
153
+ directory: @dir
154
+ )
155
+ @cache.write_tags << :before << :after
156
+
157
+ # Crawl with Hydra to discover paths.
158
+ hydra = Typhoeus::Hydra.new(
159
+ max_concurrency: @config.setting(:concurrency)
160
+ )
161
+ @paths = {}
162
+ @config.roots.each do |tag, url|
163
+ Crawler.new(
164
+ hydra,
165
+ url,
166
+ @config.setting(:interval),
167
+ @config.setting(:include),
168
+ @config.setting(:exclude),
169
+ @config.setting(:depth),
170
+ @config.curl_opts,
171
+ debug: @debug
172
+ ) do |info|
173
+ SiteDiff.log "Visited #{info.uri}, cached."
174
+ after_crawl(tag, info)
175
+ end
176
+ end
177
+ hydra.run
178
+
179
+ # Write paths to a file.
180
+ @paths = @paths.values.reduce(&:|).to_a.sort
181
+ @config.paths_file_write(@paths)
182
+
183
+ # Log output.
184
+ file = Pathname.new(@dir) + Config::DEFAULT_PATHS_FILENAME
185
+ SiteDiff.log ''
186
+ SiteDiff.log "#{@paths.length} page(s) found."
187
+ SiteDiff.log "Created #{file.expand_path}.", :success, 'done'
188
+ end
189
+
190
+ ##
191
+ # Serves SiteDiff report for accessing in the browser.
192
+ #
193
+ # Calling:
194
+ # api.serve(browse: true, port: 13080)
195
+ def serve(options)
196
+ @cache = Cache.new(directory: @dir)
197
+ @cache.read_tags << :before << :after
198
+
199
+ SiteDiff::Webserver::ResultServer.new(
200
+ options[:port],
201
+ @dir,
202
+ browse: options[:browse],
203
+ cache: @cache,
204
+ config: @config
205
+ ).wait
206
+ rescue SiteDiffException => e
207
+ SiteDiff.log e.message, :error
208
+ SiteDiff.log e.backtrace, :error if options[:verbose]
209
+ end
210
+
211
+ ##
212
+ # Fetches the configured paths and stores the results in the cache under the :before tag.
213
+ def store(options)
214
+ # TODO: Figure out how to remove this config.validate call.
215
+ @config.validate(need_before: false)
216
+ @config.paths_file_read
217
+
218
+ @cache = SiteDiff::Cache.new(directory: @dir, create: true)
219
+ @cache.write_tags << :before
220
+
221
+ base = options[:url] || @config.after['url']
222
+ fetcher = SiteDiff::Fetch.new(@cache,
223
+ @config.paths,
224
+ @config.setting(:interval),
225
+ @config.setting(:concurrency),
226
+ get_curl_opts(@config.settings),
227
+ options[:debug],
228
+ before: base)
229
+ fetcher.run do |path, _res|
230
+ SiteDiff.log "Visited #{path}, cached"
231
+ end
232
+ end
233
+
234
+ private
235
+
236
+ ##
237
+ # Ensures that the given directory exists.
238
+ def get_dir(directory)
239
+ # Create the dir. Must go before cache initialization!
240
+ @dir = Pathname.new(directory || '.')
241
+ @dir.mkpath unless @dir.directory?
242
+ @dir.to_s
243
+ end
244
+
245
+ ##
246
+ # Processes a crawled path.
247
+ def after_crawl(tag, info)
248
+ path = UriWrapper.canonicalize(info.relative)
249
+
250
+ # Register the path.
251
+ @paths[tag] = [] unless @paths[tag]
252
+ @paths[tag] << path
253
+
254
+ result = info.read_result
255
+
256
+ # Write result to applicable cache.
257
+ # @cache.set(tag, path, result)
258
+ @cache.set(:before, path, result) if tag == 'before'
259
+ @cache.set(:after, path, result) if tag == 'after'
260
+
261
+ # TODO: Restore application of rules.
262
+ # @rules.handle_page(tag, res.content, info.document) if @rules && !res.error
263
+ end
264
+
265
+ def get_curl_opts(options)
266
+ # We do want string keys here
267
+ bool_hash = { 'true' => true, 'false' => false }
268
+ curl_opts = UriWrapper::DEFAULT_CURL_OPTS
269
+ .clone
270
+ .merge(options['curl_options'] || {})
271
+ .merge(options['curl_opts'] || {})
272
+ curl_opts.each { |k, v| curl_opts[k] = bool_hash.fetch(v, v) }
273
+ curl_opts
274
+ end
275
+ end
276
+ end
@@ -4,28 +4,45 @@ require 'set'
4
4
  require 'fileutils'
5
5
 
6
6
  class SiteDiff
7
+ # SiteDiff Cache Handler.
7
8
  class Cache
9
+ TIMESTAMP_FILE = 'timestamp'
10
+
8
11
  attr_accessor :read_tags, :write_tags
9
12
 
13
+ ##
14
+ # Creates a Cache object.
10
15
  def initialize(opts = {})
11
16
  @create = opts[:create]
12
17
 
13
- # Read and Write tags are sets that can contain :before and :after
14
- # They indicate whether we should use the cache for reading or writing
18
+ # Read and Write tags are sets that can contain :before and :after.
19
+ # They indicate whether we should use the cache for reading or writing.
15
20
  @read_tags = Set.new
16
21
  @write_tags = Set.new
22
+ @timestamp_flag = { before: false, after: false }
23
+
24
+ # The directory used by the cache for storage.
17
25
  @dir = opts[:directory] || '.'
18
26
  end
19
27
 
28
+ ##
20
29
  # Is a tag cached?
30
+ # TODO: Rename it to is_cached? as it makes more sense.
21
31
  def tag?(tag)
22
32
  File.directory?(File.join(@dir, 'snapshot', tag.to_s))
23
33
  end
24
34
 
35
+ ##
36
+ # Get data from cache.
25
37
  def get(tag, path)
26
38
  return nil unless @read_tags.include? tag
27
39
 
28
- filename = File.join(@dir, 'snapshot', tag.to_s, *path.split(File::SEPARATOR))
40
+ filename = File.join(
41
+ @dir,
42
+ 'snapshot',
43
+ tag.to_s,
44
+ *path.split(File::SEPARATOR)
45
+ )
29
46
 
30
47
  filename = File.join(filename, 'index.html') if File.directory?(filename)
31
48
  return nil unless File.file? filename
@@ -33,10 +50,18 @@ class SiteDiff
33
50
  Marshal.load(File.read(filename))
34
51
  end
35
52
 
53
+ ##
54
+ # Set data to cache.
36
55
  def set(tag, path, result)
37
56
  return unless @write_tags.include? tag
38
57
 
39
- filename = File.join(@dir, 'snapshot', tag.to_s, *path.split(File::SEPARATOR))
58
+ save_timestamp(tag)
59
+ filename = File.join(
60
+ @dir,
61
+ 'snapshot',
62
+ tag.to_s,
63
+ *path.split(File::SEPARATOR)
64
+ )
40
65
 
41
66
  filename = File.join(filename, 'index.html') if File.directory?(filename)
42
67
  filepath = Pathname.new(filename)
@@ -46,32 +71,56 @@ class SiteDiff
46
71
  rescue Errno::EEXIST
47
72
  curdir = filepath
48
73
  curdir = curdir.parent until curdir.exist?
49
- tempname = curdir.dirname + (curdir.basename.to_s + '.temporary')
74
+ tempname = "#{curdir.dirname}/#{curdir.basename}.temporary"
75
+ # tempname = curdir.dirname + (curdir.basename.to_s + '.temporary')
50
76
  # May cause problems if action is not atomic!
51
77
  # Move existing file to dir/index.html first
52
78
  # Not robust! Should generate an UUID or something.
53
- SiteDiff.log "Overwriting file #{tempname}", :warn if File.exist?(tempname)
79
+ if File.exist?(tempname)
80
+ SiteDiff.log "Overwriting file #{tempname}", :warning
81
+ end
54
82
  curdir.rename(tempname)
55
83
  filepath.dirname.mkpath
56
84
  # Should only happen in strange situations such as when the path
57
85
  # is foo/index.html/bar (i.e., index.html is a directory)
58
- SiteDiff.log "Overwriting file #{tempname}", :warn if (curdir + 'index.html').exist?
59
- tempname.rename(curdir + 'index.html')
86
+ if File.exist?("#{curdir}/index.html")
87
+ SiteDiff.log "Overwriting file #{tempname}", :warning
88
+ end
89
+ File.rename(tempname, "#{curdir}/index.html")
90
+ # tempname.rename(curdir + 'index.html')
60
91
  end
61
92
  end
62
93
  File.open(filename, 'w') { |file| file.write(Marshal.dump(result)) }
63
94
  end
64
95
 
96
+ ##
97
+ # TODO: Document this or remove it if unused.
65
98
  def key(tag, path)
66
99
  # Ensure encoding stays the same!
67
100
  Marshal.dump([tag, path.encode('UTF-8')])
68
101
  end
69
102
 
103
+ ##
104
+ # Ensures that a directory exists.
70
105
  def get_dir(directory)
71
106
  # Create the dir. Must go before cache initialization!
72
107
  @dir = Pathname.new(directory || '.')
73
108
  @dir.mkpath unless @dir.directory?
74
109
  @dir.to_s
75
110
  end
111
+
112
+ private
113
+
114
+ def save_timestamp(tag)
115
+ # run once
116
+ return if @timestamp_flag[tag]
117
+
118
+ @timestamp_flag[tag] = true
119
+ cache_dir = File.join(@dir, 'snapshot', tag.to_s)
120
+ if File.exist? cache_dir
121
+ file = File.join(cache_dir, TIMESTAMP_FILE)
122
+ FileUtils.touch(file)
123
+ end
124
+ end
76
125
  end
77
126
  end