sitediff 1.0.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1b7854497b5e81f48d810acec8106cbc66e33492d046e032e5516f76db26f142
4
- data.tar.gz: a9349a79953237dd017600d49d38b8e734afc561f0ce09a1f8732e0e933530c9
3
+ metadata.gz: f75892f718764c8fd2c18d7f3f7e7cf8908d60ea07c2a765510c8ef409b9f0c1
4
+ data.tar.gz: 3b3744eca0dda04821152aab596fb67891204a1599b4db72e13b4af484693e65
5
5
  SHA512:
6
- metadata.gz: 0e91f665f3c59b8a65f16c6942ec49b9cc9ee7fd12b0a777eadb844a0b9819ab1fd9485495bf2c757ca7342a6198dcccb5ae546c4ddf2682f234d015b64309b2
7
- data.tar.gz: 64b7980bbbade8710b6069af19a67083678c2bd5fa99674df3360c1c6a3ddf8a15de7c5be4e8349ec298fc1c0dc27535b816089cd4f8852b8c8633861d72a178
6
+ metadata.gz: 97e9098b290742f1b3efe3c284e9392be95ffd0f7576df413a6ec612142b0573acf8b8b4d43369961c154d801db6284fcc1a8d69cea7da8ed99b64a0a1f1af75
7
+ data.tar.gz: c4b0e93bc4e0acb3d675c8d675d8f6235035aae72421794495f25223cb086eaa4c87d2cde63caa0eda257b0d91f374a0efbbb416ef8ee88c2f0ffde89a608831
@@ -108,7 +108,9 @@ class SiteDiff
108
108
  encoding = read_results[tag].encoding
109
109
  if encoding || html.length.positive?
110
110
  section = @config.send(tag, true)
111
- Sanitizer.new(html, section, path: path).sanitize
111
+ opts = { path: path }
112
+ opts[:output] = @config.output if @config.output
113
+ Sanitizer.new(html, section, opts).sanitize
112
114
  else
113
115
  html
114
116
  end
@@ -0,0 +1,265 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sitediff'
4
+ require 'sitediff/cache'
5
+ require 'sitediff/config'
6
+ require 'sitediff/config/creator'
7
+ require 'sitediff/config/preset'
8
+ require 'sitediff/fetch'
9
+ require 'sitediff/webserver/resultserver'
10
+
11
+ class SiteDiff
12
+ ##
13
+ # Sitediff API interface.
14
+ class Api
15
+ ##
16
+ # Initializes new Api object.
17
+ def initialize(directory, config_file = nil)
18
+ @dir = get_dir(directory)
19
+ @config = SiteDiff::Config.new(config_file, @dir)
20
+ end
21
+
22
+ ##
23
+ # Initialize a SiteDiff project.
24
+ #
25
+ # Calling:
26
+ # SiteDiff::Api.init(
27
+ # depth: 3,
28
+ # directory: 'sitediff',
29
+ # concurrency: 3,
30
+ # interval: 0,
31
+ # include: nil,
32
+ # exclude: '*.pdf',
33
+ # preset: 'drupal',
34
+ # curl_opts: {timeout: 60},
35
+ # crawl: false
36
+ # )
37
+ def self.init(options)
38
+ # Prepare a config object and write it to the file system.
39
+ creator = SiteDiff::Config::Creator.new(options[:debug], options[:before_url], options[:after_url])
40
+ include_regex = Config.create_regexp(options[:include])
41
+ exclude_regex = Config.create_regexp(options[:exclude])
42
+ creator.create(
43
+ depth: options[:depth],
44
+ directory: options[:directory],
45
+ concurrency: options[:concurrency],
46
+ interval: options[:interval],
47
+ include: include_regex,
48
+ exclude: exclude_regex,
49
+ preset: options[:preset],
50
+ curl_opts: options[:curl_opts]
51
+ )
52
+ SiteDiff.log "Created #{creator.config_file.expand_path}", :success
53
+
54
+ # TODO: implement crawl ^^^
55
+ # Discover paths, if enabled.
56
+ # if options[:crawl]
57
+ # crawl(creator.config_file)
58
+ # SiteDiff.log 'You can now run "sitediff diff".', :success
59
+ # else
60
+ # SiteDiff.log 'Run "sitediff crawl" to discover paths. You should then be able to run "sitediff diff".', :info
61
+ # end
62
+ end
63
+
64
+ ##
65
+ # Diff the `before` and `after`.
66
+ #
67
+ # Calling:
68
+ # Api.diff(
69
+ # paths: options['paths'],
70
+ # paths_file: options['paths-file'],
71
+ # ignore_whitespace: options['ignore-whitespace'],
72
+ # export: options['export'],
73
+ # before: options['before'],
74
+ # after: options['after'],
75
+ # cached: options['cached'],
76
+ # verbose: options['verbose'],
77
+ # report_format: options['report-format'],
78
+ # before_report: options['before-report'],
79
+ # after_report: options['after-report'],
80
+ # cli_mode: false
81
+ # )
82
+ def diff(options)
83
+ @config.ignore_whitespace = options[:ignore_whitespace]
84
+ @config.export = options[:export]
85
+ # Apply "paths" override, if any.
86
+ if options[:paths]
87
+ @config.paths = options[:paths]
88
+ else
89
+ paths_file = options[:paths_file]
90
+ paths_file ||= File.join(@dir, Config::DEFAULT_PATHS_FILENAME)
91
+ paths_file = File.expand_path(paths_file)
92
+
93
+ paths_count = @config.paths_file_read(paths_file)
94
+ SiteDiff.log "Read #{paths_count} paths from: #{paths_file}"
95
+ end
96
+
97
+ # TODO: Why do we allow before and after override during diff?
98
+ @config.before['url'] = options[:before] if options[:before]
99
+ @config.after['url'] = options[:after] if options[:after]
100
+
101
+ # Prepare cache.
102
+ cache = SiteDiff::Cache.new(
103
+ create: options[:cached] != 'none',
104
+ directory: @dir
105
+ )
106
+ cache.read_tags << :before if %w[before all].include?(options[:cached])
107
+ cache.read_tags << :after if %w[after all].include?(options[:cached])
108
+ cache.write_tags << :before << :after
109
+
110
+ # Run sitediff.
111
+ sitediff = SiteDiff.new(
112
+ @config,
113
+ cache,
114
+ options[:verbose],
115
+ options[:debug]
116
+ )
117
+ num_failing = sitediff.run
118
+ exit_code = num_failing.positive? ? 2 : 0
119
+
120
+ # Generate HTML report.
121
+ if options[:report_format] == 'html' || @config.export
122
+ sitediff.report.generate_html(
123
+ @dir,
124
+ options[:before_report],
125
+ options[:after_report]
126
+ )
127
+ end
128
+
129
+ # Generate JSON report.
130
+ if options[:report_format] == 'json' && @config.export == false
131
+ sitediff.report.generate_json @dir
132
+ end
133
+
134
+ SiteDiff.log 'Run "sitediff serve" to see a report.' unless options[:export]
135
+ rescue Config::InvalidConfig => e
136
+ SiteDiff.log "Invalid configuration: #{e.message}", :error
137
+ SiteDiff.log e.backtrace, :error if options[:verbose]
138
+ rescue Config::ConfigNotFound => e
139
+ SiteDiff.log "Invalid configuration: #{e.message}", :error
140
+ SiteDiff.log e.backtrace, :error if options[:verbose]
141
+ else # no exception was raised
142
+ # Thor::Error --> exit(1), guaranteed by exit_on_failure?
143
+ # Failing diff --> exit(2), populated above
144
+ exit(exit_code) if options[:cli_mode]
145
+ end
146
+
147
+ ##
148
+ # Crawl the `before` site to determine `paths`.
149
+ def crawl
150
+ # Prepare cache.
151
+ @cache = SiteDiff::Cache.new(
152
+ create: true,
153
+ directory: @dir
154
+ )
155
+ @cache.write_tags << :before << :after
156
+
157
+ # Crawl with Hydra to discover paths.
158
+ hydra = Typhoeus::Hydra.new(
159
+ max_concurrency: @config.setting(:concurrency)
160
+ )
161
+ @paths = {}
162
+ @config.roots.each do |tag, url|
163
+ Crawler.new(
164
+ hydra,
165
+ url,
166
+ @config.setting(:interval),
167
+ @config.setting(:include),
168
+ @config.setting(:exclude),
169
+ @config.setting(:depth),
170
+ @config.curl_opts,
171
+ @debug
172
+ ) do |info|
173
+ SiteDiff.log "Visited #{info.uri}, cached."
174
+ after_crawl(tag, info)
175
+ end
176
+ end
177
+ hydra.run
178
+
179
+ # Write paths to a file.
180
+ @paths = @paths.values.reduce(&:|).to_a.sort
181
+ @config.paths_file_write(@paths)
182
+
183
+ # Log output.
184
+ file = Pathname.new(@dir) + Config::DEFAULT_PATHS_FILENAME
185
+ SiteDiff.log ''
186
+ SiteDiff.log "#{@paths.length} page(s) found."
187
+ SiteDiff.log "Created #{file.expand_path}.", :success, 'done'
188
+ end
189
+
190
+ ##
191
+ # Serves SiteDiff report for accessing in the browser.
192
+ #
193
+ # Calling:
194
+ # api.serve(browse: true, port: 13080)
195
+ def serve(options)
196
+ @cache = Cache.new(directory: @dir)
197
+ @cache.read_tags << :before << :after
198
+
199
+ SiteDiff::Webserver::ResultServer.new(
200
+ options[:port],
201
+ @dir,
202
+ browse: options[:browse],
203
+ cache: @cache,
204
+ config: @config
205
+ ).wait
206
+ rescue SiteDiffException => e
207
+ SiteDiff.log e.message, :error
208
+ SiteDiff.log e.backtrace, :error if options[:verbose]
209
+ end
210
+
211
+ ##
212
+ # Caches the current version of the site.
213
+ def store(options)
214
+ # TODO: Figure out how to remove this config.validate call.
215
+ @config.validate(need_before: false)
216
+ @config.paths_file_read
217
+
218
+ @cache = SiteDiff::Cache.new(directory: @dir, create: true)
219
+ @cache.write_tags << :before
220
+
221
+ base = options[:url] || @config.after['url']
222
+ fetcher = SiteDiff::Fetch.new(@cache,
223
+ @config.paths,
224
+ @config.setting(:interval),
225
+ @config.setting(:concurrency),
226
+ get_curl_opts(@config.settings),
227
+ options[:debug],
228
+ before: base)
229
+ fetcher.run do |path, _res|
230
+ SiteDiff.log "Visited #{path}, cached"
231
+ end
232
+ end
233
+
234
+ private
235
+
236
+ ##
237
+ # Ensures that the given directory exists.
238
+ def get_dir(directory)
239
+ # Create the dir. Must go before cache initialization!
240
+ @dir = Pathname.new(directory || '.')
241
+ @dir.mkpath unless @dir.directory?
242
+ @dir.to_s
243
+ end
244
+
245
+ ##
246
+ # Processes a crawled path.
247
+ def after_crawl(tag, info)
248
+ path = UriWrapper.canonicalize(info.relative)
249
+
250
+ # Register the path.
251
+ @paths[tag] = [] unless @paths[tag]
252
+ @paths[tag] << path
253
+
254
+ result = info.read_result
255
+
256
+ # Write result to applicable cache.
257
+ @cache.set(tag, path, result)
258
+ # If single-site, cache "after" as "before".
259
+ @cache.set(:before, path, result) unless @config.roots[:before]
260
+
261
+ # TODO: Restore application of rules.
262
+ # @rules.handle_page(tag, res.content, info.document) if @rules && !res.error
263
+ end
264
+ end
265
+ end
@@ -6,6 +6,8 @@ require 'fileutils'
6
6
  class SiteDiff
7
7
  # SiteDiff Cache Handler.
8
8
  class Cache
9
+ TIMESTAMP_FILE = 'timestamp'
10
+
9
11
  attr_accessor :read_tags, :write_tags
10
12
 
11
13
  ##
@@ -17,6 +19,7 @@ class SiteDiff
17
19
  # They indicate whether we should use the cache for reading or writing.
18
20
  @read_tags = Set.new
19
21
  @write_tags = Set.new
22
+ @timestamp_flag = { before: false, after: false }
20
23
 
21
24
  # The directory used by the cache for storage.
22
25
  @dir = opts[:directory] || '.'
@@ -52,6 +55,7 @@ class SiteDiff
52
55
  def set(tag, path, result)
53
56
  return unless @write_tags.include? tag
54
57
 
58
+ save_timestamp(tag)
55
59
  filename = File.join(
56
60
  @dir,
57
61
  'snapshot',
@@ -102,5 +106,19 @@ class SiteDiff
102
106
  @dir.mkpath unless @dir.directory?
103
107
  @dir.to_s
104
108
  end
109
+
110
+ private
111
+
112
+ def save_timestamp(tag)
113
+ # run once
114
+ return if @timestamp_flag[tag]
115
+
116
+ @timestamp_flag[tag] = true
117
+ cache_dir = File.join(@dir, 'snapshot', tag.to_s)
118
+ if File.exist? cache_dir
119
+ file = File.join(cache_dir, TIMESTAMP_FILE)
120
+ FileUtils.touch(file)
121
+ end
122
+ end
105
123
  end
106
124
  end
@@ -2,16 +2,10 @@
2
2
 
3
3
  require 'thor'
4
4
  require 'sitediff'
5
- require 'sitediff/cache'
6
- require 'sitediff/config'
7
- require 'sitediff/config/creator'
8
- require 'sitediff/config/preset'
9
- require 'sitediff/fetch'
10
- require 'sitediff/webserver/resultserver'
5
+ require 'sitediff/api'
11
6
 
12
7
  class SiteDiff
13
8
  # SiteDiff CLI.
14
- # TODO: Use config.defaults to feed default values for sitediff.yaml params?
15
9
  class Cli < Thor
16
10
  class_option 'directory',
17
11
  type: :string,
@@ -78,7 +72,6 @@ class SiteDiff
78
72
  enum: %w[html json],
79
73
  default: 'html',
80
74
  desc: 'The format in which a report should be generated.'
81
- # TODO: Deprecate the parameters before-report / after-report?
82
75
  option 'before-report',
83
76
  type: :string,
84
77
  desc: 'URL to use in reports. Useful if port forwarding.',
@@ -107,82 +100,31 @@ class SiteDiff
107
100
  ##
108
101
  # Computes diffs.
109
102
  def diff(config_file = nil)
110
- @dir = get_dir(options['directory'])
111
- config = SiteDiff::Config.new(config_file, @dir)
112
-
113
103
  # Determine "paths" override based on options.
114
104
  if options['paths'] && options['paths-file']
115
105
  SiteDiff.log "Can't specify both --paths-file and --paths.", :error
116
106
  exit(-1)
117
107
  end
118
108
 
119
- # Ignore whitespace option.
120
- config.ignore_whitespace = options['ignore-whitespace'] if options['ignore-whitespace']
121
-
122
- # Export report option.
123
- config.export = options['export']
124
-
125
- # Apply "paths" override, if any.
126
- config.paths = options['paths'] if options['paths']
127
-
128
- # Determine and apply "paths-file", if "paths" is not specified.
129
- unless options['paths']
130
- paths_file = options['paths-file']
131
- paths_file ||= File.join(@dir, Config::DEFAULT_PATHS_FILENAME)
132
- paths_file = File.expand_path(paths_file)
133
-
134
- paths_count = config.paths_file_read(paths_file)
135
- SiteDiff.log "Read #{paths_count} paths from: #{paths_file}"
136
- end
137
-
138
- # TODO: Why do we allow before and after override during diff?
139
- config.before['url'] = options['before'] if options['before']
140
- config.after['url'] = options['after'] if options['after']
141
-
142
- # Prepare cache.
143
- cache = SiteDiff::Cache.new(
144
- create: options['cached'] != 'none',
145
- directory: @dir
146
- )
147
- cache.read_tags << :before if %w[before all].include?(options['cached'])
148
- cache.read_tags << :after if %w[after all].include?(options['cached'])
149
- cache.write_tags << :before << :after
150
-
151
- # Run sitediff.
152
- sitediff = SiteDiff.new(
153
- config,
154
- cache,
155
- options['verbose'],
156
- options[:debug]
157
- )
158
- num_failing = sitediff.run
159
- exit_code = num_failing.positive? ? 2 : 0
160
-
161
- # Generate HTML report.
162
- if options['report-format'] == 'html' || config.export
163
- sitediff.report.generate_html(
164
- @dir,
165
- options['before-report'],
166
- options['after-report']
109
+ api = Api.new(options['directory'], config_file)
110
+ api_options =
111
+ clean_keys(
112
+ options,
113
+ :paths,
114
+ :paths_file,
115
+ :ignore_whitespace,
116
+ :export,
117
+ :before,
118
+ :after,
119
+ :cached,
120
+ :verbose,
121
+ :debug,
122
+ :report_format,
123
+ :before_report,
124
+ :after_report
167
125
  )
168
- end
169
-
170
- # Generate JSON report.
171
- if options['report-format'] == 'json' && config.export == false
172
- sitediff.report.generate_json @dir
173
- end
174
-
175
- SiteDiff.log 'Run "sitediff serve" to see a report.' unless options['export']
176
- rescue Config::InvalidConfig => e
177
- SiteDiff.log "Invalid configuration: #{e.message}", :error
178
- SiteDiff.log e.backtrace, :error if options[:verbose]
179
- rescue Config::ConfigNotFound => e
180
- SiteDiff.log "Invalid configuration: #{e.message}", :error
181
- SiteDiff.log e.backtrace, :error if options[:verbose]
182
- else # no exception was raised
183
- # Thor::Error --> exit(1), guaranteed by exit_on_failure?
184
- # Failing diff --> exit(2), populated above
185
- exit(exit_code)
126
+ api_options[:cli_mode] = true
127
+ api.diff(api_options)
186
128
  end
187
129
 
188
130
  option :port,
@@ -198,22 +140,9 @@ class SiteDiff
198
140
  ##
199
141
  # Serves SiteDiff report for accessing in the browser.
200
142
  def serve(config_file = nil)
201
- @dir = get_dir(options['directory'])
202
- config = SiteDiff::Config.new(config_file, @dir)
203
-
204
- cache = Cache.new(directory: @dir)
205
- cache.read_tags << :before << :after
206
-
207
- SiteDiff::Webserver::ResultServer.new(
208
- options[:port],
209
- options['directory'],
210
- browse: options[:browse],
211
- cache: cache,
212
- config: config
213
- ).wait
214
- rescue SiteDiffException => e
215
- SiteDiff.log e.message, :error
216
- SiteDiff.log e.backtrace, :error if options[:verbose]
143
+ api = Api.new(options['directory'], config_file)
144
+ api_options = clean_keys(options, :browse, :port)
145
+ api.serve(api_options)
217
146
  end
218
147
 
219
148
  option :depth,
@@ -236,19 +165,14 @@ class SiteDiff
236
165
  type: :numeric,
237
166
  default: Config::DEFAULT_CONFIG['settings']['interval'],
238
167
  desc: 'Crawling delay - interval in milliseconds.'
239
- option :whitelist,
168
+ option :include,
240
169
  type: :string,
241
- default: Config::DEFAULT_CONFIG['settings']['whitelist'],
242
- desc: 'Optional whitelist for crawling.'
243
- option :blacklist,
170
+ default: Config::DEFAULT_CONFIG['settings']['include'],
171
+ desc: 'Optional URL include regex for crawling.'
172
+ option :exclude,
244
173
  type: :string,
245
- default: Config::DEFAULT_CONFIG['settings']['blacklist'],
246
- desc: 'Optional blacklist for crawling.'
247
- # TODO: Remove this option. Always ignore SSL errors.
248
- option :insecure,
249
- type: :boolean,
250
- default: false,
251
- desc: 'Ignore many HTTPS/SSL errors'
174
+ default: Config::DEFAULT_CONFIG['settings']['exclude'],
175
+ desc: 'Optional URL exclude regex for crawling.'
252
176
  option :curl_options,
253
177
  type: :hash,
254
178
  default: {},
@@ -261,29 +185,26 @@ class SiteDiff
261
185
  SiteDiff.log 'sitediff init requires one or two URLs', :error
262
186
  exit(2)
263
187
  end
264
-
265
- # Prepare a config object and write it to the file system.
266
- @dir = get_dir(options['directory'])
267
- creator = SiteDiff::Config::Creator.new(options[:debug], *urls)
268
- creator.create(
269
- depth: options[:depth],
270
- directory: @dir,
271
- concurrency: options[:concurrency],
272
- interval: options[:interval],
273
- whitelist: Config.create_regexp(options['whitelist']),
274
- blacklist: Config.create_regexp(options['blacklist']),
275
- preset: options[:preset],
276
- curl_opts: get_curl_opts(options)
277
- )
278
- SiteDiff.log "Created #{creator.config_file.expand_path}", :success
279
-
280
- # Discover paths, if enabled.
281
- if options[:crawl]
282
- crawl(creator.config_file)
283
- SiteDiff.log 'You can now run "sitediff diff".', :success
284
- else
285
- SiteDiff.log 'Run "sitediff crawl" to discover paths. You should then be able to run "sitediff diff".', :info
286
- end
188
+ api_options =
189
+ clean_keys(
190
+ options,
191
+ :depth,
192
+ :concurrency,
193
+ :interval,
194
+ :include,
195
+ :exclude,
196
+ :preset,
197
+ :crawl
198
+ )
199
+ .merge(
200
+ {
201
+ after_url: urls.pop,
202
+ before_url: urls.pop, # may be nil
203
+ directory: get_dir(options['directory']),
204
+ curl_opts: get_curl_opts(options)
205
+ }
206
+ )
207
+ Api.init(api_options)
287
208
  end
288
209
 
289
210
  option :url,
@@ -294,26 +215,9 @@ class SiteDiff
294
215
  ##
295
216
  # Caches the current version of the site.
296
217
  def store(config_file = nil)
297
- @dir = get_dir(options['directory'])
298
- config = SiteDiff::Config.new(config_file, @dir)
299
- # TODO: Figure out how to remove this config.validate call.
300
- config.validate(need_before: false)
301
- config.paths_file_read
302
-
303
- cache = SiteDiff::Cache.new(directory: @dir, create: true)
304
- cache.write_tags << :before
305
-
306
- base = options[:url] || config.after['url']
307
- fetcher = SiteDiff::Fetch.new(cache,
308
- config.paths,
309
- config.setting(:interval),
310
- config.setting(:concurrency),
311
- get_curl_opts(config.settings),
312
- options[:debug],
313
- before: base)
314
- fetcher.run do |path, _res|
315
- SiteDiff.log "Visited #{path}, cached"
316
- end
218
+ api = Api.new(options['directory'], config_file)
219
+ api_options = clean_keys(options, :url, :debug)
220
+ api.store(api_options)
317
221
  end
318
222
 
319
223
  desc 'crawl [CONFIG-FILE]',
@@ -321,58 +225,15 @@ class SiteDiff
321
225
  ##
322
226
  # Crawls the "before" site to determine "paths".
323
227
  #
324
- # TODO: Move actual crawling to sitediff.crawl(config).
325
- # TODO: Switch to paths = sitediff.crawl().
326
228
  def crawl(config_file = nil)
327
- # Prepare configuration.
328
- @dir = get_dir(options['directory'])
329
- @config = SiteDiff::Config.new(config_file, @dir)
330
-
331
- # Prepare cache.
332
- @cache = SiteDiff::Cache.new(
333
- create: options['cached'] != 'none',
334
- directory: @dir
335
- )
336
- @cache.write_tags << :before << :after
337
-
338
- # Crawl with Hydra to discover paths.
339
- hydra = Typhoeus::Hydra.new(
340
- max_concurrency: @config.setting(:concurrency)
341
- )
342
- @paths = {}
343
- @config.roots.each do |tag, url|
344
- Crawler.new(
345
- hydra,
346
- url,
347
- @config.setting(:interval),
348
- @config.setting(:whitelist),
349
- @config.setting(:blacklist),
350
- @config.setting(:depth),
351
- get_curl_opts(@config.settings),
352
- @debug
353
- ) do |info|
354
- SiteDiff.log "Visited #{info.uri}, cached."
355
- after_crawl(tag, info)
356
- end
357
- end
358
- hydra.run
359
-
360
- # Write paths to a file.
361
- @paths = @paths.values.reduce(&:|).to_a.sort
362
- @config.paths_file_write(@paths)
363
-
364
- # Log output.
365
- file = Pathname.new(@dir) + Config::DEFAULT_PATHS_FILENAME
366
- SiteDiff.log ''
367
- SiteDiff.log "#{@paths.length} page(s) found."
368
- SiteDiff.log "Created #{file.expand_path}.", :success, 'done'
229
+ api = Api.new(options['directory'], config_file)
230
+ api.crawl
369
231
  end
370
232
 
371
233
  no_commands do
372
234
  # Generates CURL options.
373
235
  #
374
- # TODO: This should be in the config class instead.
375
- # TODO: Make all requests insecure and avoid custom curl-opts.
236
+ # TODO: Possibly move to API class.
376
237
  def get_curl_opts(options)
377
238
  # We do want string keys here
378
239
  bool_hash = { 'true' => true, 'false' => false }
@@ -381,10 +242,6 @@ class SiteDiff
381
242
  .merge(options['curl_options'] || {})
382
243
  .merge(options['curl_opts'] || {})
383
244
  curl_opts.each { |k, v| curl_opts[k] = bool_hash.fetch(v, v) }
384
- if options[:insecure]
385
- curl_opts[:ssl_verifypeer] = false
386
- curl_opts[:ssl_verifyhost] = 0
387
- end
388
245
  curl_opts
389
246
  end
390
247
 
@@ -398,23 +255,10 @@ class SiteDiff
398
255
  end
399
256
 
400
257
  ##
401
- # Processes a crawled path.
402
- def after_crawl(tag, info)
403
- path = UriWrapper.canonicalize(info.relative)
404
-
405
- # Register the path.
406
- @paths[tag] = [] unless @paths[tag]
407
- @paths[tag] << path
408
-
409
- result = info.read_result
410
-
411
- # Write result to applicable cache.
412
- @cache.set(tag, path, result)
413
- # If single-site, cache "after" as "before".
414
- @cache.set(:before, path, result) unless @config.roots[:before]
415
-
416
- # TODO: Restore application of rules.
417
- # @rules.handle_page(tag, res.content, info.document) if @rules && !res.error
258
+ # Clean keys - return a subset of a hash with keys as symbols.
259
+ def clean_keys(hash, *keys)
260
+ new_hash = hash.transform_keys { |k| k.tr('-', '_').to_sym }
261
+ new_hash.slice(*keys)
418
262
  end
419
263
  end
420
264
  end