sitediff 1.0.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1b7854497b5e81f48d810acec8106cbc66e33492d046e032e5516f76db26f142
4
- data.tar.gz: a9349a79953237dd017600d49d38b8e734afc561f0ce09a1f8732e0e933530c9
3
+ metadata.gz: f75892f718764c8fd2c18d7f3f7e7cf8908d60ea07c2a765510c8ef409b9f0c1
4
+ data.tar.gz: 3b3744eca0dda04821152aab596fb67891204a1599b4db72e13b4af484693e65
5
5
  SHA512:
6
- metadata.gz: 0e91f665f3c59b8a65f16c6942ec49b9cc9ee7fd12b0a777eadb844a0b9819ab1fd9485495bf2c757ca7342a6198dcccb5ae546c4ddf2682f234d015b64309b2
7
- data.tar.gz: 64b7980bbbade8710b6069af19a67083678c2bd5fa99674df3360c1c6a3ddf8a15de7c5be4e8349ec298fc1c0dc27535b816089cd4f8852b8c8633861d72a178
6
+ metadata.gz: 97e9098b290742f1b3efe3c284e9392be95ffd0f7576df413a6ec612142b0573acf8b8b4d43369961c154d801db6284fcc1a8d69cea7da8ed99b64a0a1f1af75
7
+ data.tar.gz: c4b0e93bc4e0acb3d675c8d675d8f6235035aae72421794495f25223cb086eaa4c87d2cde63caa0eda257b0d91f374a0efbbb416ef8ee88c2f0ffde89a608831
@@ -108,7 +108,9 @@ class SiteDiff
108
108
  encoding = read_results[tag].encoding
109
109
  if encoding || html.length.positive?
110
110
  section = @config.send(tag, true)
111
- Sanitizer.new(html, section, path: path).sanitize
111
+ opts = { path: path }
112
+ opts[:output] = @config.output if @config.output
113
+ Sanitizer.new(html, section, opts).sanitize
112
114
  else
113
115
  html
114
116
  end
@@ -0,0 +1,265 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sitediff'
4
+ require 'sitediff/cache'
5
+ require 'sitediff/config'
6
+ require 'sitediff/config/creator'
7
+ require 'sitediff/config/preset'
8
+ require 'sitediff/fetch'
9
+ require 'sitediff/webserver/resultserver'
10
+
11
+ class SiteDiff
12
+ ##
13
+ # Sitediff API interface.
14
+ class Api
15
+ ##
16
+ # Initializes new Api object.
17
+ def initialize(directory, config_file = nil)
18
+ @dir = get_dir(directory)
19
+ @config = SiteDiff::Config.new(config_file, @dir)
20
+ end
21
+
22
+ ##
23
+ # Initialize a SiteDiff project.
24
+ #
25
+ # Calling:
26
+ # SiteDiff::Api.init(
27
+ # depth: 3,
28
+ # directory: 'sitediff',
29
+ # concurrency: 3,
30
+ # interval: 0,
31
+ # include: nil,
32
+ # exclude: '*.pdf',
33
+ # preset: 'drupal',
34
+ # curl_opts: {timeout: 60},
35
+ # crawl: false
36
+ # )
37
+ def self.init(options)
38
+ # Prepare a config object and write it to the file system.
39
+ creator = SiteDiff::Config::Creator.new(options[:debug], options[:before_url], options[:after_url])
40
+ include_regex = Config.create_regexp(options[:include])
41
+ exclude_regex = Config.create_regexp(options[:exclude])
42
+ creator.create(
43
+ depth: options[:depth],
44
+ directory: options[:directory],
45
+ concurrency: options[:concurrency],
46
+ interval: options[:interval],
47
+ include: include_regex,
48
+ exclude: exclude_regex,
49
+ preset: options[:preset],
50
+ curl_opts: options[:curl_opts]
51
+ )
52
+ SiteDiff.log "Created #{creator.config_file.expand_path}", :success
53
+
54
+ # TODO: implement crawl ^^^
55
+ # Discover paths, if enabled.
56
+ # if options[:crawl]
57
+ # crawl(creator.config_file)
58
+ # SiteDiff.log 'You can now run "sitediff diff".', :success
59
+ # else
60
+ # SiteDiff.log 'Run "sitediff crawl" to discover paths. You should then be able to run "sitediff diff".', :info
61
+ # end
62
+ end
63
+
64
+ ##
65
+ # Diff the `before` and `after`.
66
+ #
67
+ # Calling:
68
+ # Api.diff(
69
+ # paths: options['paths'],
70
+ # paths_file: options['paths-file'],
71
+ # ignore_whitespace: options['ignore-whitespace'],
72
+ # export: options['export'],
73
+ # before: options['before'],
74
+ # after: options['after'],
75
+ # cached: options['cached'],
76
+ # verbose: options['verbose'],
77
+ # report_format: options['report-format'],
78
+ # before_report: options['before-report'],
79
+ # after_report: options['after-report'],
80
+ # cli_mode: false
81
+ # )
82
+ def diff(options)
83
+ @config.ignore_whitespace = options[:ignore_whitespace]
84
+ @config.export = options[:export]
85
+ # Apply "paths" override, if any.
86
+ if options[:paths]
87
+ @config.paths = options[:paths]
88
+ else
89
+ paths_file = options[:paths_file]
90
+ paths_file ||= File.join(@dir, Config::DEFAULT_PATHS_FILENAME)
91
+ paths_file = File.expand_path(paths_file)
92
+
93
+ paths_count = @config.paths_file_read(paths_file)
94
+ SiteDiff.log "Read #{paths_count} paths from: #{paths_file}"
95
+ end
96
+
97
+ # TODO: Why do we allow before and after override during diff?
98
+ @config.before['url'] = options[:before] if options[:before]
99
+ @config.after['url'] = options[:after] if options[:after]
100
+
101
+ # Prepare cache.
102
+ cache = SiteDiff::Cache.new(
103
+ create: options[:cached] != 'none',
104
+ directory: @dir
105
+ )
106
+ cache.read_tags << :before if %w[before all].include?(options[:cached])
107
+ cache.read_tags << :after if %w[after all].include?(options[:cached])
108
+ cache.write_tags << :before << :after
109
+
110
+ # Run sitediff.
111
+ sitediff = SiteDiff.new(
112
+ @config,
113
+ cache,
114
+ options[:verbose],
115
+ options[:debug]
116
+ )
117
+ num_failing = sitediff.run
118
+ exit_code = num_failing.positive? ? 2 : 0
119
+
120
+ # Generate HTML report.
121
+ if options[:report_format] == 'html' || @config.export
122
+ sitediff.report.generate_html(
123
+ @dir,
124
+ options[:before_report],
125
+ options[:after_report]
126
+ )
127
+ end
128
+
129
+ # Generate JSON report.
130
+ if options[:report_format] == 'json' && @config.export == false
131
+ sitediff.report.generate_json @dir
132
+ end
133
+
134
+ SiteDiff.log 'Run "sitediff serve" to see a report.' unless options[:export]
135
+ rescue Config::InvalidConfig => e
136
+ SiteDiff.log "Invalid configuration: #{e.message}", :error
137
+ SiteDiff.log e.backtrace, :error if options[:verbose]
138
+ rescue Config::ConfigNotFound => e
139
+ SiteDiff.log "Invalid configuration: #{e.message}", :error
140
+ SiteDiff.log e.backtrace, :error if options[:verbose]
141
+ else # no exception was raised
142
+ # Thor::Error --> exit(1), guaranteed by exit_on_failure?
143
+ # Failing diff --> exit(2), populated above
144
+ exit(exit_code) if options[:cli_mode]
145
+ end
146
+
147
+ ##
148
+ # Crawl the `before` site to determine `paths`.
149
+ def crawl
150
+ # Prepare cache.
151
+ @cache = SiteDiff::Cache.new(
152
+ create: true,
153
+ directory: @dir
154
+ )
155
+ @cache.write_tags << :before << :after
156
+
157
+ # Crawl with Hydra to discover paths.
158
+ hydra = Typhoeus::Hydra.new(
159
+ max_concurrency: @config.setting(:concurrency)
160
+ )
161
+ @paths = {}
162
+ @config.roots.each do |tag, url|
163
+ Crawler.new(
164
+ hydra,
165
+ url,
166
+ @config.setting(:interval),
167
+ @config.setting(:include),
168
+ @config.setting(:exclude),
169
+ @config.setting(:depth),
170
+ @config.curl_opts,
171
+ @debug
172
+ ) do |info|
173
+ SiteDiff.log "Visited #{info.uri}, cached."
174
+ after_crawl(tag, info)
175
+ end
176
+ end
177
+ hydra.run
178
+
179
+ # Write paths to a file.
180
+ @paths = @paths.values.reduce(&:|).to_a.sort
181
+ @config.paths_file_write(@paths)
182
+
183
+ # Log output.
184
+ file = Pathname.new(@dir) + Config::DEFAULT_PATHS_FILENAME
185
+ SiteDiff.log ''
186
+ SiteDiff.log "#{@paths.length} page(s) found."
187
+ SiteDiff.log "Created #{file.expand_path}.", :success, 'done'
188
+ end
189
+
190
+ ##
191
+ # Serves SiteDiff report for accessing in the browser.
192
+ #
193
+ # Calling:
194
+ # api.serve(browse: true, port: 13080)
195
+ def serve(options)
196
+ @cache = Cache.new(directory: @dir)
197
+ @cache.read_tags << :before << :after
198
+
199
+ SiteDiff::Webserver::ResultServer.new(
200
+ options[:port],
201
+ @dir,
202
+ browse: options[:browse],
203
+ cache: @cache,
204
+ config: @config
205
+ ).wait
206
+ rescue SiteDiffException => e
207
+ SiteDiff.log e.message, :error
208
+ SiteDiff.log e.backtrace, :error if options[:verbose]
209
+ end
210
+
211
+ ##
212
+ #
213
+ def store(options)
214
+ # TODO: Figure out how to remove this config.validate call.
215
+ @config.validate(need_before: false)
216
+ @config.paths_file_read
217
+
218
+ @cache = SiteDiff::Cache.new(directory: @dir, create: true)
219
+ @cache.write_tags << :before
220
+
221
+ base = options[:url] || @config.after['url']
222
+ fetcher = SiteDiff::Fetch.new(@cache,
223
+ @config.paths,
224
+ @config.setting(:interval),
225
+ @config.setting(:concurrency),
226
+ get_curl_opts(@config.settings),
227
+ options[:debug],
228
+ before: base)
229
+ fetcher.run do |path, _res|
230
+ SiteDiff.log "Visited #{path}, cached"
231
+ end
232
+ end
233
+
234
+ private
235
+
236
+ ##
237
+ # Ensures that the given directory exists.
238
+ def get_dir(directory)
239
+ # Create the dir. Must go before cache initialization!
240
+ @dir = Pathname.new(directory || '.')
241
+ @dir.mkpath unless @dir.directory?
242
+ @dir.to_s
243
+ end
244
+
245
+ ##
246
+ # Processes a crawled path.
247
+ def after_crawl(tag, info)
248
+ path = UriWrapper.canonicalize(info.relative)
249
+
250
+ # Register the path.
251
+ @paths[tag] = [] unless @paths[tag]
252
+ @paths[tag] << path
253
+
254
+ result = info.read_result
255
+
256
+ # Write result to applicable cache.
257
+ @cache.set(tag, path, result)
258
+ # If single-site, cache "after" as "before".
259
+ @cache.set(:before, path, result) unless @config.roots[:before]
260
+
261
+ # TODO: Restore application of rules.
262
+ # @rules.handle_page(tag, res.content, info.document) if @rules && !res.error
263
+ end
264
+ end
265
+ end
@@ -6,6 +6,8 @@ require 'fileutils'
6
6
  class SiteDiff
7
7
  # SiteDiff Cache Handler.
8
8
  class Cache
9
+ TIMESTAMP_FILE = 'timestamp'
10
+
9
11
  attr_accessor :read_tags, :write_tags
10
12
 
11
13
  ##
@@ -17,6 +19,7 @@ class SiteDiff
17
19
  # They indicate whether we should use the cache for reading or writing.
18
20
  @read_tags = Set.new
19
21
  @write_tags = Set.new
22
+ @timestamp_flag = { before: false, after: false }
20
23
 
21
24
  # The directory used by the cache for storage.
22
25
  @dir = opts[:directory] || '.'
@@ -52,6 +55,7 @@ class SiteDiff
52
55
  def set(tag, path, result)
53
56
  return unless @write_tags.include? tag
54
57
 
58
+ save_timestamp(tag)
55
59
  filename = File.join(
56
60
  @dir,
57
61
  'snapshot',
@@ -102,5 +106,19 @@ class SiteDiff
102
106
  @dir.mkpath unless @dir.directory?
103
107
  @dir.to_s
104
108
  end
109
+
110
+ private
111
+
112
+ def save_timestamp(tag)
113
+ # run once
114
+ return if @timestamp_flag[tag]
115
+
116
+ @timestamp_flag[tag] = true
117
+ cache_dir = File.join(@dir, 'snapshot', tag.to_s)
118
+ if File.exist? cache_dir
119
+ file = File.join(cache_dir, TIMESTAMP_FILE)
120
+ FileUtils.touch(file)
121
+ end
122
+ end
105
123
  end
106
124
  end
@@ -2,16 +2,10 @@
2
2
 
3
3
  require 'thor'
4
4
  require 'sitediff'
5
- require 'sitediff/cache'
6
- require 'sitediff/config'
7
- require 'sitediff/config/creator'
8
- require 'sitediff/config/preset'
9
- require 'sitediff/fetch'
10
- require 'sitediff/webserver/resultserver'
5
+ require 'sitediff/api'
11
6
 
12
7
  class SiteDiff
13
8
  # SiteDiff CLI.
14
- # TODO: Use config.defaults to feed default values for sitediff.yaml params?
15
9
  class Cli < Thor
16
10
  class_option 'directory',
17
11
  type: :string,
@@ -78,7 +72,6 @@ class SiteDiff
78
72
  enum: %w[html json],
79
73
  default: 'html',
80
74
  desc: 'The format in which a report should be generated.'
81
- # TODO: Deprecate the parameters before-report / after-report?
82
75
  option 'before-report',
83
76
  type: :string,
84
77
  desc: 'URL to use in reports. Useful if port forwarding.',
@@ -107,82 +100,31 @@ class SiteDiff
107
100
  ##
108
101
  # Computes diffs.
109
102
  def diff(config_file = nil)
110
- @dir = get_dir(options['directory'])
111
- config = SiteDiff::Config.new(config_file, @dir)
112
-
113
103
  # Determine "paths" override based on options.
114
104
  if options['paths'] && options['paths-file']
115
105
  SiteDiff.log "Can't specify both --paths-file and --paths.", :error
116
106
  exit(-1)
117
107
  end
118
108
 
119
- # Ignore whitespace option.
120
- config.ignore_whitespace = options['ignore-whitespace'] if options['ignore-whitespace']
121
-
122
- # Export report option.
123
- config.export = options['export']
124
-
125
- # Apply "paths" override, if any.
126
- config.paths = options['paths'] if options['paths']
127
-
128
- # Determine and apply "paths-file", if "paths" is not specified.
129
- unless options['paths']
130
- paths_file = options['paths-file']
131
- paths_file ||= File.join(@dir, Config::DEFAULT_PATHS_FILENAME)
132
- paths_file = File.expand_path(paths_file)
133
-
134
- paths_count = config.paths_file_read(paths_file)
135
- SiteDiff.log "Read #{paths_count} paths from: #{paths_file}"
136
- end
137
-
138
- # TODO: Why do we allow before and after override during diff?
139
- config.before['url'] = options['before'] if options['before']
140
- config.after['url'] = options['after'] if options['after']
141
-
142
- # Prepare cache.
143
- cache = SiteDiff::Cache.new(
144
- create: options['cached'] != 'none',
145
- directory: @dir
146
- )
147
- cache.read_tags << :before if %w[before all].include?(options['cached'])
148
- cache.read_tags << :after if %w[after all].include?(options['cached'])
149
- cache.write_tags << :before << :after
150
-
151
- # Run sitediff.
152
- sitediff = SiteDiff.new(
153
- config,
154
- cache,
155
- options['verbose'],
156
- options[:debug]
157
- )
158
- num_failing = sitediff.run
159
- exit_code = num_failing.positive? ? 2 : 0
160
-
161
- # Generate HTML report.
162
- if options['report-format'] == 'html' || config.export
163
- sitediff.report.generate_html(
164
- @dir,
165
- options['before-report'],
166
- options['after-report']
109
+ api = Api.new(options['directory'], config_file)
110
+ api_options =
111
+ clean_keys(
112
+ options,
113
+ :paths,
114
+ :paths_file,
115
+ :ignore_whitespace,
116
+ :export,
117
+ :before,
118
+ :after,
119
+ :cached,
120
+ :verbose,
121
+ :debug,
122
+ :report_format,
123
+ :before_report,
124
+ :after_report
167
125
  )
168
- end
169
-
170
- # Generate JSON report.
171
- if options['report-format'] == 'json' && config.export == false
172
- sitediff.report.generate_json @dir
173
- end
174
-
175
- SiteDiff.log 'Run "sitediff serve" to see a report.' unless options['export']
176
- rescue Config::InvalidConfig => e
177
- SiteDiff.log "Invalid configuration: #{e.message}", :error
178
- SiteDiff.log e.backtrace, :error if options[:verbose]
179
- rescue Config::ConfigNotFound => e
180
- SiteDiff.log "Invalid configuration: #{e.message}", :error
181
- SiteDiff.log e.backtrace, :error if options[:verbose]
182
- else # no exception was raised
183
- # Thor::Error --> exit(1), guaranteed by exit_on_failure?
184
- # Failing diff --> exit(2), populated above
185
- exit(exit_code)
126
+ api_options[:cli_mode] = true
127
+ api.diff(api_options)
186
128
  end
187
129
 
188
130
  option :port,
@@ -198,22 +140,9 @@ class SiteDiff
198
140
  ##
199
141
  # Serves SiteDiff report for accessing in the browser.
200
142
  def serve(config_file = nil)
201
- @dir = get_dir(options['directory'])
202
- config = SiteDiff::Config.new(config_file, @dir)
203
-
204
- cache = Cache.new(directory: @dir)
205
- cache.read_tags << :before << :after
206
-
207
- SiteDiff::Webserver::ResultServer.new(
208
- options[:port],
209
- options['directory'],
210
- browse: options[:browse],
211
- cache: cache,
212
- config: config
213
- ).wait
214
- rescue SiteDiffException => e
215
- SiteDiff.log e.message, :error
216
- SiteDiff.log e.backtrace, :error if options[:verbose]
143
+ api = Api.new(options['directory'], config_file)
144
+ api_options = clean_keys(options, :browse, :port)
145
+ api.serve(api_options)
217
146
  end
218
147
 
219
148
  option :depth,
@@ -236,19 +165,14 @@ class SiteDiff
236
165
  type: :numeric,
237
166
  default: Config::DEFAULT_CONFIG['settings']['interval'],
238
167
  desc: 'Crawling delay - interval in milliseconds.'
239
- option :whitelist,
168
+ option :include,
240
169
  type: :string,
241
- default: Config::DEFAULT_CONFIG['settings']['whitelist'],
242
- desc: 'Optional whitelist for crawling.'
243
- option :blacklist,
170
+ default: Config::DEFAULT_CONFIG['settings']['include'],
171
+ desc: 'Optional URL include regex for crawling.'
172
+ option :exclude,
244
173
  type: :string,
245
- default: Config::DEFAULT_CONFIG['settings']['blacklist'],
246
- desc: 'Optional blacklist for crawling.'
247
- # TODO: Remove this option. Always ignore SSL errors.
248
- option :insecure,
249
- type: :boolean,
250
- default: false,
251
- desc: 'Ignore many HTTPS/SSL errors'
174
+ default: Config::DEFAULT_CONFIG['settings']['exclude'],
175
+ desc: 'Optional URL exclude regex for crawling.'
252
176
  option :curl_options,
253
177
  type: :hash,
254
178
  default: {},
@@ -261,29 +185,26 @@ class SiteDiff
261
185
  SiteDiff.log 'sitediff init requires one or two URLs', :error
262
186
  exit(2)
263
187
  end
264
-
265
- # Prepare a config object and write it to the file system.
266
- @dir = get_dir(options['directory'])
267
- creator = SiteDiff::Config::Creator.new(options[:debug], *urls)
268
- creator.create(
269
- depth: options[:depth],
270
- directory: @dir,
271
- concurrency: options[:concurrency],
272
- interval: options[:interval],
273
- whitelist: Config.create_regexp(options['whitelist']),
274
- blacklist: Config.create_regexp(options['blacklist']),
275
- preset: options[:preset],
276
- curl_opts: get_curl_opts(options)
277
- )
278
- SiteDiff.log "Created #{creator.config_file.expand_path}", :success
279
-
280
- # Discover paths, if enabled.
281
- if options[:crawl]
282
- crawl(creator.config_file)
283
- SiteDiff.log 'You can now run "sitediff diff".', :success
284
- else
285
- SiteDiff.log 'Run "sitediff crawl" to discover paths. You should then be able to run "sitediff diff".', :info
286
- end
188
+ api_options =
189
+ clean_keys(
190
+ options,
191
+ :depth,
192
+ :concurrency,
193
+ :interval,
194
+ :include,
195
+ :exclude,
196
+ :preset,
197
+ :crawl
198
+ )
199
+ .merge(
200
+ {
201
+ after_url: urls.pop,
202
+ before_url: urls.pop, # may be nil
203
+ directory: get_dir(options['directory']),
204
+ curl_opts: get_curl_opts(options)
205
+ }
206
+ )
207
+ Api.init(api_options)
287
208
  end
288
209
 
289
210
  option :url,
@@ -294,26 +215,9 @@ class SiteDiff
294
215
  ##
295
216
  # Caches the current version of the site.
296
217
  def store(config_file = nil)
297
- @dir = get_dir(options['directory'])
298
- config = SiteDiff::Config.new(config_file, @dir)
299
- # TODO: Figure out how to remove this config.validate call.
300
- config.validate(need_before: false)
301
- config.paths_file_read
302
-
303
- cache = SiteDiff::Cache.new(directory: @dir, create: true)
304
- cache.write_tags << :before
305
-
306
- base = options[:url] || config.after['url']
307
- fetcher = SiteDiff::Fetch.new(cache,
308
- config.paths,
309
- config.setting(:interval),
310
- config.setting(:concurrency),
311
- get_curl_opts(config.settings),
312
- options[:debug],
313
- before: base)
314
- fetcher.run do |path, _res|
315
- SiteDiff.log "Visited #{path}, cached"
316
- end
218
+ api = Api.new(options['directory'], config_file)
219
+ api_options = clean_keys(options, :url, :debug)
220
+ api.store(api_options)
317
221
  end
318
222
 
319
223
  desc 'crawl [CONFIG-FILE]',
@@ -321,58 +225,15 @@ class SiteDiff
321
225
  ##
322
226
  # Crawls the "before" site to determine "paths".
323
227
  #
324
- # TODO: Move actual crawling to sitediff.crawl(config).
325
- # TODO: Switch to paths = sitediff.crawl().
326
228
  def crawl(config_file = nil)
327
- # Prepare configuration.
328
- @dir = get_dir(options['directory'])
329
- @config = SiteDiff::Config.new(config_file, @dir)
330
-
331
- # Prepare cache.
332
- @cache = SiteDiff::Cache.new(
333
- create: options['cached'] != 'none',
334
- directory: @dir
335
- )
336
- @cache.write_tags << :before << :after
337
-
338
- # Crawl with Hydra to discover paths.
339
- hydra = Typhoeus::Hydra.new(
340
- max_concurrency: @config.setting(:concurrency)
341
- )
342
- @paths = {}
343
- @config.roots.each do |tag, url|
344
- Crawler.new(
345
- hydra,
346
- url,
347
- @config.setting(:interval),
348
- @config.setting(:whitelist),
349
- @config.setting(:blacklist),
350
- @config.setting(:depth),
351
- get_curl_opts(@config.settings),
352
- @debug
353
- ) do |info|
354
- SiteDiff.log "Visited #{info.uri}, cached."
355
- after_crawl(tag, info)
356
- end
357
- end
358
- hydra.run
359
-
360
- # Write paths to a file.
361
- @paths = @paths.values.reduce(&:|).to_a.sort
362
- @config.paths_file_write(@paths)
363
-
364
- # Log output.
365
- file = Pathname.new(@dir) + Config::DEFAULT_PATHS_FILENAME
366
- SiteDiff.log ''
367
- SiteDiff.log "#{@paths.length} page(s) found."
368
- SiteDiff.log "Created #{file.expand_path}.", :success, 'done'
229
+ api = Api.new(options['directory'], config_file)
230
+ api.crawl
369
231
  end
370
232
 
371
233
  no_commands do
372
234
  # Generates CURL options.
373
235
  #
374
- # TODO: This should be in the config class instead.
375
- # TODO: Make all requests insecure and avoid custom curl-opts.
236
+ # TODO: Possibly move to API class.
376
237
  def get_curl_opts(options)
377
238
  # We do want string keys here
378
239
  bool_hash = { 'true' => true, 'false' => false }
@@ -381,10 +242,6 @@ class SiteDiff
381
242
  .merge(options['curl_options'] || {})
382
243
  .merge(options['curl_opts'] || {})
383
244
  curl_opts.each { |k, v| curl_opts[k] = bool_hash.fetch(v, v) }
384
- if options[:insecure]
385
- curl_opts[:ssl_verifypeer] = false
386
- curl_opts[:ssl_verifyhost] = 0
387
- end
388
245
  curl_opts
389
246
  end
390
247
 
@@ -398,23 +255,10 @@ class SiteDiff
398
255
  end
399
256
 
400
257
  ##
401
- # Processes a crawled path.
402
- def after_crawl(tag, info)
403
- path = UriWrapper.canonicalize(info.relative)
404
-
405
- # Register the path.
406
- @paths[tag] = [] unless @paths[tag]
407
- @paths[tag] << path
408
-
409
- result = info.read_result
410
-
411
- # Write result to applicable cache.
412
- @cache.set(tag, path, result)
413
- # If single-site, cache "after" as "before".
414
- @cache.set(:before, path, result) unless @config.roots[:before]
415
-
416
- # TODO: Restore application of rules.
417
- # @rules.handle_page(tag, res.content, info.document) if @rules && !res.error
258
+ # Clean keys - return a subset of a hash with keys as symbols.
259
+ def clean_keys(hash, *keys)
260
+ new_hash = hash.transform_keys { |k| k.tr('-', '_').to_sym }
261
+ new_hash.slice(*keys)
418
262
  end
419
263
  end
420
264
  end