sitediff 0.0.6 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.eslintignore +1 -0
- data/.eslintrc.json +28 -0
- data/.project +11 -0
- data/.rubocop.yml +179 -0
- data/.rubocop_todo.yml +51 -0
- data/CHANGELOG.md +28 -0
- data/Dockerfile +33 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +85 -0
- data/INSTALLATION.md +146 -0
- data/LICENSE +339 -0
- data/README.md +810 -0
- data/Rakefile +12 -0
- data/Thorfile +135 -0
- data/bin/sitediff +9 -2
- data/config/.gitkeep +0 -0
- data/config/sanitize_domains.example.yaml +8 -0
- data/config/sitediff.example.yaml +81 -0
- data/docker-compose.test.yml +3 -0
- data/lib/sitediff/api.rb +276 -0
- data/lib/sitediff/cache.rb +57 -8
- data/lib/sitediff/cli.rb +156 -176
- data/lib/sitediff/config/creator.rb +61 -77
- data/lib/sitediff/config/preset.rb +75 -0
- data/lib/sitediff/config.rb +436 -31
- data/lib/sitediff/crawler.rb +27 -21
- data/lib/sitediff/diff.rb +32 -9
- data/lib/sitediff/fetch.rb +10 -3
- data/lib/sitediff/files/diff.html.erb +20 -2
- data/lib/sitediff/files/jquery.min.js +2 -0
- data/lib/sitediff/files/normalize.css +349 -0
- data/lib/sitediff/files/report.html.erb +171 -0
- data/lib/sitediff/files/sidebyside.html.erb +5 -2
- data/lib/sitediff/files/sitediff.css +303 -30
- data/lib/sitediff/files/sitediff.js +367 -0
- data/lib/sitediff/presets/drupal.yaml +63 -0
- data/lib/sitediff/report.rb +254 -0
- data/lib/sitediff/result.rb +50 -20
- data/lib/sitediff/sanitize/dom_transform.rb +47 -8
- data/lib/sitediff/sanitize/regexp.rb +24 -3
- data/lib/sitediff/sanitize.rb +81 -12
- data/lib/sitediff/uriwrapper.rb +65 -23
- data/lib/sitediff/webserver/resultserver.rb +30 -33
- data/lib/sitediff/webserver.rb +15 -3
- data/lib/sitediff.rb +130 -83
- data/misc/sitediff - overview report.png +0 -0
- data/misc/sitediff - page report.png +0 -0
- data/package-lock.json +878 -0
- data/package.json +25 -0
- data/sitediff.gemspec +51 -0
- metadata +91 -29
- data/lib/sitediff/files/html_report.html.erb +0 -66
- data/lib/sitediff/files/rules/drupal.yaml +0 -63
- data/lib/sitediff/rules.rb +0 -65
data/lib/sitediff/cli.rb
CHANGED
@@ -2,35 +2,29 @@
|
|
2
2
|
|
3
3
|
require 'thor'
|
4
4
|
require 'sitediff'
|
5
|
-
require 'sitediff/
|
6
|
-
require 'sitediff/config'
|
7
|
-
require 'sitediff/config/creator'
|
8
|
-
require 'sitediff/fetch'
|
9
|
-
require 'sitediff/webserver/resultserver'
|
5
|
+
require 'sitediff/api'
|
10
6
|
|
11
7
|
class SiteDiff
|
8
|
+
# SiteDiff CLI.
|
12
9
|
class Cli < Thor
|
13
10
|
class_option 'directory',
|
14
11
|
type: :string,
|
15
12
|
aliases: '-C',
|
16
13
|
default: 'sitediff',
|
17
14
|
desc: 'Configuration directory'
|
18
|
-
class_option :
|
19
|
-
type: :hash,
|
20
|
-
default: {},
|
21
|
-
desc: 'Options to be passed to curl'
|
22
|
-
class_option :insecure,
|
15
|
+
class_option :debug,
|
23
16
|
type: :boolean,
|
17
|
+
aliases: '-d',
|
24
18
|
default: false,
|
25
|
-
desc: '
|
26
|
-
class_option
|
19
|
+
desc: 'Stop on certain errors and produce error trace backs.'
|
20
|
+
class_option 'verbose',
|
27
21
|
type: :boolean,
|
22
|
+
aliases: '-v',
|
28
23
|
default: false,
|
29
|
-
desc: '
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
desc: 'Crawling delay - interval in milliseconds'
|
24
|
+
desc: 'Show verbose output in terminal'
|
25
|
+
|
26
|
+
# Command aliases.
|
27
|
+
map recrawl: :crawl
|
34
28
|
|
35
29
|
# Thor, by default, exits with 0 no matter what!
|
36
30
|
def self.exit_on_failure?
|
@@ -42,6 +36,20 @@ class SiteDiff
|
|
42
36
|
true
|
43
37
|
end
|
44
38
|
|
39
|
+
desc 'version', 'Show version information'
|
40
|
+
##
|
41
|
+
# Show version information.
|
42
|
+
def version
|
43
|
+
gemspec = SiteDiff.gemspec
|
44
|
+
output = []
|
45
|
+
output.push("Sitediff CLI #{gemspec.version}")
|
46
|
+
if options[:verbose]
|
47
|
+
output.push("Website: #{gemspec.homepage}")
|
48
|
+
output.push("GitHub: #{gemspec.metadata['source_code_uri']}")
|
49
|
+
end
|
50
|
+
puts output.join("\n")
|
51
|
+
end
|
52
|
+
|
45
53
|
option 'paths-file',
|
46
54
|
type: :string,
|
47
55
|
desc: 'Paths are read (one at a line) from PATHS: ' \
|
@@ -53,83 +61,70 @@ class SiteDiff
|
|
53
61
|
desc: 'Specific path or paths to fetch'
|
54
62
|
option 'before',
|
55
63
|
type: :string,
|
56
|
-
desc: 'URL
|
64
|
+
desc: 'URL to the "before" site, prefixed to all paths.',
|
57
65
|
aliases: '--before-url'
|
58
66
|
option 'after',
|
59
67
|
type: :string,
|
60
|
-
desc: 'URL
|
68
|
+
desc: 'URL to the "after" site, prefixed to all paths.',
|
61
69
|
aliases: '--after-url'
|
70
|
+
option 'report-format',
|
71
|
+
type: :string,
|
72
|
+
enum: %w[html json],
|
73
|
+
default: 'html',
|
74
|
+
desc: 'The format in which a report should be generated.'
|
62
75
|
option 'before-report',
|
63
76
|
type: :string,
|
64
|
-
desc: '
|
77
|
+
desc: 'URL to use in reports. Useful if port forwarding.',
|
65
78
|
aliases: '--before-url-report'
|
66
79
|
option 'after-report',
|
67
80
|
type: :string,
|
68
|
-
desc: '
|
81
|
+
desc: 'URL to use in reports. Useful if port forwarding.',
|
69
82
|
aliases: '--after-url-report'
|
70
83
|
option 'cached',
|
71
84
|
type: :string,
|
72
85
|
enum: %w[none all before after],
|
73
86
|
default: 'before',
|
74
87
|
desc: 'Use the cached version of these sites, if available.'
|
75
|
-
option '
|
88
|
+
option 'ignore-whitespace',
|
76
89
|
type: :boolean,
|
77
|
-
aliases: '-v',
|
78
90
|
default: false,
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
# override
|
92
|
-
paths
|
93
|
-
|
94
|
-
|
95
|
-
SiteDiff.log "Can't have both --paths-file and --paths", :error
|
96
|
-
exit(-1)
|
97
|
-
end
|
98
|
-
|
99
|
-
paths_file = Pathname.new(paths_file).expand_path
|
100
|
-
unless File.exist? paths_file
|
101
|
-
raise Config::InvalidConfig,
|
102
|
-
"Paths file '#{paths_file}' not found!"
|
103
|
-
end
|
104
|
-
SiteDiff.log "Reading paths from: #{paths_file}"
|
105
|
-
config.paths = File.readlines(paths_file)
|
91
|
+
aliases: '-w',
|
92
|
+
desc: 'Ignore changes in whitespace.'
|
93
|
+
option 'export',
|
94
|
+
type: :boolean,
|
95
|
+
default: false,
|
96
|
+
aliases: '-e',
|
97
|
+
desc: 'Export report to files. This option forces HTML format.'
|
98
|
+
desc 'diff [OPTIONS] [CONFIG-FILE]',
|
99
|
+
'Compute diffs on configured URLs.'
|
100
|
+
##
|
101
|
+
# Computes diffs.
|
102
|
+
def diff(config_file = nil)
|
103
|
+
# Determine "paths" override based on options.
|
104
|
+
if options['paths'] && options['paths-file']
|
105
|
+
SiteDiff.log "Can't specify both --paths-file and --paths.", :error
|
106
|
+
exit(-1)
|
106
107
|
end
|
107
|
-
config.paths = paths if paths
|
108
|
-
|
109
|
-
config.before['url'] = options['before'] if options['before']
|
110
|
-
config.after['url'] = options['after'] if options['after']
|
111
108
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
# Failing diff --> exit(2), populated above
|
132
|
-
exit(exit_code)
|
109
|
+
api = Api.new(options['directory'], config_file)
|
110
|
+
api_options =
|
111
|
+
clean_keys(
|
112
|
+
options,
|
113
|
+
:paths,
|
114
|
+
:paths_file,
|
115
|
+
:ignore_whitespace,
|
116
|
+
:export,
|
117
|
+
:before,
|
118
|
+
:after,
|
119
|
+
:cached,
|
120
|
+
:verbose,
|
121
|
+
:debug,
|
122
|
+
:report_format,
|
123
|
+
:before_report,
|
124
|
+
:after_report
|
125
|
+
)
|
126
|
+
api_options[:cli_mode] = true
|
127
|
+
api.diff(api_options)
|
133
128
|
end
|
134
129
|
|
135
130
|
option :port,
|
@@ -140,129 +135,119 @@ class SiteDiff
|
|
140
135
|
type: :boolean,
|
141
136
|
default: true,
|
142
137
|
desc: 'Whether to open the served content in your browser'
|
143
|
-
desc 'serve [OPTIONS]',
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
SiteDiff::Webserver::ResultServer.new(
|
152
|
-
options[:port],
|
153
|
-
options['directory'],
|
154
|
-
browse: options[:browse],
|
155
|
-
cache: cache,
|
156
|
-
config: config
|
157
|
-
).wait
|
158
|
-
rescue SiteDiffException => e
|
159
|
-
SiteDiff.log e.message, :error
|
160
|
-
SiteDiff.log e.backtrace, :error
|
138
|
+
desc 'serve [OPTIONS] [CONFIG-FILE]',
|
139
|
+
'Serve SiteDiff report directory over HTTP.'
|
140
|
+
##
|
141
|
+
# Serves SiteDiff report for accessing in the browser.
|
142
|
+
def serve(config_file = nil)
|
143
|
+
api = Api.new(options['directory'], config_file)
|
144
|
+
api_options = clean_keys(options, :browse, :port)
|
145
|
+
api.serve(api_options)
|
161
146
|
end
|
162
147
|
|
163
148
|
option :depth,
|
164
149
|
type: :numeric,
|
165
|
-
default:
|
150
|
+
default: Config::DEFAULT_CONFIG['settings']['depth'],
|
166
151
|
desc: 'How deeply to crawl the given site'
|
167
|
-
option :
|
152
|
+
option :crawl,
|
153
|
+
type: :boolean,
|
154
|
+
default: true,
|
155
|
+
desc: 'Run "sitediff crawl" to discover paths.'
|
156
|
+
option :preset,
|
168
157
|
type: :string,
|
169
|
-
enum:
|
170
|
-
|
171
|
-
desc: 'Whether rules for the site should be auto-created'
|
158
|
+
enum: Config::Preset.all,
|
159
|
+
desc: 'Framework-specific presets to apply.'
|
172
160
|
option :concurrency,
|
173
161
|
type: :numeric,
|
174
|
-
default:
|
175
|
-
desc: 'Max number of concurrent connections made'
|
176
|
-
option :
|
162
|
+
default: Config::DEFAULT_CONFIG['settings']['concurrency'],
|
163
|
+
desc: 'Max number of concurrent connections made.'
|
164
|
+
option :interval,
|
165
|
+
type: :numeric,
|
166
|
+
default: Config::DEFAULT_CONFIG['settings']['interval'],
|
167
|
+
desc: 'Crawling delay - interval in milliseconds.'
|
168
|
+
option :include,
|
177
169
|
type: :string,
|
178
|
-
default: '',
|
179
|
-
desc: 'Optional
|
180
|
-
option :
|
170
|
+
default: Config::DEFAULT_CONFIG['settings']['include'],
|
171
|
+
desc: 'Optional URL include regex for crawling.'
|
172
|
+
option :exclude,
|
181
173
|
type: :string,
|
182
|
-
default: '',
|
183
|
-
desc: 'Optional
|
184
|
-
|
174
|
+
default: Config::DEFAULT_CONFIG['settings']['exclude'],
|
175
|
+
desc: 'Optional URL exclude regex for crawling.'
|
176
|
+
option :curl_options,
|
177
|
+
type: :hash,
|
178
|
+
default: {},
|
179
|
+
desc: 'Options to be passed to curl'
|
180
|
+
desc 'init URL [URL]', 'Create a sitediff configuration.'
|
181
|
+
##
|
182
|
+
# Initializes a sitediff (yaml) configuration file.
|
185
183
|
def init(*urls)
|
186
184
|
unless (1..2).cover? urls.size
|
187
185
|
SiteDiff.log 'sitediff init requires one or two URLs', :error
|
188
186
|
exit(2)
|
189
187
|
end
|
188
|
+
api_options =
|
189
|
+
clean_keys(
|
190
|
+
options,
|
191
|
+
:depth,
|
192
|
+
:concurrency,
|
193
|
+
:interval,
|
194
|
+
:include,
|
195
|
+
:exclude,
|
196
|
+
:preset,
|
197
|
+
:crawl
|
198
|
+
)
|
199
|
+
.merge(
|
200
|
+
{
|
201
|
+
after_url: urls.pop,
|
202
|
+
before_url: urls.pop,
|
203
|
+
directory: get_dir(options['directory']),
|
204
|
+
curl_opts: get_curl_opts(options)
|
205
|
+
}
|
206
|
+
)
|
190
207
|
|
191
|
-
|
192
|
-
check_interval(@interval)
|
193
|
-
@dir = get_dir(options['directory'])
|
194
|
-
curl_opts = get_curl_opts(options)
|
195
|
-
@whitelist = create_regexp(options['whitelist'])
|
196
|
-
@blacklist = create_regexp(options['blacklist'])
|
197
|
-
creator = SiteDiff::Config::Creator.new(options[:concurrency],
|
198
|
-
options['interval'],
|
199
|
-
@whitelist,
|
200
|
-
@blacklist,
|
201
|
-
curl_opts,
|
202
|
-
options[:debug],
|
203
|
-
*urls)
|
204
|
-
creator.create(
|
205
|
-
depth: options[:depth],
|
206
|
-
directory: @dir,
|
207
|
-
rules: options[:rules] != 'no',
|
208
|
-
rules_disabled: (options[:rules] == 'disabled')
|
209
|
-
) do |_tag, info|
|
210
|
-
SiteDiff.log "Visited #{info.uri}, cached"
|
211
|
-
end
|
212
|
-
|
213
|
-
SiteDiff.log "Created #{creator.config_file.expand_path}", :success
|
214
|
-
SiteDiff.log "You can now run 'sitediff diff'", :success
|
208
|
+
Api.init(api_options)
|
215
209
|
end
|
216
210
|
|
217
211
|
option :url,
|
218
212
|
type: :string,
|
219
213
|
desc: 'A custom base URL to fetch from'
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
config.validate(need_before: false)
|
230
|
-
cache = SiteDiff::Cache.new(directory: @dir, create: true)
|
231
|
-
cache.write_tags << :before
|
214
|
+
desc 'store [CONFIG-FILE]',
|
215
|
+
'Cache the current contents of a site for later comparison.'
|
216
|
+
##
|
217
|
+
# Caches the current version of the site.
|
218
|
+
def store(config_file = nil)
|
219
|
+
api = Api.new(options['directory'], config_file)
|
220
|
+
api_options = clean_keys(options, :url, :debug)
|
221
|
+
api.store(api_options)
|
222
|
+
end
|
232
223
|
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
fetcher.run do |path, _res|
|
242
|
-
SiteDiff.log "Visited #{path}, cached"
|
243
|
-
end
|
224
|
+
desc 'crawl [CONFIG-FILE]',
|
225
|
+
'Crawl the "before" site to discover paths.'
|
226
|
+
##
|
227
|
+
# Crawls the "before" site to determine "paths".
|
228
|
+
#
|
229
|
+
def crawl(config_file = nil)
|
230
|
+
api = Api.new(options['directory'], config_file)
|
231
|
+
api.crawl
|
244
232
|
end
|
245
233
|
|
246
234
|
no_commands do
|
235
|
+
# Generates CURL options.
|
236
|
+
#
|
237
|
+
# TODO: Possibly move to API class.
|
247
238
|
def get_curl_opts(options)
|
248
239
|
# We do want string keys here
|
249
240
|
bool_hash = { 'true' => true, 'false' => false }
|
250
|
-
curl_opts = UriWrapper::DEFAULT_CURL_OPTS
|
241
|
+
curl_opts = UriWrapper::DEFAULT_CURL_OPTS
|
242
|
+
.clone
|
243
|
+
.merge(options['curl_options'] || {})
|
244
|
+
.merge(options['curl_opts'] || {})
|
251
245
|
curl_opts.each { |k, v| curl_opts[k] = bool_hash.fetch(v, v) }
|
252
|
-
if options[:insecure]
|
253
|
-
curl_opts[:ssl_verifypeer] = false
|
254
|
-
curl_opts[:ssl_verifyhost] = 0
|
255
|
-
end
|
256
246
|
curl_opts
|
257
247
|
end
|
258
248
|
|
259
|
-
|
260
|
-
|
261
|
-
SiteDiff.log '--concurrency must be set to 1 in order to enable the interval feature'
|
262
|
-
exit(2)
|
263
|
-
end
|
264
|
-
end
|
265
|
-
|
249
|
+
##
|
250
|
+
# Ensures that the given directory exists.
|
266
251
|
def get_dir(directory)
|
267
252
|
# Create the dir. Must go before cache initialization!
|
268
253
|
@dir = Pathname.new(directory || '.')
|
@@ -270,16 +255,11 @@ class SiteDiff
|
|
270
255
|
@dir.to_s
|
271
256
|
end
|
272
257
|
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
SiteDiff.log 'whitelist and blacklist parameters must be valid regular expressions', :error
|
279
|
-
SiteDiff.log e.message, :error
|
280
|
-
SiteDiff.log e.backtrace, :error
|
281
|
-
end
|
282
|
-
return @return_value
|
258
|
+
##
|
259
|
+
# Clean keys - return a subset of a hash with keys as symbols.
|
260
|
+
def clean_keys(hash, *keys)
|
261
|
+
new_hash = hash.transform_keys { |k| k.tr('-', '_').to_sym }
|
262
|
+
new_hash.slice(*keys)
|
283
263
|
end
|
284
264
|
end
|
285
265
|
end
|
@@ -3,129 +3,113 @@
|
|
3
3
|
require 'sitediff/cache'
|
4
4
|
require 'sitediff/config'
|
5
5
|
require 'sitediff/crawler'
|
6
|
-
require 'sitediff/rules'
|
7
6
|
require 'pathname'
|
8
7
|
require 'typhoeus'
|
9
8
|
require 'yaml'
|
10
9
|
|
11
10
|
class SiteDiff
|
12
11
|
class Config
|
12
|
+
##
|
13
|
+
# SiteDiff Config Creator Object.
|
13
14
|
class Creator
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
@
|
18
|
-
@
|
19
|
-
@after =
|
20
|
-
@before = urls.pop # May be nil
|
21
|
-
@curl_opts = curl_opts
|
15
|
+
##
|
16
|
+
# Creates a Creator object.
|
17
|
+
def initialize(debug, before, after)
|
18
|
+
@config = nil
|
19
|
+
@before = before
|
20
|
+
@after = after
|
22
21
|
@debug = debug
|
23
22
|
end
|
24
23
|
|
24
|
+
##
|
25
|
+
# Determine if we're dealing with one or two URLs.
|
25
26
|
def roots
|
26
|
-
@roots =
|
27
|
-
|
28
|
-
|
29
|
-
r
|
30
|
-
end
|
27
|
+
@roots = { 'after' => @after }
|
28
|
+
@roots['before'] = @before || @after
|
29
|
+
@roots
|
31
30
|
end
|
32
31
|
|
33
|
-
|
34
|
-
|
32
|
+
##
|
33
|
+
# Build a config structure, return it.
|
34
|
+
def create(options)
|
35
35
|
@config = {}
|
36
|
-
@callback = block
|
37
|
-
@dir = Pathname.new(opts[:directory])
|
38
36
|
|
39
|
-
#
|
40
|
-
|
41
|
-
@
|
37
|
+
# @callback = block
|
38
|
+
|
39
|
+
@dir = Pathname.new(options[:directory])
|
42
40
|
|
43
41
|
# Setup instance vars
|
44
42
|
@paths = Hash.new { |h, k| h[k] = Set.new }
|
45
43
|
@cache = Cache.new(directory: @dir.to_s, create: true)
|
46
44
|
@cache.write_tags << :before << :after
|
47
45
|
|
48
|
-
build_config
|
46
|
+
build_config options
|
49
47
|
write_config
|
50
48
|
end
|
51
49
|
|
52
|
-
|
50
|
+
##
|
51
|
+
# Build and populate the config object which is being created.
|
52
|
+
#
|
53
|
+
# @param [String] options
|
54
|
+
# One or more options.
|
55
|
+
def build_config(options)
|
56
|
+
options = Config.stringify_keys options
|
57
|
+
|
58
|
+
# Build config for "before" and "after".
|
53
59
|
%w[before after].each do |tag|
|
54
|
-
next unless (
|
60
|
+
next unless (url = roots[tag])
|
55
61
|
|
56
|
-
@config[tag] = { 'url' =>
|
62
|
+
@config[tag] = { 'url' => url }
|
57
63
|
end
|
58
64
|
|
59
|
-
|
60
|
-
@
|
61
|
-
|
62
|
-
|
63
|
-
end
|
64
|
-
|
65
|
-
def crawl(depth = nil)
|
66
|
-
hydra = Typhoeus::Hydra.new(max_concurrency: @concurrency)
|
67
|
-
roots.each do |tag, u|
|
68
|
-
Crawler.new(hydra, u, @interval, @whitelist, @blacklist, depth, @curl_opts, @debug) do |info|
|
69
|
-
crawled_path(tag, info)
|
70
|
-
end
|
65
|
+
# Build other settings.
|
66
|
+
@config['settings'] = {}
|
67
|
+
Config::ALLOWED_SETTINGS_KEYS.each do |key|
|
68
|
+
@config['settings'][key] = options[key]
|
71
69
|
end
|
72
|
-
hydra.run
|
73
70
|
end
|
74
71
|
|
75
|
-
|
76
|
-
|
77
|
-
def altered_paths(path)
|
78
|
-
yield path + '/'
|
79
|
-
yield path.sub(%r{/$}, '')
|
80
|
-
end
|
81
|
-
|
82
|
-
path.empty? ? '/' : path
|
83
|
-
end
|
84
|
-
|
85
|
-
def crawled_path(tag, info)
|
86
|
-
path, dup = canonicalize(tag, info.relative)
|
87
|
-
return if dup
|
88
|
-
|
89
|
-
res = info.read_result
|
90
|
-
|
91
|
-
@callback[tag, info]
|
92
|
-
@paths[tag] << path
|
93
|
-
@cache.set(tag, path, res)
|
94
|
-
|
95
|
-
# If single-site, cache after as before!
|
96
|
-
@cache.set(:before, path, res) unless roots[:before]
|
97
|
-
|
98
|
-
# This is used to populate the list of rules we guess are
|
99
|
-
# applicable to the current site.
|
100
|
-
@rules.handle_page(tag, res.content, info.document) if @rules && !res.error
|
101
|
-
end
|
102
|
-
|
103
|
-
# Create a gitignore if we seem to be in git
|
72
|
+
##
|
73
|
+
# Create a gitignore if we seem to be in git.
|
104
74
|
def make_gitignore(dir)
|
105
75
|
# Check if we're in git
|
106
|
-
|
107
|
-
|
108
|
-
dir.+('.gitignore').open('w') do |f|
|
109
|
-
f.puts <<-GITIGNORE.gsub(/^\s+/, '')
|
110
|
-
output
|
111
|
-
cache.db
|
112
|
-
cache.db.db
|
113
|
-
GITIGNORE
|
76
|
+
unless dir.realpath.to_enum(:ascend).any? { |d| Dir.exist?("#{d}/.git") }
|
77
|
+
return
|
114
78
|
end
|
79
|
+
|
80
|
+
f = File.open("#{dir}/.gitignore", 'w')
|
81
|
+
f.puts <<-GITIGNORE.gsub(/^\s+/, '')
|
82
|
+
# Directories.
|
83
|
+
diffs
|
84
|
+
snapshot
|
85
|
+
|
86
|
+
# Files.
|
87
|
+
settings.yaml
|
88
|
+
paths.txt
|
89
|
+
failures.txt
|
90
|
+
GITIGNORE
|
91
|
+
f.close
|
115
92
|
end
|
116
93
|
|
94
|
+
##
|
95
|
+
# Returns the name of the config directory.
|
117
96
|
def directory
|
118
97
|
@dir
|
119
98
|
end
|
120
99
|
|
100
|
+
##
|
101
|
+
# Returns the name of the config file.
|
121
102
|
def config_file
|
122
103
|
@dir + Config::DEFAULT_FILENAME
|
123
104
|
end
|
124
105
|
|
125
|
-
|
106
|
+
##
|
107
|
+
# Writes the built config into the config file.
|
108
|
+
# TODO: Exclude default params before writing.
|
126
109
|
def write_config
|
127
110
|
make_gitignore(@dir)
|
128
|
-
|
111
|
+
data = Config.remove_defaults(@config)
|
112
|
+
config_file.open('w') { |f| f.puts data.to_yaml }
|
129
113
|
end
|
130
114
|
end
|
131
115
|
end
|