sitediff 0.0.6 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.eslintignore +1 -0
- data/.eslintrc.json +28 -0
- data/.project +11 -0
- data/.rubocop.yml +179 -0
- data/.rubocop_todo.yml +51 -0
- data/CHANGELOG.md +28 -0
- data/Dockerfile +33 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +85 -0
- data/INSTALLATION.md +146 -0
- data/LICENSE +339 -0
- data/README.md +810 -0
- data/Rakefile +12 -0
- data/Thorfile +135 -0
- data/bin/sitediff +9 -2
- data/config/.gitkeep +0 -0
- data/config/sanitize_domains.example.yaml +8 -0
- data/config/sitediff.example.yaml +81 -0
- data/docker-compose.test.yml +3 -0
- data/lib/sitediff/api.rb +276 -0
- data/lib/sitediff/cache.rb +57 -8
- data/lib/sitediff/cli.rb +156 -176
- data/lib/sitediff/config/creator.rb +61 -77
- data/lib/sitediff/config/preset.rb +75 -0
- data/lib/sitediff/config.rb +436 -31
- data/lib/sitediff/crawler.rb +27 -21
- data/lib/sitediff/diff.rb +32 -9
- data/lib/sitediff/fetch.rb +10 -3
- data/lib/sitediff/files/diff.html.erb +20 -2
- data/lib/sitediff/files/jquery.min.js +2 -0
- data/lib/sitediff/files/normalize.css +349 -0
- data/lib/sitediff/files/report.html.erb +171 -0
- data/lib/sitediff/files/sidebyside.html.erb +5 -2
- data/lib/sitediff/files/sitediff.css +303 -30
- data/lib/sitediff/files/sitediff.js +367 -0
- data/lib/sitediff/presets/drupal.yaml +63 -0
- data/lib/sitediff/report.rb +254 -0
- data/lib/sitediff/result.rb +50 -20
- data/lib/sitediff/sanitize/dom_transform.rb +47 -8
- data/lib/sitediff/sanitize/regexp.rb +24 -3
- data/lib/sitediff/sanitize.rb +81 -12
- data/lib/sitediff/uriwrapper.rb +65 -23
- data/lib/sitediff/webserver/resultserver.rb +30 -33
- data/lib/sitediff/webserver.rb +15 -3
- data/lib/sitediff.rb +130 -83
- data/misc/sitediff - overview report.png +0 -0
- data/misc/sitediff - page report.png +0 -0
- data/package-lock.json +878 -0
- data/package.json +25 -0
- data/sitediff.gemspec +51 -0
- metadata +91 -29
- data/lib/sitediff/files/html_report.html.erb +0 -66
- data/lib/sitediff/files/rules/drupal.yaml +0 -63
- data/lib/sitediff/rules.rb +0 -65
data/lib/sitediff/cli.rb
CHANGED
@@ -2,35 +2,29 @@
|
|
2
2
|
|
3
3
|
require 'thor'
|
4
4
|
require 'sitediff'
|
5
|
-
require 'sitediff/
|
6
|
-
require 'sitediff/config'
|
7
|
-
require 'sitediff/config/creator'
|
8
|
-
require 'sitediff/fetch'
|
9
|
-
require 'sitediff/webserver/resultserver'
|
5
|
+
require 'sitediff/api'
|
10
6
|
|
11
7
|
class SiteDiff
|
8
|
+
# SiteDiff CLI.
|
12
9
|
class Cli < Thor
|
13
10
|
class_option 'directory',
|
14
11
|
type: :string,
|
15
12
|
aliases: '-C',
|
16
13
|
default: 'sitediff',
|
17
14
|
desc: 'Configuration directory'
|
18
|
-
class_option :
|
19
|
-
type: :hash,
|
20
|
-
default: {},
|
21
|
-
desc: 'Options to be passed to curl'
|
22
|
-
class_option :insecure,
|
15
|
+
class_option :debug,
|
23
16
|
type: :boolean,
|
17
|
+
aliases: '-d',
|
24
18
|
default: false,
|
25
|
-
desc: '
|
26
|
-
class_option
|
19
|
+
desc: 'Stop on certain errors and produce error trace backs.'
|
20
|
+
class_option 'verbose',
|
27
21
|
type: :boolean,
|
22
|
+
aliases: '-v',
|
28
23
|
default: false,
|
29
|
-
desc: '
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
desc: 'Crawling delay - interval in milliseconds'
|
24
|
+
desc: 'Show verbose output in terminal'
|
25
|
+
|
26
|
+
# Command aliases.
|
27
|
+
map recrawl: :crawl
|
34
28
|
|
35
29
|
# Thor, by default, exits with 0 no matter what!
|
36
30
|
def self.exit_on_failure?
|
@@ -42,6 +36,20 @@ class SiteDiff
|
|
42
36
|
true
|
43
37
|
end
|
44
38
|
|
39
|
+
desc 'version', 'Show version information'
|
40
|
+
##
|
41
|
+
# Show version information.
|
42
|
+
def version
|
43
|
+
gemspec = SiteDiff.gemspec
|
44
|
+
output = []
|
45
|
+
output.push("Sitediff CLI #{gemspec.version}")
|
46
|
+
if options[:verbose]
|
47
|
+
output.push("Website: #{gemspec.homepage}")
|
48
|
+
output.push("GitHub: #{gemspec.metadata['source_code_uri']}")
|
49
|
+
end
|
50
|
+
puts output.join("\n")
|
51
|
+
end
|
52
|
+
|
45
53
|
option 'paths-file',
|
46
54
|
type: :string,
|
47
55
|
desc: 'Paths are read (one at a line) from PATHS: ' \
|
@@ -53,83 +61,70 @@ class SiteDiff
|
|
53
61
|
desc: 'Specific path or paths to fetch'
|
54
62
|
option 'before',
|
55
63
|
type: :string,
|
56
|
-
desc: 'URL
|
64
|
+
desc: 'URL to the "before" site, prefixed to all paths.',
|
57
65
|
aliases: '--before-url'
|
58
66
|
option 'after',
|
59
67
|
type: :string,
|
60
|
-
desc: 'URL
|
68
|
+
desc: 'URL to the "after" site, prefixed to all paths.',
|
61
69
|
aliases: '--after-url'
|
70
|
+
option 'report-format',
|
71
|
+
type: :string,
|
72
|
+
enum: %w[html json],
|
73
|
+
default: 'html',
|
74
|
+
desc: 'The format in which a report should be generated.'
|
62
75
|
option 'before-report',
|
63
76
|
type: :string,
|
64
|
-
desc: '
|
77
|
+
desc: 'URL to use in reports. Useful if port forwarding.',
|
65
78
|
aliases: '--before-url-report'
|
66
79
|
option 'after-report',
|
67
80
|
type: :string,
|
68
|
-
desc: '
|
81
|
+
desc: 'URL to use in reports. Useful if port forwarding.',
|
69
82
|
aliases: '--after-url-report'
|
70
83
|
option 'cached',
|
71
84
|
type: :string,
|
72
85
|
enum: %w[none all before after],
|
73
86
|
default: 'before',
|
74
87
|
desc: 'Use the cached version of these sites, if available.'
|
75
|
-
option '
|
88
|
+
option 'ignore-whitespace',
|
76
89
|
type: :boolean,
|
77
|
-
aliases: '-v',
|
78
90
|
default: false,
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
# override
|
92
|
-
paths
|
93
|
-
|
94
|
-
|
95
|
-
SiteDiff.log "Can't have both --paths-file and --paths", :error
|
96
|
-
exit(-1)
|
97
|
-
end
|
98
|
-
|
99
|
-
paths_file = Pathname.new(paths_file).expand_path
|
100
|
-
unless File.exist? paths_file
|
101
|
-
raise Config::InvalidConfig,
|
102
|
-
"Paths file '#{paths_file}' not found!"
|
103
|
-
end
|
104
|
-
SiteDiff.log "Reading paths from: #{paths_file}"
|
105
|
-
config.paths = File.readlines(paths_file)
|
91
|
+
aliases: '-w',
|
92
|
+
desc: 'Ignore changes in whitespace.'
|
93
|
+
option 'export',
|
94
|
+
type: :boolean,
|
95
|
+
default: false,
|
96
|
+
aliases: '-e',
|
97
|
+
desc: 'Export report to files. This option forces HTML format.'
|
98
|
+
desc 'diff [OPTIONS] [CONFIG-FILE]',
|
99
|
+
'Compute diffs on configured URLs.'
|
100
|
+
##
|
101
|
+
# Computes diffs.
|
102
|
+
def diff(config_file = nil)
|
103
|
+
# Determine "paths" override based on options.
|
104
|
+
if options['paths'] && options['paths-file']
|
105
|
+
SiteDiff.log "Can't specify both --paths-file and --paths.", :error
|
106
|
+
exit(-1)
|
106
107
|
end
|
107
|
-
config.paths = paths if paths
|
108
|
-
|
109
|
-
config.before['url'] = options['before'] if options['before']
|
110
|
-
config.after['url'] = options['after'] if options['after']
|
111
108
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
# Failing diff --> exit(2), populated above
|
132
|
-
exit(exit_code)
|
109
|
+
api = Api.new(options['directory'], config_file)
|
110
|
+
api_options =
|
111
|
+
clean_keys(
|
112
|
+
options,
|
113
|
+
:paths,
|
114
|
+
:paths_file,
|
115
|
+
:ignore_whitespace,
|
116
|
+
:export,
|
117
|
+
:before,
|
118
|
+
:after,
|
119
|
+
:cached,
|
120
|
+
:verbose,
|
121
|
+
:debug,
|
122
|
+
:report_format,
|
123
|
+
:before_report,
|
124
|
+
:after_report
|
125
|
+
)
|
126
|
+
api_options[:cli_mode] = true
|
127
|
+
api.diff(api_options)
|
133
128
|
end
|
134
129
|
|
135
130
|
option :port,
|
@@ -140,129 +135,119 @@ class SiteDiff
|
|
140
135
|
type: :boolean,
|
141
136
|
default: true,
|
142
137
|
desc: 'Whether to open the served content in your browser'
|
143
|
-
desc 'serve [OPTIONS]',
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
SiteDiff::Webserver::ResultServer.new(
|
152
|
-
options[:port],
|
153
|
-
options['directory'],
|
154
|
-
browse: options[:browse],
|
155
|
-
cache: cache,
|
156
|
-
config: config
|
157
|
-
).wait
|
158
|
-
rescue SiteDiffException => e
|
159
|
-
SiteDiff.log e.message, :error
|
160
|
-
SiteDiff.log e.backtrace, :error
|
138
|
+
desc 'serve [OPTIONS] [CONFIG-FILE]',
|
139
|
+
'Serve SiteDiff report directory over HTTP.'
|
140
|
+
##
|
141
|
+
# Serves SiteDiff report for accessing in the browser.
|
142
|
+
def serve(config_file = nil)
|
143
|
+
api = Api.new(options['directory'], config_file)
|
144
|
+
api_options = clean_keys(options, :browse, :port)
|
145
|
+
api.serve(api_options)
|
161
146
|
end
|
162
147
|
|
163
148
|
option :depth,
|
164
149
|
type: :numeric,
|
165
|
-
default:
|
150
|
+
default: Config::DEFAULT_CONFIG['settings']['depth'],
|
166
151
|
desc: 'How deeply to crawl the given site'
|
167
|
-
option :
|
152
|
+
option :crawl,
|
153
|
+
type: :boolean,
|
154
|
+
default: true,
|
155
|
+
desc: 'Run "sitediff crawl" to discover paths.'
|
156
|
+
option :preset,
|
168
157
|
type: :string,
|
169
|
-
enum:
|
170
|
-
|
171
|
-
desc: 'Whether rules for the site should be auto-created'
|
158
|
+
enum: Config::Preset.all,
|
159
|
+
desc: 'Framework-specific presets to apply.'
|
172
160
|
option :concurrency,
|
173
161
|
type: :numeric,
|
174
|
-
default:
|
175
|
-
desc: 'Max number of concurrent connections made'
|
176
|
-
option :
|
162
|
+
default: Config::DEFAULT_CONFIG['settings']['concurrency'],
|
163
|
+
desc: 'Max number of concurrent connections made.'
|
164
|
+
option :interval,
|
165
|
+
type: :numeric,
|
166
|
+
default: Config::DEFAULT_CONFIG['settings']['interval'],
|
167
|
+
desc: 'Crawling delay - interval in milliseconds.'
|
168
|
+
option :include,
|
177
169
|
type: :string,
|
178
|
-
default: '',
|
179
|
-
desc: 'Optional
|
180
|
-
option :
|
170
|
+
default: Config::DEFAULT_CONFIG['settings']['include'],
|
171
|
+
desc: 'Optional URL include regex for crawling.'
|
172
|
+
option :exclude,
|
181
173
|
type: :string,
|
182
|
-
default: '',
|
183
|
-
desc: 'Optional
|
184
|
-
|
174
|
+
default: Config::DEFAULT_CONFIG['settings']['exclude'],
|
175
|
+
desc: 'Optional URL exclude regex for crawling.'
|
176
|
+
option :curl_options,
|
177
|
+
type: :hash,
|
178
|
+
default: {},
|
179
|
+
desc: 'Options to be passed to curl'
|
180
|
+
desc 'init URL [URL]', 'Create a sitediff configuration.'
|
181
|
+
##
|
182
|
+
# Initializes a sitediff (yaml) configuration file.
|
185
183
|
def init(*urls)
|
186
184
|
unless (1..2).cover? urls.size
|
187
185
|
SiteDiff.log 'sitediff init requires one or two URLs', :error
|
188
186
|
exit(2)
|
189
187
|
end
|
188
|
+
api_options =
|
189
|
+
clean_keys(
|
190
|
+
options,
|
191
|
+
:depth,
|
192
|
+
:concurrency,
|
193
|
+
:interval,
|
194
|
+
:include,
|
195
|
+
:exclude,
|
196
|
+
:preset,
|
197
|
+
:crawl
|
198
|
+
)
|
199
|
+
.merge(
|
200
|
+
{
|
201
|
+
after_url: urls.pop,
|
202
|
+
before_url: urls.pop,
|
203
|
+
directory: get_dir(options['directory']),
|
204
|
+
curl_opts: get_curl_opts(options)
|
205
|
+
}
|
206
|
+
)
|
190
207
|
|
191
|
-
|
192
|
-
check_interval(@interval)
|
193
|
-
@dir = get_dir(options['directory'])
|
194
|
-
curl_opts = get_curl_opts(options)
|
195
|
-
@whitelist = create_regexp(options['whitelist'])
|
196
|
-
@blacklist = create_regexp(options['blacklist'])
|
197
|
-
creator = SiteDiff::Config::Creator.new(options[:concurrency],
|
198
|
-
options['interval'],
|
199
|
-
@whitelist,
|
200
|
-
@blacklist,
|
201
|
-
curl_opts,
|
202
|
-
options[:debug],
|
203
|
-
*urls)
|
204
|
-
creator.create(
|
205
|
-
depth: options[:depth],
|
206
|
-
directory: @dir,
|
207
|
-
rules: options[:rules] != 'no',
|
208
|
-
rules_disabled: (options[:rules] == 'disabled')
|
209
|
-
) do |_tag, info|
|
210
|
-
SiteDiff.log "Visited #{info.uri}, cached"
|
211
|
-
end
|
212
|
-
|
213
|
-
SiteDiff.log "Created #{creator.config_file.expand_path}", :success
|
214
|
-
SiteDiff.log "You can now run 'sitediff diff'", :success
|
208
|
+
Api.init(api_options)
|
215
209
|
end
|
216
210
|
|
217
211
|
option :url,
|
218
212
|
type: :string,
|
219
213
|
desc: 'A custom base URL to fetch from'
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
config.validate(need_before: false)
|
230
|
-
cache = SiteDiff::Cache.new(directory: @dir, create: true)
|
231
|
-
cache.write_tags << :before
|
214
|
+
desc 'store [CONFIG-FILE]',
|
215
|
+
'Cache the current contents of a site for later comparison.'
|
216
|
+
##
|
217
|
+
# Caches the current version of the site.
|
218
|
+
def store(config_file = nil)
|
219
|
+
api = Api.new(options['directory'], config_file)
|
220
|
+
api_options = clean_keys(options, :url, :debug)
|
221
|
+
api.store(api_options)
|
222
|
+
end
|
232
223
|
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
fetcher.run do |path, _res|
|
242
|
-
SiteDiff.log "Visited #{path}, cached"
|
243
|
-
end
|
224
|
+
desc 'crawl [CONFIG-FILE]',
|
225
|
+
'Crawl the "before" site to discover paths.'
|
226
|
+
##
|
227
|
+
# Crawls the "before" site to determine "paths".
|
228
|
+
#
|
229
|
+
def crawl(config_file = nil)
|
230
|
+
api = Api.new(options['directory'], config_file)
|
231
|
+
api.crawl
|
244
232
|
end
|
245
233
|
|
246
234
|
no_commands do
|
235
|
+
# Generates CURL options.
|
236
|
+
#
|
237
|
+
# TODO: Possibly move to API class.
|
247
238
|
def get_curl_opts(options)
|
248
239
|
# We do want string keys here
|
249
240
|
bool_hash = { 'true' => true, 'false' => false }
|
250
|
-
curl_opts = UriWrapper::DEFAULT_CURL_OPTS
|
241
|
+
curl_opts = UriWrapper::DEFAULT_CURL_OPTS
|
242
|
+
.clone
|
243
|
+
.merge(options['curl_options'] || {})
|
244
|
+
.merge(options['curl_opts'] || {})
|
251
245
|
curl_opts.each { |k, v| curl_opts[k] = bool_hash.fetch(v, v) }
|
252
|
-
if options[:insecure]
|
253
|
-
curl_opts[:ssl_verifypeer] = false
|
254
|
-
curl_opts[:ssl_verifyhost] = 0
|
255
|
-
end
|
256
246
|
curl_opts
|
257
247
|
end
|
258
248
|
|
259
|
-
|
260
|
-
|
261
|
-
SiteDiff.log '--concurrency must be set to 1 in order to enable the interval feature'
|
262
|
-
exit(2)
|
263
|
-
end
|
264
|
-
end
|
265
|
-
|
249
|
+
##
|
250
|
+
# Ensures that the given directory exists.
|
266
251
|
def get_dir(directory)
|
267
252
|
# Create the dir. Must go before cache initialization!
|
268
253
|
@dir = Pathname.new(directory || '.')
|
@@ -270,16 +255,11 @@ class SiteDiff
|
|
270
255
|
@dir.to_s
|
271
256
|
end
|
272
257
|
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
SiteDiff.log 'whitelist and blacklist parameters must be valid regular expressions', :error
|
279
|
-
SiteDiff.log e.message, :error
|
280
|
-
SiteDiff.log e.backtrace, :error
|
281
|
-
end
|
282
|
-
return @return_value
|
258
|
+
##
|
259
|
+
# Clean keys - return a subset of a hash with keys as symbols.
|
260
|
+
def clean_keys(hash, *keys)
|
261
|
+
new_hash = hash.transform_keys { |k| k.tr('-', '_').to_sym }
|
262
|
+
new_hash.slice(*keys)
|
283
263
|
end
|
284
264
|
end
|
285
265
|
end
|
@@ -3,129 +3,113 @@
|
|
3
3
|
require 'sitediff/cache'
|
4
4
|
require 'sitediff/config'
|
5
5
|
require 'sitediff/crawler'
|
6
|
-
require 'sitediff/rules'
|
7
6
|
require 'pathname'
|
8
7
|
require 'typhoeus'
|
9
8
|
require 'yaml'
|
10
9
|
|
11
10
|
class SiteDiff
|
12
11
|
class Config
|
12
|
+
##
|
13
|
+
# SiteDiff Config Creator Object.
|
13
14
|
class Creator
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
@
|
18
|
-
@
|
19
|
-
@after =
|
20
|
-
@before = urls.pop # May be nil
|
21
|
-
@curl_opts = curl_opts
|
15
|
+
##
|
16
|
+
# Creates a Creator object.
|
17
|
+
def initialize(debug, before, after)
|
18
|
+
@config = nil
|
19
|
+
@before = before
|
20
|
+
@after = after
|
22
21
|
@debug = debug
|
23
22
|
end
|
24
23
|
|
24
|
+
##
|
25
|
+
# Determine if we're dealing with one or two URLs.
|
25
26
|
def roots
|
26
|
-
@roots =
|
27
|
-
|
28
|
-
|
29
|
-
r
|
30
|
-
end
|
27
|
+
@roots = { 'after' => @after }
|
28
|
+
@roots['before'] = @before || @after
|
29
|
+
@roots
|
31
30
|
end
|
32
31
|
|
33
|
-
|
34
|
-
|
32
|
+
##
|
33
|
+
# Build a config structure, return it.
|
34
|
+
def create(options)
|
35
35
|
@config = {}
|
36
|
-
@callback = block
|
37
|
-
@dir = Pathname.new(opts[:directory])
|
38
36
|
|
39
|
-
#
|
40
|
-
|
41
|
-
@
|
37
|
+
# @callback = block
|
38
|
+
|
39
|
+
@dir = Pathname.new(options[:directory])
|
42
40
|
|
43
41
|
# Setup instance vars
|
44
42
|
@paths = Hash.new { |h, k| h[k] = Set.new }
|
45
43
|
@cache = Cache.new(directory: @dir.to_s, create: true)
|
46
44
|
@cache.write_tags << :before << :after
|
47
45
|
|
48
|
-
build_config
|
46
|
+
build_config options
|
49
47
|
write_config
|
50
48
|
end
|
51
49
|
|
52
|
-
|
50
|
+
##
|
51
|
+
# Build and populate the config object which is being created.
|
52
|
+
#
|
53
|
+
# @param [String] options
|
54
|
+
# One or more options.
|
55
|
+
def build_config(options)
|
56
|
+
options = Config.stringify_keys options
|
57
|
+
|
58
|
+
# Build config for "before" and "after".
|
53
59
|
%w[before after].each do |tag|
|
54
|
-
next unless (
|
60
|
+
next unless (url = roots[tag])
|
55
61
|
|
56
|
-
@config[tag] = { 'url' =>
|
62
|
+
@config[tag] = { 'url' => url }
|
57
63
|
end
|
58
64
|
|
59
|
-
|
60
|
-
@
|
61
|
-
|
62
|
-
|
63
|
-
end
|
64
|
-
|
65
|
-
def crawl(depth = nil)
|
66
|
-
hydra = Typhoeus::Hydra.new(max_concurrency: @concurrency)
|
67
|
-
roots.each do |tag, u|
|
68
|
-
Crawler.new(hydra, u, @interval, @whitelist, @blacklist, depth, @curl_opts, @debug) do |info|
|
69
|
-
crawled_path(tag, info)
|
70
|
-
end
|
65
|
+
# Build other settings.
|
66
|
+
@config['settings'] = {}
|
67
|
+
Config::ALLOWED_SETTINGS_KEYS.each do |key|
|
68
|
+
@config['settings'][key] = options[key]
|
71
69
|
end
|
72
|
-
hydra.run
|
73
70
|
end
|
74
71
|
|
75
|
-
|
76
|
-
|
77
|
-
def altered_paths(path)
|
78
|
-
yield path + '/'
|
79
|
-
yield path.sub(%r{/$}, '')
|
80
|
-
end
|
81
|
-
|
82
|
-
path.empty? ? '/' : path
|
83
|
-
end
|
84
|
-
|
85
|
-
def crawled_path(tag, info)
|
86
|
-
path, dup = canonicalize(tag, info.relative)
|
87
|
-
return if dup
|
88
|
-
|
89
|
-
res = info.read_result
|
90
|
-
|
91
|
-
@callback[tag, info]
|
92
|
-
@paths[tag] << path
|
93
|
-
@cache.set(tag, path, res)
|
94
|
-
|
95
|
-
# If single-site, cache after as before!
|
96
|
-
@cache.set(:before, path, res) unless roots[:before]
|
97
|
-
|
98
|
-
# This is used to populate the list of rules we guess are
|
99
|
-
# applicable to the current site.
|
100
|
-
@rules.handle_page(tag, res.content, info.document) if @rules && !res.error
|
101
|
-
end
|
102
|
-
|
103
|
-
# Create a gitignore if we seem to be in git
|
72
|
+
##
|
73
|
+
# Create a gitignore if we seem to be in git.
|
104
74
|
def make_gitignore(dir)
|
105
75
|
# Check if we're in git
|
106
|
-
|
107
|
-
|
108
|
-
dir.+('.gitignore').open('w') do |f|
|
109
|
-
f.puts <<-GITIGNORE.gsub(/^\s+/, '')
|
110
|
-
output
|
111
|
-
cache.db
|
112
|
-
cache.db.db
|
113
|
-
GITIGNORE
|
76
|
+
unless dir.realpath.to_enum(:ascend).any? { |d| Dir.exist?("#{d}/.git") }
|
77
|
+
return
|
114
78
|
end
|
79
|
+
|
80
|
+
f = File.open("#{dir}/.gitignore", 'w')
|
81
|
+
f.puts <<-GITIGNORE.gsub(/^\s+/, '')
|
82
|
+
# Directories.
|
83
|
+
diffs
|
84
|
+
snapshot
|
85
|
+
|
86
|
+
# Files.
|
87
|
+
settings.yaml
|
88
|
+
paths.txt
|
89
|
+
failures.txt
|
90
|
+
GITIGNORE
|
91
|
+
f.close
|
115
92
|
end
|
116
93
|
|
94
|
+
##
|
95
|
+
# Returns the name of the config directory.
|
117
96
|
def directory
|
118
97
|
@dir
|
119
98
|
end
|
120
99
|
|
100
|
+
##
|
101
|
+
# Returns the name of the config file.
|
121
102
|
def config_file
|
122
103
|
@dir + Config::DEFAULT_FILENAME
|
123
104
|
end
|
124
105
|
|
125
|
-
|
106
|
+
##
|
107
|
+
# Writes the built config into the config file.
|
108
|
+
# TODO: Exclude default params before writing.
|
126
109
|
def write_config
|
127
110
|
make_gitignore(@dir)
|
128
|
-
|
111
|
+
data = Config.remove_defaults(@config)
|
112
|
+
config_file.open('w') { |f| f.puts data.to_yaml }
|
129
113
|
end
|
130
114
|
end
|
131
115
|
end
|