wgit 0.0.17 → 0.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +61 -0
- data/LICENSE.txt +21 -0
- data/README.md +16 -7
- data/TODO.txt +34 -0
- data/lib/wgit.rb +3 -1
- data/lib/wgit/assertable.rb +35 -29
- data/lib/wgit/core_ext.rb +5 -3
- data/lib/wgit/crawler.rb +96 -58
- data/lib/wgit/database/connection_details.rb +4 -2
- data/lib/wgit/database/database.rb +84 -46
- data/lib/wgit/database/model.rb +12 -10
- data/lib/wgit/document.rb +100 -72
- data/lib/wgit/document_extensions.rb +11 -9
- data/lib/wgit/indexer.rb +34 -24
- data/lib/wgit/logger.rb +4 -2
- data/lib/wgit/url.rb +94 -59
- data/lib/wgit/utils.rb +13 -11
- data/lib/wgit/version.rb +3 -1
- metadata +41 -38
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
### Default Document Extensions ###
|
2
4
|
|
3
5
|
# Base.
|
@@ -5,9 +7,9 @@ Wgit::Document.define_extension(
|
|
5
7
|
:base,
|
6
8
|
'//base/@href',
|
7
9
|
singleton: true,
|
8
|
-
text_content_only: true
|
10
|
+
text_content_only: true
|
9
11
|
) do |base|
|
10
|
-
|
12
|
+
Wgit::Url.new(base) if base
|
11
13
|
end
|
12
14
|
|
13
15
|
# Title.
|
@@ -15,7 +17,7 @@ Wgit::Document.define_extension(
|
|
15
17
|
:title,
|
16
18
|
'//title',
|
17
19
|
singleton: true,
|
18
|
-
text_content_only: true
|
20
|
+
text_content_only: true
|
19
21
|
)
|
20
22
|
|
21
23
|
# Author.
|
@@ -23,7 +25,7 @@ Wgit::Document.define_extension(
|
|
23
25
|
:author,
|
24
26
|
'//meta[@name="author"]/@content',
|
25
27
|
singleton: true,
|
26
|
-
text_content_only: true
|
28
|
+
text_content_only: true
|
27
29
|
)
|
28
30
|
|
29
31
|
# Keywords.
|
@@ -31,9 +33,9 @@ Wgit::Document.define_extension(
|
|
31
33
|
:keywords,
|
32
34
|
'//meta[@name="keywords"]/@content',
|
33
35
|
singleton: true,
|
34
|
-
text_content_only: true
|
36
|
+
text_content_only: true
|
35
37
|
) do |keywords, source|
|
36
|
-
if keywords
|
38
|
+
if keywords && (source == :html)
|
37
39
|
keywords = keywords.split(',')
|
38
40
|
Wgit::Utils.process_arr(keywords)
|
39
41
|
end
|
@@ -45,9 +47,9 @@ Wgit::Document.define_extension(
|
|
45
47
|
:links,
|
46
48
|
'//a/@href',
|
47
49
|
singleton: false,
|
48
|
-
text_content_only: true
|
50
|
+
text_content_only: true
|
49
51
|
) do |links|
|
50
|
-
links
|
52
|
+
links&.map! { |link| Wgit::Url.new(link) }
|
51
53
|
end
|
52
54
|
|
53
55
|
# Text.
|
@@ -55,5 +57,5 @@ Wgit::Document.define_extension(
|
|
55
57
|
:text,
|
56
58
|
proc { Wgit::Document.text_elements_xpath },
|
57
59
|
singleton: false,
|
58
|
-
text_content_only: true
|
60
|
+
text_content_only: true
|
59
61
|
)
|
data/lib/wgit/indexer.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'crawler'
|
2
4
|
require_relative 'database/database'
|
3
5
|
|
4
6
|
module Wgit
|
5
|
-
|
6
7
|
# Convenience method to index the World Wide Web using
|
7
8
|
# Wgit::Indexer#index_the_web.
|
8
9
|
#
|
@@ -18,7 +19,7 @@ module Wgit
|
|
18
19
|
# scraped from the web (default is 1GB). Note, that this value is used to
|
19
20
|
# determine when to stop crawling; it's not a guarantee of the max data
|
20
21
|
# that will be obtained.
|
21
|
-
def self.index_the_web(max_sites_to_crawl = -1, max_data_size =
|
22
|
+
def self.index_the_web(max_sites_to_crawl = -1, max_data_size = 1_048_576_000)
|
22
23
|
db = Wgit::Database.new
|
23
24
|
indexer = Wgit::Indexer.new(db)
|
24
25
|
indexer.index_the_web(max_sites_to_crawl, max_data_size)
|
@@ -81,7 +82,6 @@ module Wgit
|
|
81
82
|
|
82
83
|
# Class which sets up a crawler and saves the indexed docs to a database.
|
83
84
|
class Indexer
|
84
|
-
|
85
85
|
# The crawler used to scrape the WWW.
|
86
86
|
attr_reader :crawler
|
87
87
|
|
@@ -109,19 +109,19 @@ module Wgit
|
|
109
109
|
# scraped from the web (default is 1GB). Note, that this value is used to
|
110
110
|
# determine when to stop crawling; it's not a guarantee of the max data
|
111
111
|
# that will be obtained.
|
112
|
-
def index_the_web(max_sites_to_crawl = -1, max_data_size =
|
112
|
+
def index_the_web(max_sites_to_crawl = -1, max_data_size = 1_048_576_000)
|
113
113
|
if max_sites_to_crawl < 0
|
114
114
|
Wgit.logger.info("Indexing until the database has been filled or it runs out of \
|
115
115
|
urls to crawl (which might be never).")
|
116
116
|
end
|
117
117
|
site_count = 0
|
118
118
|
|
119
|
-
while keep_crawling?(site_count, max_sites_to_crawl, max_data_size)
|
119
|
+
while keep_crawling?(site_count, max_sites_to_crawl, max_data_size)
|
120
120
|
Wgit.logger.info("Current database size: #{@db.size}")
|
121
121
|
@crawler.urls = @db.uncrawled_urls
|
122
122
|
|
123
123
|
if @crawler.urls.empty?
|
124
|
-
Wgit.logger.info(
|
124
|
+
Wgit.logger.info('No urls to crawl, exiting.')
|
125
125
|
return
|
126
126
|
end
|
127
127
|
Wgit.logger.info("Starting crawl loop for: #{@crawler.urls}")
|
@@ -181,9 +181,7 @@ iteration.")
|
|
181
181
|
|
182
182
|
ext_urls = @crawler.crawl_site(url) do |doc|
|
183
183
|
result = true
|
184
|
-
if block_given?
|
185
|
-
result = yield(doc)
|
186
|
-
end
|
184
|
+
result = yield(doc) if block_given?
|
187
185
|
|
188
186
|
if result
|
189
187
|
if write_doc_to_db(doc)
|
@@ -221,9 +219,7 @@ site: #{url}")
|
|
221
219
|
def index_this_page(url, insert_externals = true)
|
222
220
|
document = @crawler.crawl_page(url) do |doc|
|
223
221
|
result = true
|
224
|
-
if block_given?
|
225
|
-
result = yield(doc)
|
226
|
-
end
|
222
|
+
result = yield(doc) if block_given?
|
227
223
|
|
228
224
|
if result
|
229
225
|
if write_doc_to_db(doc)
|
@@ -244,11 +240,20 @@ site: #{url}")
|
|
244
240
|
nil
|
245
241
|
end
|
246
242
|
|
247
|
-
|
243
|
+
protected
|
248
244
|
|
249
|
-
#
|
245
|
+
# Returns whether or not to keep crawling based on the DB size and current
|
246
|
+
# loop iteration.
|
247
|
+
#
|
248
|
+
# @param site_count [Integer] The current number of crawled sites.
|
249
|
+
# @param max_sites_to_crawl [Integer] The maximum number of sites to crawl
|
250
|
+
# before stopping.
|
251
|
+
# @param max_data_size [Integer] The maximum amount of data to crawl before
|
252
|
+
# stopping.
|
253
|
+
# @return [Boolean] True if the crawl should continue, false otherwise.
|
250
254
|
def keep_crawling?(site_count, max_sites_to_crawl, max_data_size)
|
251
255
|
return false if @db.size >= max_data_size
|
256
|
+
|
252
257
|
# If max_sites_to_crawl is -1 for example then crawl away.
|
253
258
|
if max_sites_to_crawl < 0
|
254
259
|
true
|
@@ -257,8 +262,11 @@ site: #{url}")
|
|
257
262
|
end
|
258
263
|
end
|
259
264
|
|
260
|
-
#
|
261
|
-
# inserts.
|
265
|
+
# Write the doc to the DB. Note that the unique url index on the documents
|
266
|
+
# collection deliberately prevents duplicate inserts.
|
267
|
+
#
|
268
|
+
# @param doc [Wgit::Document] The document to write to the DB.
|
269
|
+
# @return [Boolean] True if the write was successful, false otherwise.
|
262
270
|
def write_doc_to_db(doc)
|
263
271
|
@db.insert(doc)
|
264
272
|
Wgit.logger.info("Saved document for url: #{doc.url}")
|
@@ -268,18 +276,20 @@ site: #{url}")
|
|
268
276
|
false
|
269
277
|
end
|
270
278
|
|
271
|
-
#
|
279
|
+
# Write the urls to the DB. Note that the unique url index on the urls
|
280
|
+
# collection deliberately prevents duplicate inserts.
|
281
|
+
#
|
282
|
+
# @param urls [Array<Wgit::Url>] The urls to write to the DB.
|
283
|
+
# @return [Integer] The number of urls successfully written to the DB.
|
272
284
|
def write_urls_to_db(urls)
|
273
285
|
count = 0
|
274
286
|
if urls.respond_to?(:each)
|
275
287
|
urls.each do |url|
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
Wgit.logger.info("Url already exists: #{url}")
|
282
|
-
end
|
288
|
+
@db.insert(url)
|
289
|
+
count += 1
|
290
|
+
Wgit.logger.info("Inserted url: #{url}")
|
291
|
+
rescue Mongo::Error::OperationFailure
|
292
|
+
Wgit.logger.info("Url already exists: #{url}")
|
283
293
|
end
|
284
294
|
end
|
285
295
|
count
|
data/lib/wgit/logger.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# FYI: The default logger is set at the bottom of this file.
|
2
4
|
|
3
5
|
require 'logger'
|
@@ -24,7 +26,7 @@ module Wgit
|
|
24
26
|
# @return [Logger] The default Logger instance.
|
25
27
|
def self.default_logger
|
26
28
|
logger = Logger.new(STDOUT, progname: 'wgit', level: :info)
|
27
|
-
logger.formatter = proc do |
|
29
|
+
logger.formatter = proc do |_severity, _datetime, progname, msg|
|
28
30
|
"[#{progname}] #{msg}\n"
|
29
31
|
end
|
30
32
|
logger
|
@@ -33,7 +35,7 @@ module Wgit
|
|
33
35
|
# Sets the default Logger instance to be used by Wgit.
|
34
36
|
# @return [Logger] The default Logger instance.
|
35
37
|
def self.use_default_logger
|
36
|
-
@logger =
|
38
|
+
@logger = default_logger
|
37
39
|
end
|
38
40
|
end
|
39
41
|
|
data/lib/wgit/url.rb
CHANGED
@@ -1,10 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'utils'
|
2
4
|
require_relative 'assertable'
|
3
5
|
require 'uri'
|
4
6
|
require 'addressable/uri'
|
5
7
|
|
6
8
|
module Wgit
|
7
|
-
|
8
9
|
# Class modeling a web based URL.
|
9
10
|
# Can be an internal/relative link e.g. "about.html" or a full URL
|
10
11
|
# e.g. "http://www.google.co.uk". Is a subclass of String and uses
|
@@ -12,8 +13,9 @@ module Wgit
|
|
12
13
|
class Url < String
|
13
14
|
include Assertable
|
14
15
|
|
15
|
-
# Whether or not the Url has been crawled or not.
|
16
|
-
|
16
|
+
# Whether or not the Url has been crawled. A custom crawled= method
|
17
|
+
# is also provided by this class.
|
18
|
+
attr_reader :crawled
|
17
19
|
|
18
20
|
# The date which the Url was crawled.
|
19
21
|
attr_accessor :date_crawled
|
@@ -39,9 +41,9 @@ module Wgit
|
|
39
41
|
obj = url_or_obj
|
40
42
|
assert_respond_to(obj, [:fetch, :[]])
|
41
43
|
|
42
|
-
url = obj.fetch(
|
43
|
-
crawled = obj.fetch(
|
44
|
-
date_crawled = obj[
|
44
|
+
url = obj.fetch('url') # Should always be present.
|
45
|
+
crawled = obj.fetch('crawled', false)
|
46
|
+
date_crawled = obj['date_crawled']
|
45
47
|
end
|
46
48
|
|
47
49
|
@uri = Addressable::URI.parse(url)
|
@@ -56,7 +58,7 @@ module Wgit
|
|
56
58
|
# @param str [String] The URL string to parse.
|
57
59
|
# @return [Wgit::Url] The parsed Url object.
|
58
60
|
def self.parse(str)
|
59
|
-
|
61
|
+
new(str)
|
60
62
|
end
|
61
63
|
|
62
64
|
# Raises an exception if url is not a valid HTTP URL.
|
@@ -65,13 +67,11 @@ module Wgit
|
|
65
67
|
# @raise [RuntimeError] If url is invalid.
|
66
68
|
def self.validate(url)
|
67
69
|
url = Wgit::Url.new(url)
|
68
|
-
if url.relative_link?
|
69
|
-
|
70
|
-
end
|
71
|
-
unless url.start_with?("http://") or url.start_with?("https://")
|
70
|
+
raise "Invalid url (or a relative link): #{url}" if url.relative_link?
|
71
|
+
unless url.start_with?('http://') || url.start_with?('https://')
|
72
72
|
raise "Invalid url (missing protocol prefix): #{url}"
|
73
73
|
end
|
74
|
-
if URI.
|
74
|
+
if URI::DEFAULT_PARSER.make_regexp.match(url.normalise).nil?
|
75
75
|
raise "Invalid url: #{url}"
|
76
76
|
end
|
77
77
|
end
|
@@ -83,7 +83,7 @@ module Wgit
|
|
83
83
|
def self.valid?(url)
|
84
84
|
Wgit::Url.validate(url)
|
85
85
|
true
|
86
|
-
rescue
|
86
|
+
rescue StandardError
|
87
87
|
false
|
88
88
|
end
|
89
89
|
|
@@ -95,7 +95,7 @@ module Wgit
|
|
95
95
|
# @param https [Boolean] Whether the protocol prefix is https or http.
|
96
96
|
# @return [Wgit::Url] The url with a protocol prefix.
|
97
97
|
def self.prefix_protocol(url, https = false)
|
98
|
-
unless url.start_with?(
|
98
|
+
unless url.start_with?('http://') || url.start_with?('https://')
|
99
99
|
if https
|
100
100
|
url.replace("https://#{url}")
|
101
101
|
else
|
@@ -113,7 +113,7 @@ module Wgit
|
|
113
113
|
def self.concat(host, link)
|
114
114
|
host = Wgit::Url.new(host).without_trailing_slash
|
115
115
|
link = Wgit::Url.new(link).without_leading_slash
|
116
|
-
separator = (link.start_with?('#')
|
116
|
+
separator = (link.start_with?('#') || link.start_with?('?')) ? '' : '/'
|
117
117
|
Wgit::Url.new(host + separator + link)
|
118
118
|
end
|
119
119
|
|
@@ -126,26 +126,35 @@ module Wgit
|
|
126
126
|
super(new_url)
|
127
127
|
end
|
128
128
|
|
129
|
-
# Returns true if self is a relative Url.
|
129
|
+
# Returns true if self is a relative Url; false if absolute.
|
130
130
|
#
|
131
131
|
# All external links in a page are expected to have a protocol prefix e.g.
|
132
132
|
# "http://", otherwise the link is treated as an internal link (regardless
|
133
|
-
# of whether it's valid or not). The only exception is if
|
134
|
-
# provided and self is a page belonging to that
|
135
|
-
# is relative.
|
133
|
+
# of whether it's valid or not). The only exception is if an opts arg is
|
134
|
+
# provided and self is a page belonging to that arg type e.g. domain; then
|
135
|
+
# the link is relative.
|
136
136
|
#
|
137
|
-
# @param
|
138
|
-
#
|
137
|
+
# @param opts [Hash] The options with which to check relativity.
|
138
|
+
# @option opts [Wgit::Url, String] :host The Url host e.g.
|
139
|
+
# http://www.google.com/how which gives a host of 'www.google.com'.
|
139
140
|
# The host must be absolute and prefixed with a protocol.
|
140
|
-
# @
|
141
|
-
# http://www.google.com/how which gives a domain of google.com. The
|
141
|
+
# @option opts [Wgit::Url, String] :domain The Url domain e.g.
|
142
|
+
# http://www.google.com/how which gives a domain of 'google.com'. The
|
142
143
|
# domain must be absolute and prefixed with a protocol.
|
143
|
-
# @
|
144
|
+
# @option opts [Wgit::Url, String] :brand The Url brand e.g.
|
145
|
+
# http://www.google.com/how which gives a brand of 'google'. The
|
146
|
+
# brand must be absolute and prefixed with a protocol.
|
144
147
|
# @raise [RuntimeError] If self is invalid e.g. empty.
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
+
# @return [Boolean] True if relative, false if absolute.
|
149
|
+
def is_relative?(opts = {})
|
150
|
+
opts = { host: nil, domain: nil, brand: nil }.merge(opts)
|
148
151
|
|
152
|
+
raise "Invalid link: '#{self}'" if empty?
|
153
|
+
if opts.values.count(nil) < (opts.length - 1)
|
154
|
+
raise "Provide only one of: #{opts.keys}"
|
155
|
+
end
|
156
|
+
|
157
|
+
host = opts[:host]
|
149
158
|
if host
|
150
159
|
host = Wgit::Url.new(host)
|
151
160
|
if host.to_base.nil?
|
@@ -153,6 +162,7 @@ module Wgit
|
|
153
162
|
end
|
154
163
|
end
|
155
164
|
|
165
|
+
domain = opts[:domain]
|
156
166
|
if domain
|
157
167
|
domain = Wgit::Url.new(domain)
|
158
168
|
if domain.to_base.nil?
|
@@ -160,11 +170,22 @@ module Wgit
|
|
160
170
|
end
|
161
171
|
end
|
162
172
|
|
173
|
+
brand = opts[:brand]
|
174
|
+
if brand
|
175
|
+
brand = Wgit::Url.new(brand)
|
176
|
+
if brand.to_base.nil?
|
177
|
+
raise "Invalid brand, must be absolute and contain protocol: #{brand}"
|
178
|
+
end
|
179
|
+
end
|
180
|
+
|
163
181
|
if @uri.relative?
|
164
182
|
true
|
165
183
|
else
|
166
184
|
return host ? to_host == host.to_host : false if host
|
167
185
|
return domain ? to_domain == domain.to_domain : false if domain
|
186
|
+
return brand ? to_brand == brand.to_brand : false if brand
|
187
|
+
|
188
|
+
false
|
168
189
|
end
|
169
190
|
end
|
170
191
|
|
@@ -240,11 +261,21 @@ module Wgit
|
|
240
261
|
domain ? Wgit::Url.new(domain) : nil
|
241
262
|
end
|
242
263
|
|
264
|
+
# Returns a new Wgit::Url containing just the brand of this URL e.g.
|
265
|
+
# Given http://www.google.co.uk/about.html, google is returned.
|
266
|
+
#
|
267
|
+
# @return [Wgit::Url, nil] Containing just the brand or nil.
|
268
|
+
def to_brand
|
269
|
+
domain = to_domain
|
270
|
+
domain ? Wgit::Url.new(domain.split('.').first) : nil
|
271
|
+
end
|
272
|
+
|
243
273
|
# Returns only the base of this URL e.g. the protocol and host combined.
|
244
274
|
#
|
245
275
|
# @return [Wgit::Url, nil] Base of self e.g. http://www.google.co.uk or nil.
|
246
276
|
def to_base
|
247
|
-
return nil if @uri.scheme.nil?
|
277
|
+
return nil if @uri.scheme.nil? || @uri.host.nil?
|
278
|
+
|
248
279
|
base = "#{@uri.scheme}://#{@uri.host}"
|
249
280
|
Wgit::Url.new(base)
|
250
281
|
end
|
@@ -257,8 +288,9 @@ module Wgit
|
|
257
288
|
# @return [Wgit::Url, nil] Path of self e.g. about.html or nil.
|
258
289
|
def to_path
|
259
290
|
path = @uri.path
|
260
|
-
return nil if path.nil?
|
291
|
+
return nil if path.nil? || path.empty?
|
261
292
|
return Wgit::Url.new('/') if path == '/'
|
293
|
+
|
262
294
|
Wgit::Url.new(path).without_slashes
|
263
295
|
end
|
264
296
|
|
@@ -300,6 +332,7 @@ module Wgit
|
|
300
332
|
def to_extension
|
301
333
|
path = to_path
|
302
334
|
return nil unless path
|
335
|
+
|
303
336
|
segs = path.split('.')
|
304
337
|
segs.length > 1 ? Wgit::Url.new(segs.last) : nil
|
305
338
|
end
|
@@ -344,6 +377,7 @@ module Wgit
|
|
344
377
|
without_base = base_url ? gsub(base_url, '') : self
|
345
378
|
|
346
379
|
return self if ['', '/'].include?(without_base)
|
380
|
+
|
347
381
|
Wgit::Url.new(without_base).without_slashes
|
348
382
|
end
|
349
383
|
|
@@ -395,36 +429,37 @@ module Wgit
|
|
395
429
|
#
|
396
430
|
# @return [Hash] self's instance vars as a Hash.
|
397
431
|
def to_h
|
398
|
-
ignore = [
|
432
|
+
ignore = ['@uri']
|
399
433
|
h = Wgit::Utils.to_h(self, ignore)
|
400
|
-
Hash[h.to_a.insert(0, [
|
401
|
-
end
|
402
|
-
|
403
|
-
alias
|
404
|
-
alias
|
405
|
-
alias
|
406
|
-
alias
|
407
|
-
alias
|
408
|
-
alias
|
409
|
-
alias
|
410
|
-
alias
|
411
|
-
alias
|
412
|
-
alias
|
413
|
-
alias
|
414
|
-
alias
|
415
|
-
alias
|
416
|
-
alias
|
417
|
-
alias
|
418
|
-
alias
|
419
|
-
alias
|
420
|
-
alias
|
421
|
-
alias
|
422
|
-
alias
|
423
|
-
alias
|
424
|
-
alias
|
425
|
-
alias
|
426
|
-
alias
|
427
|
-
alias
|
428
|
-
alias
|
434
|
+
Hash[h.to_a.insert(0, ['url', self])] # Insert url at position 0.
|
435
|
+
end
|
436
|
+
|
437
|
+
alias uri to_uri
|
438
|
+
alias url to_url
|
439
|
+
alias scheme to_scheme
|
440
|
+
alias to_protocol to_scheme
|
441
|
+
alias protocol to_scheme
|
442
|
+
alias host to_host
|
443
|
+
alias domain to_domain
|
444
|
+
alias brand to_brand
|
445
|
+
alias base to_base
|
446
|
+
alias path to_path
|
447
|
+
alias endpoint to_endpoint
|
448
|
+
alias query_string to_query_string
|
449
|
+
alias query to_query_string
|
450
|
+
alias anchor to_anchor
|
451
|
+
alias to_fragment to_anchor
|
452
|
+
alias fragment to_anchor
|
453
|
+
alias extension to_extension
|
454
|
+
alias without_query without_query_string
|
455
|
+
alias without_fragment without_anchor
|
456
|
+
alias is_query? is_query_string?
|
457
|
+
alias is_fragment? is_anchor?
|
458
|
+
alias relative_link? is_relative?
|
459
|
+
alias internal_link? is_relative?
|
460
|
+
alias is_internal? is_relative?
|
461
|
+
alias relative? is_relative?
|
462
|
+
alias crawled? crawled
|
463
|
+
alias normalize normalise
|
429
464
|
end
|
430
465
|
end
|