wgit 0.10.8 → 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +33 -1
- data/CONTRIBUTING.md +1 -1
- data/README.md +3 -1
- data/bin/wgit +3 -1
- data/lib/wgit/assertable.rb +2 -2
- data/lib/wgit/crawler.rb +56 -34
- data/lib/wgit/database/database.rb +64 -52
- data/lib/wgit/document.rb +54 -40
- data/lib/wgit/document_extractors.rb +15 -1
- data/lib/wgit/dsl.rb +16 -20
- data/lib/wgit/indexer.rb +157 -63
- data/lib/wgit/logger.rb +1 -1
- data/lib/wgit/response.rb +21 -6
- data/lib/wgit/robots_parser.rb +193 -0
- data/lib/wgit/url.rb +115 -55
- data/lib/wgit/utils.rb +81 -28
- data/lib/wgit/version.rb +1 -1
- data/lib/wgit.rb +1 -0
- metadata +33 -38
data/lib/wgit/url.rb
CHANGED
@@ -28,6 +28,9 @@ module Wgit
|
|
28
28
|
# The duration of the crawl for this Url (in seconds).
|
29
29
|
attr_accessor :crawl_duration
|
30
30
|
|
31
|
+
# Record the redirects from the initial Url to the final Url.
|
32
|
+
attr_reader :redirects
|
33
|
+
|
31
34
|
# Initializes a new instance of Wgit::Url which models a web based
|
32
35
|
# HTTP URL.
|
33
36
|
#
|
@@ -57,12 +60,14 @@ module Wgit
|
|
57
60
|
crawled = obj.fetch('crawled', false)
|
58
61
|
date_crawled = obj.fetch('date_crawled', nil)
|
59
62
|
crawl_duration = obj.fetch('crawl_duration', nil)
|
63
|
+
redirects = obj.fetch('redirects', {})
|
60
64
|
end
|
61
65
|
|
62
66
|
@uri = Addressable::URI.parse(url)
|
63
67
|
@crawled = crawled
|
64
68
|
@date_crawled = date_crawled
|
65
69
|
@crawl_duration = crawl_duration
|
70
|
+
@redirects = redirects || {}
|
66
71
|
|
67
72
|
super(url)
|
68
73
|
end
|
@@ -107,16 +112,6 @@ Addressable::URI::InvalidURIError")
|
|
107
112
|
nil
|
108
113
|
end
|
109
114
|
|
110
|
-
# Sets the @crawled instance var, also setting @date_crawled for
|
111
|
-
# convenience.
|
112
|
-
#
|
113
|
-
# @param bool [Boolean] True if this Url has been crawled, false otherwise.
|
114
|
-
# @return [Boolean] The value of bool having been set.
|
115
|
-
def crawled=(bool)
|
116
|
-
@crawled = bool
|
117
|
-
@date_crawled = bool ? Wgit::Utils.time_stamp : nil
|
118
|
-
end
|
119
|
-
|
120
115
|
# Overrides String#inspect to distinguish this Url from a String.
|
121
116
|
#
|
122
117
|
# @return [String] A short textual representation of this Url.
|
@@ -134,6 +129,71 @@ Addressable::URI::InvalidURIError")
|
|
134
129
|
super(new_url)
|
135
130
|
end
|
136
131
|
|
132
|
+
# Overrides String#concat which oddly returns a Wgit::Url object, and
|
133
|
+
# instead returns a String. Therefore this method works the same as if
|
134
|
+
# you call String#concat, or its alias String#+, which is desired for
|
135
|
+
# this method. If you want to join two Urls, use Wgit::Url#join method.
|
136
|
+
#
|
137
|
+
# @param other [String] The String to concat onto this one.
|
138
|
+
# @return [String] The new concatted String, not a Wgit::Url.
|
139
|
+
def concat(other)
|
140
|
+
to_s.concat(other.to_s)
|
141
|
+
end
|
142
|
+
|
143
|
+
# Sets the @crawled instance var, also setting @date_crawled for
|
144
|
+
# convenience.
|
145
|
+
#
|
146
|
+
# @param bool [Boolean] True if this Url has been crawled, false otherwise.
|
147
|
+
# @return [Boolean] The value of bool having been set.
|
148
|
+
def crawled=(bool)
|
149
|
+
@crawled = bool
|
150
|
+
@date_crawled = bool ? Wgit::Utils.time_stamp : nil
|
151
|
+
end
|
152
|
+
|
153
|
+
# Sets the @redirects instance var, mapping any Strings into Wgit::Urls.
|
154
|
+
#
|
155
|
+
# @param redirects [Hash] The redirects Hash to set for this Url.
|
156
|
+
def redirects=(redirects)
|
157
|
+
assert_type(redirects, Hash)
|
158
|
+
|
159
|
+
map_to_url = proc do |url|
|
160
|
+
Wgit::Url.new(url.to_s, crawled: @crawled, date_crawled: @date_crawled)
|
161
|
+
end
|
162
|
+
|
163
|
+
@redirects = redirects
|
164
|
+
.map { |from, to| [map_to_url.call(from), map_to_url.call(to)] }
|
165
|
+
.to_h
|
166
|
+
end
|
167
|
+
|
168
|
+
# Returns the Wgit::Url's starting with the originally requested Url to be
|
169
|
+
# crawled, followed by each redirected to Url, finishing with the final
|
170
|
+
# crawled Url e.g.
|
171
|
+
#
|
172
|
+
# Example Url redirects journey (dictated by the webserver):
|
173
|
+
#
|
174
|
+
# ```
|
175
|
+
# http://example.com => 301 to https://example.com
|
176
|
+
# https://example.com => 301 to https://example.com/
|
177
|
+
# https://example.com/ => 200 OK (no more redirects, crawl complete)
|
178
|
+
# ```
|
179
|
+
#
|
180
|
+
# Would return an Array of Wgit::Url's in the form of:
|
181
|
+
#
|
182
|
+
# ```
|
183
|
+
# %w(
|
184
|
+
# http://example.com
|
185
|
+
# https://example.com
|
186
|
+
# https://example.com/
|
187
|
+
# )
|
188
|
+
# ```
|
189
|
+
#
|
190
|
+
# @return [Array<Wgit::Url>] Each redirected to Url's finishing with the
|
191
|
+
# final (successfully) crawled Url. If no redirects took place, then just
|
192
|
+
# the originally requested Url is returned inside the Array.
|
193
|
+
def redirects_journey
|
194
|
+
[redirects.keys, self].flatten
|
195
|
+
end
|
196
|
+
|
137
197
|
# Returns true if self is a relative Url; false if absolute.
|
138
198
|
#
|
139
199
|
# An absolute URL must have a scheme prefix e.g.
|
@@ -170,7 +230,7 @@ Addressable::URI::InvalidURIError")
|
|
170
230
|
raise 'Url (self) cannot be empty' if empty?
|
171
231
|
|
172
232
|
return false if scheme_relative?
|
173
|
-
return true
|
233
|
+
return true if @uri.relative?
|
174
234
|
|
175
235
|
# Self is absolute but may be relative to the opts param e.g. host.
|
176
236
|
opts.select! { |_k, v| v }
|
@@ -226,22 +286,23 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
226
286
|
!valid?
|
227
287
|
end
|
228
288
|
|
229
|
-
#
|
230
|
-
# modified.
|
289
|
+
# Joins self and other together before returning a new Url. Self is not
|
290
|
+
# modified. Some magic occurs depending on what is being joined, see
|
291
|
+
# the source code for more information.
|
231
292
|
#
|
232
|
-
# @param other [Wgit::Url, String] The other to
|
293
|
+
# @param other [Wgit::Url, String] The other (relative) Url to join to the
|
294
|
+
# end of self.
|
233
295
|
# @return [Wgit::Url] self + separator + other, separator depends on other.
|
234
|
-
def
|
296
|
+
def join(other)
|
235
297
|
other = Wgit::Url.new(other)
|
236
298
|
raise 'other must be relative' unless other.relative?
|
237
299
|
|
238
300
|
other = other.omit_leading_slash
|
239
301
|
separator = %w[# ? .].include?(other[0]) ? '' : '/'
|
302
|
+
separator = '' if end_with?('/')
|
303
|
+
joined = self + separator + other
|
240
304
|
|
241
|
-
|
242
|
-
concatted = omit_trailing_slash.to_s + separator.to_s + other.to_s
|
243
|
-
|
244
|
-
Wgit::Url.new(concatted)
|
305
|
+
Wgit::Url.new(joined)
|
245
306
|
end
|
246
307
|
|
247
308
|
# Normalizes/escapes self and returns a new Wgit::Url. Self isn't modified.
|
@@ -257,7 +318,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
257
318
|
#
|
258
319
|
# If self is absolute then it's returned as is, making this method
|
259
320
|
# idempotent. The doc's `<base>` element is used if present, otherwise
|
260
|
-
# `doc.url` is used as the base; which is
|
321
|
+
# `doc.url` is used as the base; which is joined with self.
|
261
322
|
#
|
262
323
|
# Typically used to build an absolute link obtained from a document.
|
263
324
|
#
|
@@ -267,7 +328,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
267
328
|
#
|
268
329
|
# link.make_absolute(doc) # => "http://example.com/favicon.png"
|
269
330
|
#
|
270
|
-
# @param doc [Wgit::Document] The doc whose base Url is
|
331
|
+
# @param doc [Wgit::Document] The doc whose base Url is joined with
|
271
332
|
# self.
|
272
333
|
# @raise [StandardError] If doc isn't a Wgit::Document or if `doc.base_url`
|
273
334
|
# raises an Exception.
|
@@ -279,7 +340,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
279
340
|
|
280
341
|
return prefix_scheme(doc.url.to_scheme&.to_sym) if scheme_relative?
|
281
342
|
|
282
|
-
absolute? ? self : doc.base_url(link: self).
|
343
|
+
absolute? ? self : doc.base_url(link: self).join(self)
|
283
344
|
end
|
284
345
|
|
285
346
|
# Returns self having prefixed a scheme/protocol. Doesn't modify receiver.
|
@@ -427,7 +488,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
427
488
|
return nil if path.nil? || path.empty?
|
428
489
|
return Wgit::Url.new('/') if path == '/'
|
429
490
|
|
430
|
-
Wgit::Url.new(path).
|
491
|
+
Wgit::Url.new(path).omit_leading_slash
|
431
492
|
end
|
432
493
|
|
433
494
|
# Returns the endpoint of this URL e.g. the bit after the host with any
|
@@ -439,7 +500,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
439
500
|
# an endpoint, / is returned.
|
440
501
|
def to_endpoint
|
441
502
|
endpoint = @uri.path
|
442
|
-
endpoint =
|
503
|
+
endpoint = "/#{endpoint}" unless endpoint.start_with?('/')
|
443
504
|
Wgit::Url.new(endpoint)
|
444
505
|
end
|
445
506
|
|
@@ -484,7 +545,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
484
545
|
#
|
485
546
|
# @return [Wgit::Url, nil] Containing just the extension string or nil.
|
486
547
|
def to_extension
|
487
|
-
path = to_path
|
548
|
+
path = to_path&.omit_trailing_slash
|
488
549
|
return nil unless path
|
489
550
|
|
490
551
|
segs = path.split('.')
|
@@ -530,7 +591,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
530
591
|
#
|
531
592
|
# @return [Wgit::Url] Self without a leading slash.
|
532
593
|
def omit_leading_slash
|
533
|
-
start_with?('/') ? Wgit::Url.new(self[1
|
594
|
+
start_with?('/') ? Wgit::Url.new(self[1..]) : self
|
534
595
|
end
|
535
596
|
|
536
597
|
# Returns a new Wgit::Url containing self without a trailing slash. Is
|
@@ -564,7 +625,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
564
625
|
|
565
626
|
return self if ['', '/'].include?(omit_base)
|
566
627
|
|
567
|
-
Wgit::Url.new(omit_base).
|
628
|
+
Wgit::Url.new(omit_base).omit_leading_slash
|
568
629
|
end
|
569
630
|
|
570
631
|
# Returns a new Wgit::Url with the origin (base + port) removed e.g. Given
|
@@ -579,7 +640,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
579
640
|
|
580
641
|
return self if ['', '/'].include?(omit_origin)
|
581
642
|
|
582
|
-
Wgit::Url.new(omit_origin).
|
643
|
+
Wgit::Url.new(omit_origin).omit_leading_slash
|
583
644
|
end
|
584
645
|
|
585
646
|
# Returns a new Wgit::Url with the query string portion removed e.g. Given
|
@@ -642,32 +703,31 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
642
703
|
start_with?('//')
|
643
704
|
end
|
644
705
|
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
alias sub_domain to_sub_domain
|
706
|
+
alias_method :crawled?, :crawled
|
707
|
+
alias_method :is_relative?, :relative?
|
708
|
+
alias_method :is_absolute?, :absolute?
|
709
|
+
alias_method :is_valid?, :valid?
|
710
|
+
alias_method :is_query?, :query?
|
711
|
+
alias_method :is_fragment?, :fragment?
|
712
|
+
alias_method :is_index?, :index?
|
713
|
+
alias_method :is_scheme_relative?, :scheme_relative?
|
714
|
+
alias_method :uri, :to_uri
|
715
|
+
alias_method :url, :to_url
|
716
|
+
alias_method :scheme, :to_scheme
|
717
|
+
alias_method :host, :to_host
|
718
|
+
alias_method :port, :to_port
|
719
|
+
alias_method :domain, :to_domain
|
720
|
+
alias_method :brand, :to_brand
|
721
|
+
alias_method :base, :to_base
|
722
|
+
alias_method :origin, :to_origin
|
723
|
+
alias_method :path, :to_path
|
724
|
+
alias_method :endpoint, :to_endpoint
|
725
|
+
alias_method :query, :to_query
|
726
|
+
alias_method :query_hash, :to_query_hash
|
727
|
+
alias_method :fragment, :to_fragment
|
728
|
+
alias_method :extension, :to_extension
|
729
|
+
alias_method :user, :to_user
|
730
|
+
alias_method :password, :to_password
|
731
|
+
alias_method :sub_domain, :to_sub_domain
|
672
732
|
end
|
673
733
|
end
|
data/lib/wgit/utils.rb
CHANGED
@@ -23,7 +23,7 @@ module Wgit
|
|
23
23
|
obj.instance_variables.each do |var|
|
24
24
|
next if ignore.include?(var.to_s)
|
25
25
|
|
26
|
-
key = var.to_s[1
|
26
|
+
key = var.to_s[1..] # Remove the @ prefix.
|
27
27
|
key = key.to_sym unless use_strings_as_keys
|
28
28
|
hash[key] = obj.instance_variable_get(var)
|
29
29
|
end
|
@@ -37,9 +37,9 @@ module Wgit
|
|
37
37
|
# @yield [el] Gives each element (Object) of obj_or_objects if it's
|
38
38
|
# Enumerable, otherwise obj_or_objs itself is given.
|
39
39
|
# @return [Object] The obj_or_objs parameter is returned.
|
40
|
-
def self.each(obj_or_objs)
|
40
|
+
def self.each(obj_or_objs, &block)
|
41
41
|
if obj_or_objs.respond_to?(:each)
|
42
|
-
obj_or_objs.each
|
42
|
+
obj_or_objs.each(&block)
|
43
43
|
else
|
44
44
|
yield(obj_or_objs)
|
45
45
|
end
|
@@ -129,15 +129,13 @@ module Wgit
|
|
129
129
|
# Prints out the search results in a search engine like format.
|
130
130
|
# The format for each result looks like:
|
131
131
|
#
|
132
|
+
# ```
|
132
133
|
# Title
|
133
|
-
#
|
134
134
|
# Keywords (if there are some)
|
135
|
-
#
|
136
135
|
# Text Snippet (formatted to show the searched for query, if provided)
|
137
|
-
#
|
138
136
|
# URL
|
139
|
-
#
|
140
137
|
# <empty_line_separator>
|
138
|
+
# ```
|
141
139
|
#
|
142
140
|
# @param results [Array<Wgit::Document>] Array of Wgit::Document's which
|
143
141
|
# each have had #search!(query) called (to update its @text with the
|
@@ -147,7 +145,7 @@ module Wgit
|
|
147
145
|
# @param stream [#puts] Any object that respond_to?(:puts). It is used
|
148
146
|
# to output text somewhere e.g. a file or STDERR.
|
149
147
|
# @return [Integer] The number of results.
|
150
|
-
def self.
|
148
|
+
def self.pprint_search_results(results, keyword_limit: 5, stream: $stdout)
|
151
149
|
raise 'stream must respond_to? :puts' unless stream.respond_to?(:puts)
|
152
150
|
|
153
151
|
results.each do |doc|
|
@@ -167,56 +165,111 @@ module Wgit
|
|
167
165
|
end
|
168
166
|
|
169
167
|
# Sanitises the obj to make it uniform by calling the correct sanitize_*
|
170
|
-
# method for its type e.g. if obj.is_a? String then
|
171
|
-
# not in the case statement will be ignored and returned as is.
|
168
|
+
# method for its type e.g. if obj.is_a? String then sanitize_str(obj) is called.
|
169
|
+
# Any type not in the case statement will be ignored and returned as is.
|
170
|
+
# Call this method if unsure what obj's type is.
|
172
171
|
#
|
173
172
|
# @param obj [Object] The object to be sanitized.
|
174
173
|
# @param encode [Boolean] Whether or not to encode to UTF-8 replacing
|
175
174
|
# invalid characters.
|
176
|
-
# @return [Object] The sanitized obj
|
175
|
+
# @return [Object] The sanitized obj.
|
177
176
|
def self.sanitize(obj, encode: true)
|
178
177
|
case obj
|
178
|
+
when Wgit::Url
|
179
|
+
sanitize_url(obj, encode:)
|
179
180
|
when String
|
180
|
-
sanitize_str(obj, encode:
|
181
|
+
sanitize_str(obj, encode:)
|
181
182
|
when Array
|
182
|
-
sanitize_arr(obj, encode:
|
183
|
+
sanitize_arr(obj, encode:)
|
183
184
|
else
|
184
185
|
obj
|
185
186
|
end
|
186
187
|
end
|
187
188
|
|
189
|
+
# Sanitises a Wgit::Url to make it uniform. First sanitizes the Url as a
|
190
|
+
# String before replacing the Url value with the sanitized version. This
|
191
|
+
# method therefore modifies the given url param and also returns it.
|
192
|
+
#
|
193
|
+
# @param url [Wgit::Url] The Wgit::Url to sanitize. url is modified.
|
194
|
+
# @param encode [Boolean] Whether or not to encode to UTF-8 replacing
|
195
|
+
# invalid characters.
|
196
|
+
# @return [Wgit::Url] The sanitized url, which is also modified.
|
197
|
+
def self.sanitize_url(url, encode: true)
|
198
|
+
str = sanitize_str(url.to_s, encode:)
|
199
|
+
url.replace(str)
|
200
|
+
end
|
201
|
+
|
188
202
|
# Sanitises a String to make it uniform. Strips any leading/trailing white
|
189
203
|
# space. Also applies UTF-8 encoding (replacing invalid characters) if
|
190
204
|
# `encode: true`.
|
191
205
|
#
|
192
|
-
# @param str [String] The String to sanitize. str is modified.
|
206
|
+
# @param str [String] The String to sanitize. str is not modified.
|
193
207
|
# @param encode [Boolean] Whether or not to encode to UTF-8 replacing
|
194
208
|
# invalid characters.
|
195
|
-
# @return [String] The sanitized str
|
209
|
+
# @return [String] The sanitized str.
|
196
210
|
def self.sanitize_str(str, encode: true)
|
197
|
-
|
198
|
-
str.encode!('UTF-8', undef: :replace, invalid: :replace) if encode
|
199
|
-
str.strip!
|
200
|
-
end
|
211
|
+
return str unless str.is_a?(String)
|
201
212
|
|
202
|
-
str
|
213
|
+
str = str.encode('UTF-8', undef: :replace, invalid: :replace) if encode
|
214
|
+
str.strip
|
203
215
|
end
|
204
216
|
|
205
217
|
# Sanitises an Array to make it uniform. Removes empty Strings and nils,
|
206
218
|
# processes non empty Strings using Wgit::Utils.sanitize and removes
|
207
219
|
# duplicates.
|
208
220
|
#
|
209
|
-
# @param arr [Enumerable] The Array to sanitize. arr is modified.
|
210
|
-
# @return [Enumerable] The sanitized arr
|
221
|
+
# @param arr [Enumerable] The Array to sanitize. arr is not modified.
|
222
|
+
# @return [Enumerable] The sanitized arr.
|
211
223
|
def self.sanitize_arr(arr, encode: true)
|
212
|
-
|
213
|
-
arr.map! { |str| sanitize(str, encode: encode) }
|
214
|
-
arr.reject! { |str| str.is_a?(String) ? str.empty? : false }
|
215
|
-
arr.compact!
|
216
|
-
arr.uniq!
|
217
|
-
end
|
224
|
+
return arr unless arr.is_a?(Array)
|
218
225
|
|
219
226
|
arr
|
227
|
+
.map { |str| sanitize(str, encode:) }
|
228
|
+
.reject { |str| str.is_a?(String) && str.empty? }
|
229
|
+
.compact
|
230
|
+
.uniq
|
231
|
+
end
|
232
|
+
|
233
|
+
# Pretty prints a log statement, used for debugging purposes.
|
234
|
+
#
|
235
|
+
# Use like:
|
236
|
+
#
|
237
|
+
# ```
|
238
|
+
# Wgit::Utils.pprint 1, include_html: include_html, ignore: ignore_vars
|
239
|
+
# ```
|
240
|
+
#
|
241
|
+
# Which produces a log like:
|
242
|
+
#
|
243
|
+
# ```
|
244
|
+
# DEBUG_1 - include_html: true | ignore: ['@html', '@parser']
|
245
|
+
# ```
|
246
|
+
#
|
247
|
+
# @param identifier [#to_s] A log identifier e.g. "START" or 1 etc.
|
248
|
+
# @param stream [#puts] Any object that respond_to? :puts and :print. It is
|
249
|
+
# used to output the log text somewhere e.g. a file or STDERR.
|
250
|
+
# @param prefix [String] The log prefix, useful for visibility/grepping.
|
251
|
+
# @param new_line [Boolean] Whether or not to use a new line (\n) as the
|
252
|
+
# separator.
|
253
|
+
# @param vars [Hash<#inspect, #inspect>] The vars to inspect in the log.
|
254
|
+
def self.pprint(identifier, stream: $stdout, prefix: 'DEBUG', new_line: false, **vars)
|
255
|
+
sep1 = new_line ? "\n" : ' - '
|
256
|
+
sep2 = new_line ? "\n" : ' | '
|
257
|
+
|
258
|
+
stream.print "\n#{prefix}_#{identifier}#{sep1}"
|
259
|
+
|
260
|
+
vars.each_with_index do |arr, i|
|
261
|
+
last_item = (i + 1) == vars.size
|
262
|
+
sep3 = sep2
|
263
|
+
sep3 = new_line ? "\n" : '' if last_item
|
264
|
+
k, v = arr
|
265
|
+
|
266
|
+
stream.print "#{k}: #{v}#{sep3}"
|
267
|
+
end
|
268
|
+
|
269
|
+
stream.puts "\n"
|
270
|
+
stream.puts "\n" unless new_line
|
271
|
+
|
272
|
+
nil
|
220
273
|
end
|
221
274
|
end
|
222
275
|
end
|
data/lib/wgit/version.rb
CHANGED
data/lib/wgit.rb
CHANGED
@@ -10,6 +10,7 @@ require_relative 'wgit/document_extractors'
|
|
10
10
|
require_relative 'wgit/crawler'
|
11
11
|
require_relative 'wgit/database/model'
|
12
12
|
require_relative 'wgit/database/database'
|
13
|
+
require_relative 'wgit/robots_parser'
|
13
14
|
require_relative 'wgit/indexer'
|
14
15
|
require_relative 'wgit/dsl'
|
15
16
|
require_relative 'wgit/base'
|