wgit 0.10.7 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +44 -1
- data/CONTRIBUTING.md +1 -1
- data/README.md +22 -2
- data/bin/wgit +3 -1
- data/lib/wgit/assertable.rb +2 -2
- data/lib/wgit/crawler.rb +56 -34
- data/lib/wgit/database/database.rb +64 -52
- data/lib/wgit/document.rb +67 -39
- data/lib/wgit/document_extractors.rb +15 -1
- data/lib/wgit/dsl.rb +16 -20
- data/lib/wgit/indexer.rb +157 -63
- data/lib/wgit/logger.rb +1 -1
- data/lib/wgit/response.rb +21 -6
- data/lib/wgit/robots_parser.rb +193 -0
- data/lib/wgit/url.rb +118 -51
- data/lib/wgit/utils.rb +81 -28
- data/lib/wgit/version.rb +1 -1
- data/lib/wgit.rb +1 -0
- metadata +33 -38
data/lib/wgit/url.rb
CHANGED
@@ -28,6 +28,9 @@ module Wgit
|
|
28
28
|
# The duration of the crawl for this Url (in seconds).
|
29
29
|
attr_accessor :crawl_duration
|
30
30
|
|
31
|
+
# Record the redirects from the initial Url to the final Url.
|
32
|
+
attr_reader :redirects
|
33
|
+
|
31
34
|
# Initializes a new instance of Wgit::Url which models a web based
|
32
35
|
# HTTP URL.
|
33
36
|
#
|
@@ -57,12 +60,14 @@ module Wgit
|
|
57
60
|
crawled = obj.fetch('crawled', false)
|
58
61
|
date_crawled = obj.fetch('date_crawled', nil)
|
59
62
|
crawl_duration = obj.fetch('crawl_duration', nil)
|
63
|
+
redirects = obj.fetch('redirects', {})
|
60
64
|
end
|
61
65
|
|
62
66
|
@uri = Addressable::URI.parse(url)
|
63
67
|
@crawled = crawled
|
64
68
|
@date_crawled = date_crawled
|
65
69
|
@crawl_duration = crawl_duration
|
70
|
+
@redirects = redirects || {}
|
66
71
|
|
67
72
|
super(url)
|
68
73
|
end
|
@@ -107,6 +112,34 @@ Addressable::URI::InvalidURIError")
|
|
107
112
|
nil
|
108
113
|
end
|
109
114
|
|
115
|
+
# Overrides String#inspect to distinguish this Url from a String.
|
116
|
+
#
|
117
|
+
# @return [String] A short textual representation of this Url.
|
118
|
+
def inspect
|
119
|
+
"#<Wgit::Url url=\"#{self}\" crawled=#{@crawled}>"
|
120
|
+
end
|
121
|
+
|
122
|
+
# Overrides String#replace setting the new_url @uri and String value.
|
123
|
+
#
|
124
|
+
# @param new_url [Wgit::Url, String] The new URL value.
|
125
|
+
# @return [String] The new URL value once set.
|
126
|
+
def replace(new_url)
|
127
|
+
@uri = Addressable::URI.parse(new_url)
|
128
|
+
|
129
|
+
super(new_url)
|
130
|
+
end
|
131
|
+
|
132
|
+
# Overrides String#concat which oddly returns a Wgit::Url object, and
|
133
|
+
# instead returns a String. Therefore this method works the same as if
|
134
|
+
# you call String#concat, or its alias String#+, which is desired for
|
135
|
+
# this method. If you want to join two Urls, use Wgit::Url#join method.
|
136
|
+
#
|
137
|
+
# @param other [String] The String to concat onto this one.
|
138
|
+
# @return [String] The new concatted String, not a Wgit::Url.
|
139
|
+
def concat(other)
|
140
|
+
to_s.concat(other.to_s)
|
141
|
+
end
|
142
|
+
|
110
143
|
# Sets the @crawled instance var, also setting @date_crawled for
|
111
144
|
# convenience.
|
112
145
|
#
|
@@ -117,14 +150,48 @@ Addressable::URI::InvalidURIError")
|
|
117
150
|
@date_crawled = bool ? Wgit::Utils.time_stamp : nil
|
118
151
|
end
|
119
152
|
|
120
|
-
#
|
153
|
+
# Sets the @redirects instance var, mapping any Strings into Wgit::Urls.
|
121
154
|
#
|
122
|
-
# @param
|
123
|
-
|
124
|
-
|
125
|
-
@uri = Addressable::URI.parse(new_url)
|
155
|
+
# @param redirects [Hash] The redirects Hash to set for this Url.
|
156
|
+
def redirects=(redirects)
|
157
|
+
assert_type(redirects, Hash)
|
126
158
|
|
127
|
-
|
159
|
+
map_to_url = proc do |url|
|
160
|
+
Wgit::Url.new(url.to_s, crawled: @crawled, date_crawled: @date_crawled)
|
161
|
+
end
|
162
|
+
|
163
|
+
@redirects = redirects
|
164
|
+
.map { |from, to| [map_to_url.call(from), map_to_url.call(to)] }
|
165
|
+
.to_h
|
166
|
+
end
|
167
|
+
|
168
|
+
# Returns the Wgit::Url's starting with the originally requested Url to be
|
169
|
+
# crawled, followed by each redirected to Url, finishing with the final
|
170
|
+
# crawled Url e.g.
|
171
|
+
#
|
172
|
+
# Example Url redirects journey (dictated by the webserver):
|
173
|
+
#
|
174
|
+
# ```
|
175
|
+
# http://example.com => 301 to https://example.com
|
176
|
+
# https://example.com => 301 to https://example.com/
|
177
|
+
# https://example.com/ => 200 OK (no more redirects, crawl complete)
|
178
|
+
# ```
|
179
|
+
#
|
180
|
+
# Would return an Array of Wgit::Url's in the form of:
|
181
|
+
#
|
182
|
+
# ```
|
183
|
+
# %w(
|
184
|
+
# http://example.com
|
185
|
+
# https://example.com
|
186
|
+
# https://example.com/
|
187
|
+
# )
|
188
|
+
# ```
|
189
|
+
#
|
190
|
+
# @return [Array<Wgit::Url>] Each redirected to Url's finishing with the
|
191
|
+
# final (successfully) crawled Url. If no redirects took place, then just
|
192
|
+
# the originally requested Url is returned inside the Array.
|
193
|
+
def redirects_journey
|
194
|
+
[redirects.keys, self].flatten
|
128
195
|
end
|
129
196
|
|
130
197
|
# Returns true if self is a relative Url; false if absolute.
|
@@ -163,7 +230,7 @@ Addressable::URI::InvalidURIError")
|
|
163
230
|
raise 'Url (self) cannot be empty' if empty?
|
164
231
|
|
165
232
|
return false if scheme_relative?
|
166
|
-
return true
|
233
|
+
return true if @uri.relative?
|
167
234
|
|
168
235
|
# Self is absolute but may be relative to the opts param e.g. host.
|
169
236
|
opts.select! { |_k, v| v }
|
@@ -219,22 +286,23 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
219
286
|
!valid?
|
220
287
|
end
|
221
288
|
|
222
|
-
#
|
223
|
-
# modified.
|
289
|
+
# Joins self and other together before returning a new Url. Self is not
|
290
|
+
# modified. Some magic occurs depending on what is being joined, see
|
291
|
+
# the source code for more information.
|
224
292
|
#
|
225
|
-
# @param other [Wgit::Url, String] The other to
|
293
|
+
# @param other [Wgit::Url, String] The other (relative) Url to join to the
|
294
|
+
# end of self.
|
226
295
|
# @return [Wgit::Url] self + separator + other, separator depends on other.
|
227
|
-
def
|
296
|
+
def join(other)
|
228
297
|
other = Wgit::Url.new(other)
|
229
298
|
raise 'other must be relative' unless other.relative?
|
230
299
|
|
231
300
|
other = other.omit_leading_slash
|
232
301
|
separator = %w[# ? .].include?(other[0]) ? '' : '/'
|
302
|
+
separator = '' if end_with?('/')
|
303
|
+
joined = self + separator + other
|
233
304
|
|
234
|
-
|
235
|
-
concatted = omit_trailing_slash.to_s + separator.to_s + other.to_s
|
236
|
-
|
237
|
-
Wgit::Url.new(concatted)
|
305
|
+
Wgit::Url.new(joined)
|
238
306
|
end
|
239
307
|
|
240
308
|
# Normalizes/escapes self and returns a new Wgit::Url. Self isn't modified.
|
@@ -250,7 +318,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
250
318
|
#
|
251
319
|
# If self is absolute then it's returned as is, making this method
|
252
320
|
# idempotent. The doc's `<base>` element is used if present, otherwise
|
253
|
-
# `doc.url` is used as the base; which is
|
321
|
+
# `doc.url` is used as the base; which is joined with self.
|
254
322
|
#
|
255
323
|
# Typically used to build an absolute link obtained from a document.
|
256
324
|
#
|
@@ -260,7 +328,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
260
328
|
#
|
261
329
|
# link.make_absolute(doc) # => "http://example.com/favicon.png"
|
262
330
|
#
|
263
|
-
# @param doc [Wgit::Document] The doc whose base Url is
|
331
|
+
# @param doc [Wgit::Document] The doc whose base Url is joined with
|
264
332
|
# self.
|
265
333
|
# @raise [StandardError] If doc isn't a Wgit::Document or if `doc.base_url`
|
266
334
|
# raises an Exception.
|
@@ -272,7 +340,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
272
340
|
|
273
341
|
return prefix_scheme(doc.url.to_scheme&.to_sym) if scheme_relative?
|
274
342
|
|
275
|
-
absolute? ? self : doc.base_url(link: self).
|
343
|
+
absolute? ? self : doc.base_url(link: self).join(self)
|
276
344
|
end
|
277
345
|
|
278
346
|
# Returns self having prefixed a scheme/protocol. Doesn't modify receiver.
|
@@ -420,7 +488,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
420
488
|
return nil if path.nil? || path.empty?
|
421
489
|
return Wgit::Url.new('/') if path == '/'
|
422
490
|
|
423
|
-
Wgit::Url.new(path).
|
491
|
+
Wgit::Url.new(path).omit_leading_slash
|
424
492
|
end
|
425
493
|
|
426
494
|
# Returns the endpoint of this URL e.g. the bit after the host with any
|
@@ -432,7 +500,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
432
500
|
# an endpoint, / is returned.
|
433
501
|
def to_endpoint
|
434
502
|
endpoint = @uri.path
|
435
|
-
endpoint =
|
503
|
+
endpoint = "/#{endpoint}" unless endpoint.start_with?('/')
|
436
504
|
Wgit::Url.new(endpoint)
|
437
505
|
end
|
438
506
|
|
@@ -477,7 +545,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
477
545
|
#
|
478
546
|
# @return [Wgit::Url, nil] Containing just the extension string or nil.
|
479
547
|
def to_extension
|
480
|
-
path = to_path
|
548
|
+
path = to_path&.omit_trailing_slash
|
481
549
|
return nil unless path
|
482
550
|
|
483
551
|
segs = path.split('.')
|
@@ -523,7 +591,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
523
591
|
#
|
524
592
|
# @return [Wgit::Url] Self without a leading slash.
|
525
593
|
def omit_leading_slash
|
526
|
-
start_with?('/') ? Wgit::Url.new(self[1
|
594
|
+
start_with?('/') ? Wgit::Url.new(self[1..]) : self
|
527
595
|
end
|
528
596
|
|
529
597
|
# Returns a new Wgit::Url containing self without a trailing slash. Is
|
@@ -557,7 +625,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
557
625
|
|
558
626
|
return self if ['', '/'].include?(omit_base)
|
559
627
|
|
560
|
-
Wgit::Url.new(omit_base).
|
628
|
+
Wgit::Url.new(omit_base).omit_leading_slash
|
561
629
|
end
|
562
630
|
|
563
631
|
# Returns a new Wgit::Url with the origin (base + port) removed e.g. Given
|
@@ -572,7 +640,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
572
640
|
|
573
641
|
return self if ['', '/'].include?(omit_origin)
|
574
642
|
|
575
|
-
Wgit::Url.new(omit_origin).
|
643
|
+
Wgit::Url.new(omit_origin).omit_leading_slash
|
576
644
|
end
|
577
645
|
|
578
646
|
# Returns a new Wgit::Url with the query string portion removed e.g. Given
|
@@ -635,32 +703,31 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
635
703
|
start_with?('//')
|
636
704
|
end
|
637
705
|
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
alias sub_domain to_sub_domain
|
706
|
+
alias_method :crawled?, :crawled
|
707
|
+
alias_method :is_relative?, :relative?
|
708
|
+
alias_method :is_absolute?, :absolute?
|
709
|
+
alias_method :is_valid?, :valid?
|
710
|
+
alias_method :is_query?, :query?
|
711
|
+
alias_method :is_fragment?, :fragment?
|
712
|
+
alias_method :is_index?, :index?
|
713
|
+
alias_method :is_scheme_relative?, :scheme_relative?
|
714
|
+
alias_method :uri, :to_uri
|
715
|
+
alias_method :url, :to_url
|
716
|
+
alias_method :scheme, :to_scheme
|
717
|
+
alias_method :host, :to_host
|
718
|
+
alias_method :port, :to_port
|
719
|
+
alias_method :domain, :to_domain
|
720
|
+
alias_method :brand, :to_brand
|
721
|
+
alias_method :base, :to_base
|
722
|
+
alias_method :origin, :to_origin
|
723
|
+
alias_method :path, :to_path
|
724
|
+
alias_method :endpoint, :to_endpoint
|
725
|
+
alias_method :query, :to_query
|
726
|
+
alias_method :query_hash, :to_query_hash
|
727
|
+
alias_method :fragment, :to_fragment
|
728
|
+
alias_method :extension, :to_extension
|
729
|
+
alias_method :user, :to_user
|
730
|
+
alias_method :password, :to_password
|
731
|
+
alias_method :sub_domain, :to_sub_domain
|
665
732
|
end
|
666
733
|
end
|
data/lib/wgit/utils.rb
CHANGED
@@ -23,7 +23,7 @@ module Wgit
|
|
23
23
|
obj.instance_variables.each do |var|
|
24
24
|
next if ignore.include?(var.to_s)
|
25
25
|
|
26
|
-
key = var.to_s[1
|
26
|
+
key = var.to_s[1..] # Remove the @ prefix.
|
27
27
|
key = key.to_sym unless use_strings_as_keys
|
28
28
|
hash[key] = obj.instance_variable_get(var)
|
29
29
|
end
|
@@ -37,9 +37,9 @@ module Wgit
|
|
37
37
|
# @yield [el] Gives each element (Object) of obj_or_objects if it's
|
38
38
|
# Enumerable, otherwise obj_or_objs itself is given.
|
39
39
|
# @return [Object] The obj_or_objs parameter is returned.
|
40
|
-
def self.each(obj_or_objs)
|
40
|
+
def self.each(obj_or_objs, &block)
|
41
41
|
if obj_or_objs.respond_to?(:each)
|
42
|
-
obj_or_objs.each
|
42
|
+
obj_or_objs.each(&block)
|
43
43
|
else
|
44
44
|
yield(obj_or_objs)
|
45
45
|
end
|
@@ -129,15 +129,13 @@ module Wgit
|
|
129
129
|
# Prints out the search results in a search engine like format.
|
130
130
|
# The format for each result looks like:
|
131
131
|
#
|
132
|
+
# ```
|
132
133
|
# Title
|
133
|
-
#
|
134
134
|
# Keywords (if there are some)
|
135
|
-
#
|
136
135
|
# Text Snippet (formatted to show the searched for query, if provided)
|
137
|
-
#
|
138
136
|
# URL
|
139
|
-
#
|
140
137
|
# <empty_line_separator>
|
138
|
+
# ```
|
141
139
|
#
|
142
140
|
# @param results [Array<Wgit::Document>] Array of Wgit::Document's which
|
143
141
|
# each have had #search!(query) called (to update its @text with the
|
@@ -147,7 +145,7 @@ module Wgit
|
|
147
145
|
# @param stream [#puts] Any object that respond_to?(:puts). It is used
|
148
146
|
# to output text somewhere e.g. a file or STDERR.
|
149
147
|
# @return [Integer] The number of results.
|
150
|
-
def self.
|
148
|
+
def self.pprint_search_results(results, keyword_limit: 5, stream: $stdout)
|
151
149
|
raise 'stream must respond_to? :puts' unless stream.respond_to?(:puts)
|
152
150
|
|
153
151
|
results.each do |doc|
|
@@ -167,56 +165,111 @@ module Wgit
|
|
167
165
|
end
|
168
166
|
|
169
167
|
# Sanitises the obj to make it uniform by calling the correct sanitize_*
|
170
|
-
# method for its type e.g. if obj.is_a? String then
|
171
|
-
# not in the case statement will be ignored and returned as is.
|
168
|
+
# method for its type e.g. if obj.is_a? String then sanitize_str(obj) is called.
|
169
|
+
# Any type not in the case statement will be ignored and returned as is.
|
170
|
+
# Call this method if unsure what obj's type is.
|
172
171
|
#
|
173
172
|
# @param obj [Object] The object to be sanitized.
|
174
173
|
# @param encode [Boolean] Whether or not to encode to UTF-8 replacing
|
175
174
|
# invalid characters.
|
176
|
-
# @return [Object] The sanitized obj
|
175
|
+
# @return [Object] The sanitized obj.
|
177
176
|
def self.sanitize(obj, encode: true)
|
178
177
|
case obj
|
178
|
+
when Wgit::Url
|
179
|
+
sanitize_url(obj, encode:)
|
179
180
|
when String
|
180
|
-
sanitize_str(obj, encode:
|
181
|
+
sanitize_str(obj, encode:)
|
181
182
|
when Array
|
182
|
-
sanitize_arr(obj, encode:
|
183
|
+
sanitize_arr(obj, encode:)
|
183
184
|
else
|
184
185
|
obj
|
185
186
|
end
|
186
187
|
end
|
187
188
|
|
189
|
+
# Sanitises a Wgit::Url to make it uniform. First sanitizes the Url as a
|
190
|
+
# String before replacing the Url value with the sanitized version. This
|
191
|
+
# method therefore modifies the given url param and also returns it.
|
192
|
+
#
|
193
|
+
# @param url [Wgit::Url] The Wgit::Url to sanitize. url is modified.
|
194
|
+
# @param encode [Boolean] Whether or not to encode to UTF-8 replacing
|
195
|
+
# invalid characters.
|
196
|
+
# @return [Wgit::Url] The sanitized url, which is also modified.
|
197
|
+
def self.sanitize_url(url, encode: true)
|
198
|
+
str = sanitize_str(url.to_s, encode:)
|
199
|
+
url.replace(str)
|
200
|
+
end
|
201
|
+
|
188
202
|
# Sanitises a String to make it uniform. Strips any leading/trailing white
|
189
203
|
# space. Also applies UTF-8 encoding (replacing invalid characters) if
|
190
204
|
# `encode: true`.
|
191
205
|
#
|
192
|
-
# @param str [String] The String to sanitize. str is modified.
|
206
|
+
# @param str [String] The String to sanitize. str is not modified.
|
193
207
|
# @param encode [Boolean] Whether or not to encode to UTF-8 replacing
|
194
208
|
# invalid characters.
|
195
|
-
# @return [String] The sanitized str
|
209
|
+
# @return [String] The sanitized str.
|
196
210
|
def self.sanitize_str(str, encode: true)
|
197
|
-
|
198
|
-
str.encode!('UTF-8', undef: :replace, invalid: :replace) if encode
|
199
|
-
str.strip!
|
200
|
-
end
|
211
|
+
return str unless str.is_a?(String)
|
201
212
|
|
202
|
-
str
|
213
|
+
str = str.encode('UTF-8', undef: :replace, invalid: :replace) if encode
|
214
|
+
str.strip
|
203
215
|
end
|
204
216
|
|
205
217
|
# Sanitises an Array to make it uniform. Removes empty Strings and nils,
|
206
218
|
# processes non empty Strings using Wgit::Utils.sanitize and removes
|
207
219
|
# duplicates.
|
208
220
|
#
|
209
|
-
# @param arr [Enumerable] The Array to sanitize. arr is modified.
|
210
|
-
# @return [Enumerable] The sanitized arr
|
221
|
+
# @param arr [Enumerable] The Array to sanitize. arr is not modified.
|
222
|
+
# @return [Enumerable] The sanitized arr.
|
211
223
|
def self.sanitize_arr(arr, encode: true)
|
212
|
-
|
213
|
-
arr.map! { |str| sanitize(str, encode: encode) }
|
214
|
-
arr.reject! { |str| str.is_a?(String) ? str.empty? : false }
|
215
|
-
arr.compact!
|
216
|
-
arr.uniq!
|
217
|
-
end
|
224
|
+
return arr unless arr.is_a?(Array)
|
218
225
|
|
219
226
|
arr
|
227
|
+
.map { |str| sanitize(str, encode:) }
|
228
|
+
.reject { |str| str.is_a?(String) && str.empty? }
|
229
|
+
.compact
|
230
|
+
.uniq
|
231
|
+
end
|
232
|
+
|
233
|
+
# Pretty prints a log statement, used for debugging purposes.
|
234
|
+
#
|
235
|
+
# Use like:
|
236
|
+
#
|
237
|
+
# ```
|
238
|
+
# Wgit::Utils.pprint 1, include_html: include_html, ignore: ignore_vars
|
239
|
+
# ```
|
240
|
+
#
|
241
|
+
# Which produces a log like:
|
242
|
+
#
|
243
|
+
# ```
|
244
|
+
# DEBUG_1 - include_html: true | ignore: ['@html', '@parser']
|
245
|
+
# ```
|
246
|
+
#
|
247
|
+
# @param identifier [#to_s] A log identifier e.g. "START" or 1 etc.
|
248
|
+
# @param stream [#puts] Any object that respond_to? :puts and :print. It is
|
249
|
+
# used to output the log text somewhere e.g. a file or STDERR.
|
250
|
+
# @param prefix [String] The log prefix, useful for visibility/greping.
|
251
|
+
# @param new_line [Boolean] Whether or not to use a new line (\n) as the
|
252
|
+
# separator.
|
253
|
+
# @param vars [Hash<#inspect, #inspect>] The vars to inspect in the log.
|
254
|
+
def self.pprint(identifier, stream: $stdout, prefix: 'DEBUG', new_line: false, **vars)
|
255
|
+
sep1 = new_line ? "\n" : ' - '
|
256
|
+
sep2 = new_line ? "\n" : ' | '
|
257
|
+
|
258
|
+
stream.print "\n#{prefix}_#{identifier}#{sep1}"
|
259
|
+
|
260
|
+
vars.each_with_index do |arr, i|
|
261
|
+
last_item = (i + 1) == vars.size
|
262
|
+
sep3 = sep2
|
263
|
+
sep3 = new_line ? "\n" : '' if last_item
|
264
|
+
k, v = arr
|
265
|
+
|
266
|
+
stream.print "#{k}: #{v}#{sep3}"
|
267
|
+
end
|
268
|
+
|
269
|
+
stream.puts "\n"
|
270
|
+
stream.puts "\n" unless new_line
|
271
|
+
|
272
|
+
nil
|
220
273
|
end
|
221
274
|
end
|
222
275
|
end
|
data/lib/wgit/version.rb
CHANGED
data/lib/wgit.rb
CHANGED
@@ -10,6 +10,7 @@ require_relative 'wgit/document_extractors'
|
|
10
10
|
require_relative 'wgit/crawler'
|
11
11
|
require_relative 'wgit/database/model'
|
12
12
|
require_relative 'wgit/database/database'
|
13
|
+
require_relative 'wgit/robots_parser'
|
13
14
|
require_relative 'wgit/indexer'
|
14
15
|
require_relative 'wgit/dsl'
|
15
16
|
require_relative 'wgit/base'
|