wgit 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +1 -1
- data/CHANGELOG.md +39 -0
- data/LICENSE.txt +1 -1
- data/README.md +118 -323
- data/bin/wgit +9 -5
- data/lib/wgit.rb +3 -1
- data/lib/wgit/assertable.rb +3 -3
- data/lib/wgit/base.rb +30 -0
- data/lib/wgit/crawler.rb +206 -76
- data/lib/wgit/database/database.rb +309 -134
- data/lib/wgit/database/model.rb +10 -3
- data/lib/wgit/document.rb +138 -95
- data/lib/wgit/{document_extensions.rb → document_extractors.rb} +11 -11
- data/lib/wgit/dsl.rb +324 -0
- data/lib/wgit/indexer.rb +65 -162
- data/lib/wgit/response.rb +5 -2
- data/lib/wgit/url.rb +133 -31
- data/lib/wgit/utils.rb +32 -20
- data/lib/wgit/version.rb +2 -1
- metadata +26 -14
data/lib/wgit/response.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module Wgit
|
2
|
-
# Response class
|
2
|
+
# Response class modeling a generic HTTP GET response.
|
3
3
|
class Response
|
4
4
|
# The underlying HTTP adapter/library response object.
|
5
5
|
attr_accessor :adapter_response
|
@@ -69,7 +69,10 @@ module Wgit
|
|
69
69
|
# @param headers [Hash] The new response headers.
|
70
70
|
# @return [Hash] @headers's new value.
|
71
71
|
def headers=(headers)
|
72
|
-
|
72
|
+
unless headers
|
73
|
+
@headers = {}
|
74
|
+
return
|
75
|
+
end
|
73
76
|
|
74
77
|
@headers = headers.map do |k, v|
|
75
78
|
k = k.downcase.gsub('-', '_').to_sym
|
data/lib/wgit/url.rb
CHANGED
@@ -6,15 +6,15 @@ require 'uri'
|
|
6
6
|
require 'addressable/uri'
|
7
7
|
|
8
8
|
module Wgit
|
9
|
-
# Class modeling a web based HTTP URL.
|
9
|
+
# Class modeling/serialising a web based HTTP URL.
|
10
10
|
#
|
11
11
|
# Can be an internal/relative link e.g. "about.html" or an absolute URL
|
12
|
-
# e.g. "http://www.google.co.uk". Is a subclass of String and uses
|
13
|
-
#
|
12
|
+
# e.g. "http://www.google.co.uk". Is a subclass of String and uses `URI` and
|
13
|
+
# `addressable/uri` internally for parsing.
|
14
14
|
#
|
15
|
-
# Most of the methods in this class return new Wgit::Url instances making
|
16
|
-
# method calls chainable e.g. url.omit_base.omit_fragment etc. The
|
17
|
-
# also try to be idempotent where possible.
|
15
|
+
# Most of the methods in this class return new `Wgit::Url` instances making
|
16
|
+
# the method calls chainable e.g. `url.omit_base.omit_fragment` etc. The
|
17
|
+
# methods also try to be idempotent where possible.
|
18
18
|
class Url < String
|
19
19
|
include Assertable
|
20
20
|
|
@@ -28,7 +28,7 @@ module Wgit
|
|
28
28
|
# The duration of the crawl for this Url (in seconds).
|
29
29
|
attr_accessor :crawl_duration
|
30
30
|
|
31
|
-
# Initializes a new instance of Wgit::Url which
|
31
|
+
# Initializes a new instance of Wgit::Url which models a web based
|
32
32
|
# HTTP URL.
|
33
33
|
#
|
34
34
|
# @param url_or_obj [String, Wgit::Url, #fetch#[]] Is either a String
|
@@ -99,10 +99,10 @@ module Wgit
|
|
99
99
|
# @param obj [Object] The object to parse, which #is_a?(String).
|
100
100
|
# @raise [StandardError] If obj.is_a?(String) is false.
|
101
101
|
# @return [Wgit::Url] A Wgit::Url instance or nil (if obj is invalid).
|
102
|
-
def self.
|
102
|
+
def self.parse?(obj)
|
103
103
|
parse(obj)
|
104
104
|
rescue Addressable::URI::InvalidURIError
|
105
|
-
Wgit.logger.debug("Wgit::Url.
|
105
|
+
Wgit.logger.debug("Wgit::Url.parse?('#{obj}') exception: \
|
106
106
|
Addressable::URI::InvalidURIError")
|
107
107
|
nil
|
108
108
|
end
|
@@ -115,8 +115,6 @@ Addressable::URI::InvalidURIError")
|
|
115
115
|
def crawled=(bool)
|
116
116
|
@crawled = bool
|
117
117
|
@date_crawled = bool ? Wgit::Utils.time_stamp : nil
|
118
|
-
|
119
|
-
bool
|
120
118
|
end
|
121
119
|
|
122
120
|
# Overrides String#replace setting the new_url @uri and String value.
|
@@ -146,10 +144,10 @@ Addressable::URI::InvalidURIError")
|
|
146
144
|
# @param opts [Hash] The options with which to check relativity. Only one
|
147
145
|
# opts param should be provided. The provided opts param Url must be
|
148
146
|
# absolute and be prefixed with a scheme. Consider using the output of
|
149
|
-
# Wgit::Url#
|
150
|
-
# @option opts [Wgit::Url, String] :
|
151
|
-
# http://www.google.com/how which gives a
|
152
|
-
# 'http://www.google.com'.
|
147
|
+
# Wgit::Url#to_origin which should work (unless it's nil).
|
148
|
+
# @option opts [Wgit::Url, String] :origin The Url origin e.g.
|
149
|
+
# http://www.google.com:81/how which gives an origin of
|
150
|
+
# 'http://www.google.com:81'.
|
153
151
|
# @option opts [Wgit::Url, String] :host The Url host e.g.
|
154
152
|
# http://www.google.com/how which gives a host of 'www.google.com'.
|
155
153
|
# @option opts [Wgit::Url, String] :domain The Url domain e.g.
|
@@ -160,7 +158,7 @@ Addressable::URI::InvalidURIError")
|
|
160
158
|
# param has been provided.
|
161
159
|
# @return [Boolean] True if relative, false if absolute.
|
162
160
|
def relative?(opts = {})
|
163
|
-
defaults = {
|
161
|
+
defaults = { origin: nil, host: nil, domain: nil, brand: nil }
|
164
162
|
opts = defaults.merge(opts)
|
165
163
|
raise 'Url (self) cannot be empty' if empty?
|
166
164
|
|
@@ -180,8 +178,8 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
180
178
|
end
|
181
179
|
|
182
180
|
case type
|
183
|
-
when :
|
184
|
-
|
181
|
+
when :origin # http://www.google.com:81
|
182
|
+
to_origin == url.to_origin
|
185
183
|
when :host # www.google.com
|
186
184
|
to_host == url.to_host
|
187
185
|
when :domain # google.com
|
@@ -206,8 +204,8 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
206
204
|
# @return [Boolean] True if valid, absolute and crawlable, otherwise false.
|
207
205
|
def valid?
|
208
206
|
return false if relative?
|
209
|
-
return false unless
|
210
|
-
return false
|
207
|
+
return false unless to_origin && to_domain
|
208
|
+
return false unless URI::DEFAULT_PARSER.make_regexp.match(normalize)
|
211
209
|
|
212
210
|
true
|
213
211
|
end
|
@@ -238,7 +236,8 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
238
236
|
Wgit::Url.new(concatted)
|
239
237
|
end
|
240
238
|
|
241
|
-
#
|
239
|
+
# Normalizes/escapes self and returns a new Wgit::Url. Self isn't modified.
|
240
|
+
# This should be used before GET'ing the url, in case it has IRI chars.
|
242
241
|
#
|
243
242
|
# @return [Wgit::Url] An escaped version of self.
|
244
243
|
def normalize
|
@@ -249,8 +248,8 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
249
248
|
# modify the receiver.
|
250
249
|
#
|
251
250
|
# If self is absolute then it's returned as is, making this method
|
252
|
-
# idempotent. The doc's
|
253
|
-
# doc.url is used as the base; which is concatted with self.
|
251
|
+
# idempotent. The doc's `<base>` element is used if present, otherwise
|
252
|
+
# `doc.url` is used as the base; which is concatted with self.
|
254
253
|
#
|
255
254
|
# Typically used to build an absolute link obtained from a document.
|
256
255
|
#
|
@@ -258,14 +257,14 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
258
257
|
# link = Wgit::Url.new('/favicon.png')
|
259
258
|
# doc = Wgit::Document.new('http://example.com')
|
260
259
|
#
|
261
|
-
# link.
|
260
|
+
# link.make_absolute(doc) # => "http://example.com/favicon.png"
|
262
261
|
#
|
263
262
|
# @param doc [Wgit::Document] The doc whose base Url is concatted with
|
264
263
|
# self.
|
265
264
|
# @raise [StandardError] If doc isn't a Wgit::Document or if `doc.base_url`
|
266
265
|
# raises an Exception.
|
267
266
|
# @return [Wgit::Url] Self in absolute form.
|
268
|
-
def
|
267
|
+
def make_absolute(doc)
|
269
268
|
assert_type(doc, Wgit::Document)
|
270
269
|
|
271
270
|
absolute? ? self : doc.base_url(link: self).concat(self)
|
@@ -294,8 +293,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
294
293
|
#
|
295
294
|
# @return [Hash] self's instance vars as a Hash.
|
296
295
|
def to_h
|
297
|
-
|
298
|
-
h = Wgit::Utils.to_h(self, ignore: ignore)
|
296
|
+
h = Wgit::Utils.to_h(self, ignore: ['@uri'])
|
299
297
|
Hash[h.to_a.insert(0, ['url', self])] # Insert url at position 0.
|
300
298
|
end
|
301
299
|
|
@@ -338,6 +336,20 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
338
336
|
host ? Wgit::Url.new(host) : nil
|
339
337
|
end
|
340
338
|
|
339
|
+
# Returns a new Wgit::Url containing just the port of this URL e.g.
|
340
|
+
# Given http://www.google.co.uk:443/about.html, '443' is returned.
|
341
|
+
#
|
342
|
+
# @return [Wgit::Url, nil] Containing just the port or nil.
|
343
|
+
def to_port
|
344
|
+
port = @uri.port
|
345
|
+
|
346
|
+
# @uri.port defaults port to 80/443 if missing, so we check for :#{port}.
|
347
|
+
return nil unless port
|
348
|
+
return nil unless include?(":#{port}")
|
349
|
+
|
350
|
+
Wgit::Url.new(port.to_s)
|
351
|
+
end
|
352
|
+
|
341
353
|
# Returns a new Wgit::Url containing just the domain of this URL e.g.
|
342
354
|
# Given http://www.google.co.uk/about.html, google.co.uk is returned.
|
343
355
|
#
|
@@ -347,6 +359,20 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
347
359
|
domain ? Wgit::Url.new(domain) : nil
|
348
360
|
end
|
349
361
|
|
362
|
+
# Returns a new Wgit::Url containing just the sub domain of this URL e.g.
|
363
|
+
# Given http://scripts.dev.google.com, scripts.dev is returned.
|
364
|
+
#
|
365
|
+
# @return [Wgit::Url, nil] Containing just the sub domain or nil.
|
366
|
+
def to_sub_domain
|
367
|
+
return nil unless to_host
|
368
|
+
|
369
|
+
dot_domain = ".#{to_domain}"
|
370
|
+
return nil unless include?(dot_domain)
|
371
|
+
|
372
|
+
sub_domain = to_host.sub(dot_domain, '')
|
373
|
+
Wgit::Url.new(sub_domain)
|
374
|
+
end
|
375
|
+
|
350
376
|
# Returns a new Wgit::Url containing just the brand of this URL e.g.
|
351
377
|
# Given http://www.google.co.uk/about.html, google is returned.
|
352
378
|
#
|
@@ -362,12 +388,24 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
362
388
|
# @return [Wgit::Url, nil] The base of self e.g. http://www.google.co.uk or
|
363
389
|
# nil.
|
364
390
|
def to_base
|
365
|
-
return nil
|
391
|
+
return nil unless @uri.scheme && @uri.host
|
366
392
|
|
367
393
|
base = "#{@uri.scheme}://#{@uri.host}"
|
368
394
|
Wgit::Url.new(base)
|
369
395
|
end
|
370
396
|
|
397
|
+
# Returns only the origin of this URL e.g. the protocol scheme, host and
|
398
|
+
# port combined. For http://localhost:3000/api, http://localhost:3000 gets
|
399
|
+
# returned. If there's no port present, then to_base is returned.
|
400
|
+
#
|
401
|
+
# @return [Wgit::Url, nil] The origin of self or nil.
|
402
|
+
def to_origin
|
403
|
+
return nil unless to_base
|
404
|
+
return to_base unless to_port
|
405
|
+
|
406
|
+
Wgit::Url.new("#{to_base}:#{to_port}")
|
407
|
+
end
|
408
|
+
|
371
409
|
# Returns the path of this URL e.g. the bit after the host without slashes.
|
372
410
|
# For example:
|
373
411
|
# Wgit::Url.new("http://www.google.co.uk/about.html/").to_path returns
|
@@ -396,7 +434,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
396
434
|
end
|
397
435
|
|
398
436
|
# Returns a new Wgit::Url containing just the query string of this URL
|
399
|
-
# e.g. Given http://google.com?q=
|
437
|
+
# e.g. Given http://google.com?q=foo&bar=1, 'q=foo&bar=1' is returned.
|
400
438
|
#
|
401
439
|
# @return [Wgit::Url, nil] Containing just the query string or nil.
|
402
440
|
def to_query
|
@@ -404,6 +442,24 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
404
442
|
query ? Wgit::Url.new(query) : nil
|
405
443
|
end
|
406
444
|
|
445
|
+
# Returns a Hash containing just the query string parameters of this URL
|
446
|
+
# e.g. Given http://google.com?q=ruby, "{ 'q' => 'ruby' }" is returned.
|
447
|
+
#
|
448
|
+
# @param symbolize_keys [Boolean] The returned Hash keys will be Symbols if
|
449
|
+
# true, Strings otherwise.
|
450
|
+
# @return [Hash<String | Symbol, String>] Containing the query string
|
451
|
+
# params or empty if the URL doesn't contain any query parameters.
|
452
|
+
def to_query_hash(symbolize_keys: false)
|
453
|
+
query_str = to_query
|
454
|
+
return {} unless query_str
|
455
|
+
|
456
|
+
query_str.split('&').each_with_object({}) do |param, hash|
|
457
|
+
k, v = param.split('=')
|
458
|
+
k = k.to_sym if symbolize_keys
|
459
|
+
hash[k] = v
|
460
|
+
end
|
461
|
+
end
|
462
|
+
|
407
463
|
# Returns a new Wgit::Url containing just the fragment string of this URL
|
408
464
|
# e.g. Given http://google.com#about, #about is returned.
|
409
465
|
#
|
@@ -425,6 +481,24 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
425
481
|
segs.length > 1 ? Wgit::Url.new(segs.last) : nil
|
426
482
|
end
|
427
483
|
|
484
|
+
# Returns a new Wgit::Url containing just the username string of this URL
|
485
|
+
# e.g. Given http://me:pass1@example.com, me is returned.
|
486
|
+
#
|
487
|
+
# @return [Wgit::Url, nil] Containing just the user string or nil.
|
488
|
+
def to_user
|
489
|
+
user = @uri.user
|
490
|
+
user ? Wgit::Url.new(user) : nil
|
491
|
+
end
|
492
|
+
|
493
|
+
# Returns a new Wgit::Url containing just the password string of this URL
|
494
|
+
# e.g. Given http://me:pass1@example.com, pass1 is returned.
|
495
|
+
#
|
496
|
+
# @return [Wgit::Url, nil] Containing just the password string or nil.
|
497
|
+
def to_password
|
498
|
+
password = @uri.password
|
499
|
+
password ? Wgit::Url.new(password) : nil
|
500
|
+
end
|
501
|
+
|
428
502
|
# Omits the given URL components from self and returns a new Wgit::Url.
|
429
503
|
#
|
430
504
|
# Calls Addressable::URI#omit underneath and creates a new Wgit::Url from
|
@@ -468,7 +542,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
468
542
|
.omit_trailing_slash
|
469
543
|
end
|
470
544
|
|
471
|
-
# Returns a new Wgit::Url with the base (
|
545
|
+
# Returns a new Wgit::Url with the base (scheme and host) removed e.g. Given
|
472
546
|
# http://google.com/search?q=something#about, search?q=something#about is
|
473
547
|
# returned. If relative and base isn't present then self is returned.
|
474
548
|
# Leading and trailing slashes are always stripped from the return value.
|
@@ -483,6 +557,21 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
483
557
|
Wgit::Url.new(omit_base).omit_slashes
|
484
558
|
end
|
485
559
|
|
560
|
+
# Returns a new Wgit::Url with the origin (base + port) removed e.g. Given
|
561
|
+
# http://google.com:81/search?q=something#about, search?q=something#about is
|
562
|
+
# returned. If relative and base isn't present then self is returned.
|
563
|
+
# Leading and trailing slashes are always stripped from the return value.
|
564
|
+
#
|
565
|
+
# @return [Wgit::Url] Self containing everything after the origin.
|
566
|
+
def omit_origin
|
567
|
+
origin = to_origin
|
568
|
+
omit_origin = origin ? gsub(origin, '') : self
|
569
|
+
|
570
|
+
return self if ['', '/'].include?(omit_origin)
|
571
|
+
|
572
|
+
Wgit::Url.new(omit_origin).omit_slashes
|
573
|
+
end
|
574
|
+
|
486
575
|
# Returns a new Wgit::Url with the query string portion removed e.g. Given
|
487
576
|
# http://google.com/search?q=hello, http://google.com/search is
|
488
577
|
# returned. Self is returned as is if no query string is present. A URL
|
@@ -528,25 +617,38 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
|
|
528
617
|
start_with?('#')
|
529
618
|
end
|
530
619
|
|
620
|
+
# Returns true if self equals '/' a.k.a. index.
|
621
|
+
#
|
622
|
+
# @return [Boolean] True if self equals '/', false otherwise.
|
623
|
+
def index?
|
624
|
+
self == '/'
|
625
|
+
end
|
626
|
+
|
531
627
|
alias + concat
|
532
628
|
alias crawled? crawled
|
533
|
-
alias normalise normalize
|
534
629
|
alias is_relative? relative?
|
535
630
|
alias is_absolute? absolute?
|
536
631
|
alias is_valid? valid?
|
537
632
|
alias is_query? query?
|
538
633
|
alias is_fragment? fragment?
|
634
|
+
alias is_index? index?
|
539
635
|
alias uri to_uri
|
540
636
|
alias url to_url
|
541
637
|
alias scheme to_scheme
|
542
638
|
alias host to_host
|
639
|
+
alias port to_port
|
543
640
|
alias domain to_domain
|
544
641
|
alias brand to_brand
|
545
642
|
alias base to_base
|
643
|
+
alias origin to_origin
|
546
644
|
alias path to_path
|
547
645
|
alias endpoint to_endpoint
|
548
646
|
alias query to_query
|
647
|
+
alias query_hash to_query_hash
|
549
648
|
alias fragment to_fragment
|
550
649
|
alias extension to_extension
|
650
|
+
alias user to_user
|
651
|
+
alias password to_password
|
652
|
+
alias sub_domain to_sub_domain
|
551
653
|
end
|
552
654
|
end
|
data/lib/wgit/utils.rb
CHANGED
@@ -145,7 +145,8 @@ module Wgit
|
|
145
145
|
# @param keyword_limit [Integer] The max amount of keywords to be
|
146
146
|
# outputted to the stream.
|
147
147
|
# @param stream [#puts] Any object that respond_to?(:puts). It is used
|
148
|
-
# to output text somewhere e.g. a file or
|
148
|
+
# to output text somewhere e.g. a file or STDERR.
|
149
|
+
# @return [Integer] The number of results.
|
149
150
|
def self.printf_search_results(results, keyword_limit: 5, stream: STDOUT)
|
150
151
|
raise 'stream must respond_to? :puts' unless stream.respond_to?(:puts)
|
151
152
|
|
@@ -162,18 +163,37 @@ module Wgit
|
|
162
163
|
stream.puts
|
163
164
|
end
|
164
165
|
|
165
|
-
|
166
|
+
results.size
|
166
167
|
end
|
167
168
|
|
168
|
-
#
|
169
|
+
# Sanitises the obj to make it uniform by calling the correct sanitize_*
|
170
|
+
# method for its type e.g. if obj.is_a? String then sanitize(obj). Any type
|
171
|
+
# not in the case statement will be ignored and returned as is.
|
172
|
+
#
|
173
|
+
# @param obj [Object] The object to be sanitized.
|
174
|
+
# @param encode [Boolean] Whether or not to encode to UTF-8 replacing
|
175
|
+
# invalid characters.
|
176
|
+
# @return [Object] The sanitized obj is both modified and then returned.
|
177
|
+
def self.sanitize(obj, encode: true)
|
178
|
+
case obj
|
179
|
+
when String
|
180
|
+
sanitize_str(obj, encode: encode)
|
181
|
+
when Array
|
182
|
+
sanitize_arr(obj, encode: encode)
|
183
|
+
else
|
184
|
+
obj
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
188
|
+
# Sanitises a String to make it uniform. Strips any leading/trailing white
|
169
189
|
# space. Also applies UTF-8 encoding (replacing invalid characters) if
|
170
190
|
# `encode: true`.
|
171
191
|
#
|
172
|
-
# @param str [String] The String to
|
192
|
+
# @param str [String] The String to sanitize. str is modified.
|
173
193
|
# @param encode [Boolean] Whether or not to encode to UTF-8 replacing
|
174
194
|
# invalid characters.
|
175
|
-
# @return [String] The
|
176
|
-
def self.
|
195
|
+
# @return [String] The sanitized str is both modified and then returned.
|
196
|
+
def self.sanitize_str(str, encode: true)
|
177
197
|
if str.is_a?(String)
|
178
198
|
str.encode!('UTF-8', undef: :replace, invalid: :replace) if encode
|
179
199
|
str.strip!
|
@@ -182,15 +202,15 @@ module Wgit
|
|
182
202
|
str
|
183
203
|
end
|
184
204
|
|
185
|
-
#
|
186
|
-
# processes non empty Strings using Wgit::Utils.
|
205
|
+
# Sanitises an Array to make it uniform. Removes empty Strings and nils,
|
206
|
+
# processes non empty Strings using Wgit::Utils.sanitize and removes
|
187
207
|
# duplicates.
|
188
208
|
#
|
189
|
-
# @param arr [Enumerable] The Array to
|
190
|
-
# @return [Enumerable] The
|
191
|
-
def self.
|
209
|
+
# @param arr [Enumerable] The Array to sanitize. arr is modified.
|
210
|
+
# @return [Enumerable] The sanitized arr is both modified and then returned.
|
211
|
+
def self.sanitize_arr(arr, encode: true)
|
192
212
|
if arr.is_a?(Array)
|
193
|
-
arr.map! { |str|
|
213
|
+
arr.map! { |str| sanitize(str, encode: encode) }
|
194
214
|
arr.reject! { |str| str.is_a?(String) ? str.empty? : false }
|
195
215
|
arr.compact!
|
196
216
|
arr.uniq!
|
@@ -198,13 +218,5 @@ module Wgit
|
|
198
218
|
|
199
219
|
arr
|
200
220
|
end
|
201
|
-
|
202
|
-
# Returns the model having removed non bson types (for use with MongoDB).
|
203
|
-
#
|
204
|
-
# @param model_hash [Hash] The model Hash to process.
|
205
|
-
# @return [Hash] The model Hash with non bson types removed.
|
206
|
-
def self.remove_non_bson_types(model_hash)
|
207
|
-
model_hash.select { |_k, v| v.respond_to?(:bson_type) }
|
208
|
-
end
|
209
221
|
end
|
210
222
|
end
|
data/lib/wgit/version.rb
CHANGED
@@ -2,10 +2,11 @@
|
|
2
2
|
|
3
3
|
# Wgit is a WWW indexer/scraper which crawls URL's and retrieves their page
|
4
4
|
# contents for later use.
|
5
|
+
#
|
5
6
|
# @author Michael Telford
|
6
7
|
module Wgit
|
7
8
|
# The current gem version of Wgit.
|
8
|
-
VERSION = '0.
|
9
|
+
VERSION = '0.9.0'
|
9
10
|
|
10
11
|
# Returns the current gem version of Wgit as a String.
|
11
12
|
def self.version
|