wgit 0.0.10 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +21 -0
- data/README.md +334 -0
- data/TODO.txt +35 -0
- data/lib/wgit/assertable.rb +4 -0
- data/lib/wgit/core_ext.rb +4 -2
- data/lib/wgit/crawler.rb +188 -188
- data/lib/wgit/database/database.rb +22 -21
- data/lib/wgit/document.rb +594 -592
- data/lib/wgit/url.rb +306 -278
- data/lib/wgit/version.rb +1 -1
- metadata +6 -3
data/lib/wgit/crawler.rb
CHANGED
@@ -1,188 +1,188 @@
-require_relative 'url'
-require_relative 'document'
-require_relative 'utils'
-require_relative 'assertable'
-require 'net/http' # requires 'uri'
-
-module Wgit
-
-  # The Crawler class provides a means of crawling web based URL's, turning
-  # their HTML into Wgit::Document's.
-  class Crawler
-    include Assertable
-
-    # The urls to crawl.
-    attr_reader :urls
-
-    # The docs of the crawled @urls.
-    attr_reader :docs
-
-    # Initializes the Crawler by setting the @urls and @docs.
-    #
-    # @param urls [*Wgit::Url] The URLs to crawl.
-    def initialize(*urls)
-      self.[](*urls)
-      @docs = []
-    end
-
-    # Sets this Crawler's @urls.
-    #
-    # @param urls [Array<Wgit::Url>] The URLs to crawl.
-    def urls=(urls)
-      @urls = []
-      Wgit::Utils.each(urls) { |url| add_url(url) }
-    end
-
-    # Sets this Crawler's @urls.
-    #
-    # @param urls [*Wgit::Url] The URLs to crawl.
-    def [](*urls)
-      # If urls is nil then add_url (when called later) will set @urls = []
-      # so we do nothing here.
-      if not urls.nil?
-        # Due to *urls you can end up with [[url1,url2,url3]] etc. where the
-        # outer array is bogus so we use the inner one only.
-        if urls.is_a?(Enumerable) &&
-           urls.length == 1 &&
-           urls.first.is_a?(Enumerable)
-          urls = urls.first
-        end
-
-        # Here we call urls= method using self because the param name is also
-        # urls which conflicts.
-        self.urls = urls
-      end
-    end
-
-    # Adds the url to this Crawler's @urls.
-    #
-    # @param url [Wgit::Url] A URL to crawl.
-    def <<(url)
-      add_url(url)
-    end
-
-    # Crawls individual urls, not entire sites.
-    #
-    # @param urls [Array<Wgit::Url>] The URLs to crawl.
-    # @yield [doc] If provided, the block is given each crawled
-    #   Document. Otherwise each doc is added to @docs which can be accessed
-    #   by Crawler#docs after this method returns.
-    # @return [Wgit::Document] The last Document crawled.
-    def crawl_urls(urls = @urls, &block)
-      raise "No urls to crawl" unless urls
-      @docs = []
-      doc = nil
-      Wgit::Utils.each(urls) { |url| doc = handle_crawl_block(url, &block) }
-      doc ? doc : @docs.last
-    end
-
-    # Crawl the url and return the response document or nil.
-    #
-    # @param url [Wgit::Document] The URL to crawl.
-    # @yield [doc] The crawled HTML Document regardless if the
-    #   crawl was successful or not. Therefore, the Document#url can be used.
-    # @return [Wgit::Document, nil] The crawled HTML Document or nil if the
-    #   crawl was unsuccessful.
-    def crawl_url(url = @urls.first)
-      assert_type(url, Wgit::Url)
-      markup = fetch(url)
-      url.crawled = true
-      doc = Wgit::Document.new(url, markup)
-      yield(doc) if block_given?
-      doc.empty? ? nil : doc
-    end
-
-    # Crawls an entire site by recursively going through its internal_links.
-    #
-    # @param base_url [Wgit::Url] The base URL of the website to be crawled.
-    # @yield [doc] Given each crawled Document/page of the site.
-    #   A block is the only way to interact with each crawled Document.
-    # @return [Array<Wgit::Url>, nil] Unique Array of external urls collected
-    #   from all of the site's pages or nil if the base_url could not be
-    #   crawled successfully.
-    def crawl_site(base_url = @urls.first, &block)
-      assert_type(base_url, Wgit::Url)
-
-      doc = crawl_url(base_url, &block)
-      return nil if doc.nil?
-
-      path = base_url.path.
-      crawled_urls = [path]
-      external_urls = doc.external_links
-      internal_urls = doc.internal_links
-
-      return doc.external_links.uniq if internal_urls.empty?
-
-      loop do
-        internal_urls.uniq!
-
-        links = internal_urls - crawled_urls
-        break if links.empty?
-
-        links.each do |link|
-          doc = crawl_url(Wgit::Url.concat(base_url.to_base, link), &block)
-          crawled_urls << link
-          next if doc.nil?
-          internal_urls.concat(doc.internal_links)
-          external_urls.concat(doc.external_links)
-        end
-      end
-
-      external_urls.uniq
-    end
-
-    private
-
-    # Add the document to the @docs array for later processing or let the block
-    # process it here and now.
-    def handle_crawl_block(url, &block)
-      if block_given?
-        crawl_url(url, &block)
-      else
-        @docs << crawl_url(url)
-        nil
-      end
-    end
-
-    # The fetch method performs a HTTP GET to obtain the HTML document.
-    # Invalid urls or any HTTP response that doesn't return a HTML body will be
-    # ignored and nil will be returned. Otherwise, the HTML is returned.
-    def fetch(url)
-      response = resolve(url)
-      response.body.empty? ? nil : response.body
-    rescue Exception => ex
-      Wgit.logger.debug(
-        "Wgit::Crawler#fetch('#{url}') exception: #{ex.message}"
-      )
-      nil
-    end
-
-    # The resolve method performs a HTTP GET to obtain the HTML document.
-    # A certain amount of redirects will be followed by default before raising
-    # an exception. Redirects can be disabled by setting `redirect_limit: 1`.
-    # The Net::HTTPResponse will be returned.
-    def resolve(url, redirect_limit: 5)
-      redirect_count = -1
-      begin
-        raise "Too many redirects" if redirect_count >= redirect_limit
-        redirect_count += 1
-
-        response = Net::HTTP.get_response(URI(url))
-        location = Wgit::Url.new(response.fetch('location', ''))
-        if not location.empty?
-          url = location.is_relative? ? url.to_base.concat(location) : location
-        end
-      end while response.is_a?(Net::HTTPRedirection)
-      response
-    end
-
-    # Add the url to @urls ensuring it is cast to a Wgit::Url if necessary.
-    def add_url(url)
-      @urls = [] if @urls.nil?
-      @urls << Wgit::Url.new(url)
-    end
-
-    alias :crawl :crawl_urls
-    alias :crawl_r :crawl_site
-  end
-end
+require_relative 'url'
+require_relative 'document'
+require_relative 'utils'
+require_relative 'assertable'
+require 'net/http' # requires 'uri'
+
+module Wgit
+
+  # The Crawler class provides a means of crawling web based URL's, turning
+  # their HTML into Wgit::Document's.
+  class Crawler
+    include Assertable
+
+    # The urls to crawl.
+    attr_reader :urls
+
+    # The docs of the crawled @urls.
+    attr_reader :docs
+
+    # Initializes the Crawler by setting the @urls and @docs.
+    #
+    # @param urls [*Wgit::Url] The URLs to crawl.
+    def initialize(*urls)
+      self.[](*urls)
+      @docs = []
+    end
+
+    # Sets this Crawler's @urls.
+    #
+    # @param urls [Array<Wgit::Url>] The URLs to crawl.
+    def urls=(urls)
+      @urls = []
+      Wgit::Utils.each(urls) { |url| add_url(url) }
+    end
+
+    # Sets this Crawler's @urls.
+    #
+    # @param urls [*Wgit::Url] The URLs to crawl.
+    def [](*urls)
+      # If urls is nil then add_url (when called later) will set @urls = []
+      # so we do nothing here.
+      if not urls.nil?
+        # Due to *urls you can end up with [[url1,url2,url3]] etc. where the
+        # outer array is bogus so we use the inner one only.
+        if urls.is_a?(Enumerable) &&
+           urls.length == 1 &&
+           urls.first.is_a?(Enumerable)
+          urls = urls.first
+        end
+
+        # Here we call urls= method using self because the param name is also
+        # urls which conflicts.
+        self.urls = urls
+      end
+    end
+
+    # Adds the url to this Crawler's @urls.
+    #
+    # @param url [Wgit::Url] A URL to crawl.
+    def <<(url)
+      add_url(url)
+    end
+
+    # Crawls individual urls, not entire sites.
+    #
+    # @param urls [Array<Wgit::Url>] The URLs to crawl.
+    # @yield [doc] If provided, the block is given each crawled
+    #   Document. Otherwise each doc is added to @docs which can be accessed
+    #   by Crawler#docs after this method returns.
+    # @return [Wgit::Document] The last Document crawled.
+    def crawl_urls(urls = @urls, &block)
+      raise "No urls to crawl" unless urls
+      @docs = []
+      doc = nil
+      Wgit::Utils.each(urls) { |url| doc = handle_crawl_block(url, &block) }
+      doc ? doc : @docs.last
+    end
+
+    # Crawl the url and return the response document or nil.
+    #
+    # @param url [Wgit::Document] The URL to crawl.
+    # @yield [doc] The crawled HTML Document regardless if the
+    #   crawl was successful or not. Therefore, the Document#url can be used.
+    # @return [Wgit::Document, nil] The crawled HTML Document or nil if the
+    #   crawl was unsuccessful.
+    def crawl_url(url = @urls.first)
+      assert_type(url, Wgit::Url)
+      markup = fetch(url)
+      url.crawled = true
+      doc = Wgit::Document.new(url, markup)
+      yield(doc) if block_given?
+      doc.empty? ? nil : doc
+    end
+
+    # Crawls an entire site by recursively going through its internal_links.
+    #
+    # @param base_url [Wgit::Url] The base URL of the website to be crawled.
+    # @yield [doc] Given each crawled Document/page of the site.
+    #   A block is the only way to interact with each crawled Document.
+    # @return [Array<Wgit::Url>, nil] Unique Array of external urls collected
+    #   from all of the site's pages or nil if the base_url could not be
+    #   crawled successfully.
+    def crawl_site(base_url = @urls.first, &block)
+      assert_type(base_url, Wgit::Url)
+
+      doc = crawl_url(base_url, &block)
+      return nil if doc.nil?
+
+      path = base_url.path.nil? ? '/' : base_url.path
+      crawled_urls = [path]
+      external_urls = doc.external_links
+      internal_urls = doc.internal_links
+
+      return doc.external_links.uniq if internal_urls.empty?
+
+      loop do
+        internal_urls.uniq!
+
+        links = internal_urls - crawled_urls
+        break if links.empty?
+
+        links.each do |link|
+          doc = crawl_url(Wgit::Url.concat(base_url.to_base, link), &block)
+          crawled_urls << link
+          next if doc.nil?
+          internal_urls.concat(doc.internal_links)
+          external_urls.concat(doc.external_links)
+        end
+      end
+
+      external_urls.uniq
+    end
+
+    private
+
+    # Add the document to the @docs array for later processing or let the block
+    # process it here and now.
+    def handle_crawl_block(url, &block)
+      if block_given?
+        crawl_url(url, &block)
+      else
+        @docs << crawl_url(url)
+        nil
+      end
+    end
+
+    # The fetch method performs a HTTP GET to obtain the HTML document.
+    # Invalid urls or any HTTP response that doesn't return a HTML body will be
+    # ignored and nil will be returned. Otherwise, the HTML is returned.
+    def fetch(url)
+      response = resolve(url)
+      response.body.empty? ? nil : response.body
+    rescue Exception => ex
+      Wgit.logger.debug(
+        "Wgit::Crawler#fetch('#{url}') exception: #{ex.message}"
+      )
+      nil
+    end
+
+    # The resolve method performs a HTTP GET to obtain the HTML document.
+    # A certain amount of redirects will be followed by default before raising
+    # an exception. Redirects can be disabled by setting `redirect_limit: 1`.
+    # The Net::HTTPResponse will be returned.
+    def resolve(url, redirect_limit: 5)
+      redirect_count = -1
+      begin
+        raise "Too many redirects" if redirect_count >= redirect_limit
+        redirect_count += 1
+
+        response = Net::HTTP.get_response(URI(url))
+        location = Wgit::Url.new(response.fetch('location', ''))
+        if not location.empty?
+          url = location.is_relative? ? url.to_base.concat(location) : location
+        end
+      end while response.is_a?(Net::HTTPRedirection)
+      response
+    end
+
+    # Add the url to @urls ensuring it is cast to a Wgit::Url if necessary.
+    def add_url(url)
+      @urls = [] if @urls.nil?
+      @urls << Wgit::Url.new(url)
+    end
+
+    alias :crawl :crawl_urls
+    alias :crawl_r :crawl_site
+  end
+end
data/lib/wgit/database/database.rb
CHANGED
@@ -8,7 +8,7 @@ require 'mongo'
 
 module Wgit
 
-  # Class modeling a DB connection and CRUD operations for the Url and
+  # Class modeling a DB connection and CRUD operations for the Url and
   # Document collections.
   class Database
     include Assertable
@@ -19,7 +19,7 @@ module Wgit
     def initialize
      conn_details = Wgit::CONNECTION_DETAILS
      if conn_details.empty?
-        raise "Wgit::CONNECTION_DETAILS must be defined and include :host,
+        raise "Wgit::CONNECTION_DETAILS must be defined and include :host,
 :port, :db, :uname, :pword for a database connection to be established."
      end
 
@@ -29,14 +29,14 @@ module Wgit
      Mongo::Logger.logger.level = Logger::ERROR
 
      address = "#{conn_details[:host]}:#{conn_details[:port]}"
-      @@client = Mongo::Client.new([address],
+      @@client = Mongo::Client.new([address],
                                    database: conn_details[:db],
                                    user: conn_details[:uname],
                                    password: conn_details[:pword])
    end
 
    ### Create Data ###
-
+
    # Insert one or more Url or Document objects into the DB.
    #
    # @param data [Hash, Enumerable<Hash>] Hash(es) returned from
@@ -57,9 +57,9 @@ module Wgit
        raise "data is not in the correct format (all Url's or Document's)"
      end
    end
-
+
    ### Retrieve Data ###
-
+
    # Returns Url records from the DB. All Urls are sorted by date_added
    # ascending, in other words the first url returned is the first one that
    # was inserted into the DB.
@@ -71,18 +71,18 @@ module Wgit
    # @return [Array<Wgit::Url>] The Urls obtained from the DB.
    def urls(crawled = nil, limit = 0, skip = 0)
      crawled.nil? ? query = {} : query = { crawled: crawled }
-
+
      sort = { date_added: 1 }
      results = retrieve(:urls, query, sort, {}, limit, skip)
      return [] if results.count < 1
-
+
      # results.respond_to? :map! is false so we use map and overwrite the var.
      results = results.map { |url_doc| Wgit::Url.new(url_doc) }
      results.each { |url| yield(url) } if block_given?
-
+
      results
    end
-
+
    # Returns Url records that have been crawled.
    #
    # @param limit [Integer] The max number of Url's to return. 0 returns all.
@@ -127,20 +127,20 @@ module Wgit
    def search(query, whole_sentence = false, limit = 10, skip = 0)
      query.strip!
      query.replace("\"" + query + "\"") if whole_sentence
-
+
      # The sort_proj sorts based on the most search hits.
      # We use the sort_proj hash as both a sort and a projection below.
      # :$caseSensitive => case_sensitive, 3.2+ only.
      sort_proj = { score: { :$meta => "textScore" } }
      query = { :$text => { :$search => query } }
-
+
      results = retrieve(:documents, query, sort_proj, sort_proj, limit, skip)
      return [] if results.count < 1 # respond_to? :empty? == false
-
+
      # results.respond_to? :map! is false so we use map and overwrite the var.
      results = results.map { |mongo_doc| Wgit::Document.new(mongo_doc) }
      results.each { |doc| yield(doc) } if block_given?
-
+
      results
    end
 
@@ -150,7 +150,7 @@ module Wgit
    def stats
      @@client.command(dbStats: 0).documents[0]
    end
-
+
    # Returns the current size of the database.
    #
    # @return [Integer] The current size of the DB.
@@ -201,7 +201,7 @@ module Wgit
    end
 
    ### Update Data ###
-
+
    # Update a Url or Document object in the DB.
    #
    # @param data [Hash, Enumerable<Hash>] Hash(es) returned from
@@ -254,7 +254,7 @@ module Wgit
      end
      create(:urls, url_or_urls)
    end
-
+
    # Insert one or more Document objects into the DB.
    def insert_docs(doc_or_docs)
      unless doc_or_docs.respond_to?(:map)
@@ -270,7 +270,7 @@ module Wgit
      end
      create(:documents, doc_or_docs)
    end
-
+
    # Create/insert one or more Url or Document records into the DB.
    def create(collection, data)
      assert_type(data, [Hash, Array])
@@ -324,9 +324,9 @@ module Wgit
      update = { "$set" => doc_hash }
      _update(true, :documents, selection, update)
    end
-
+
    # Update one or more Url or Document records in the DB.
-    # NOTE: The Model.common_update_data should be merged in the calling
+    # NOTE: The Model.common_update_data should be merged in the calling
    # method as the update param can be bespoke due to its nature.
    def _update(single, collection, selection, update)
      assert_arr_types([selection, update], Hash)
@@ -338,12 +338,13 @@ module Wgit
      raise "DB write (update) failed" unless write_succeeded?(result)
      result.n
    end
-
+
    alias :count :size
    alias :length :size
    alias :num_documents :num_docs
    alias :document? :doc?
    alias :insert_url :insert_urls
    alias :insert_doc :insert_docs
+   alias :num_objects :num_records
  end
 end