wgit 0.0.18 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/wgit.rb +0 -1
- data/lib/wgit/assertable.rb +20 -23
- data/lib/wgit/core_ext.rb +6 -14
- data/lib/wgit/crawler.rb +94 -183
- data/lib/wgit/database/database.rb +209 -185
- data/lib/wgit/database/model.rb +7 -7
- data/lib/wgit/document.rb +281 -241
- data/lib/wgit/indexer.rb +99 -92
- data/lib/wgit/logger.rb +5 -1
- data/lib/wgit/url.rb +171 -185
- data/lib/wgit/utils.rb +57 -68
- data/lib/wgit/version.rb +1 -1
- metadata +86 -60
- data/CHANGELOG.md +0 -61
- data/LICENSE.txt +0 -21
- data/README.md +0 -361
- data/TODO.txt +0 -34
- data/lib/wgit/database/connection_details.rb +0 -41
@@ -1,84 +1,100 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require_relative '../document'
|
4
3
|
require_relative '../url'
|
5
|
-
require_relative '../
|
4
|
+
require_relative '../document'
|
5
|
+
require_relative '../logger'
|
6
6
|
require_relative '../assertable'
|
7
7
|
require_relative 'model'
|
8
8
|
require 'logger'
|
9
9
|
require 'mongo'
|
10
10
|
|
11
11
|
module Wgit
|
12
|
-
# Class modeling a DB connection and CRUD operations for the Url and
|
13
|
-
#
|
12
|
+
# Class modeling a DB connection and CRUD operations for the Url and Document
|
13
|
+
# collections.
|
14
14
|
class Database
|
15
15
|
include Assertable
|
16
16
|
|
17
|
-
#
|
17
|
+
# The connection string for the database.
|
18
|
+
attr_reader :connection_string
|
19
|
+
|
20
|
+
# The database client object. Gets set when a connection is established.
|
21
|
+
attr_reader :client
|
22
|
+
|
23
|
+
# Initializes a connected database client using the provided
|
24
|
+
# connection_string or ENV['WGIT_CONNECTION_STRING'].
|
18
25
|
#
|
19
|
-
# @raise [
|
20
|
-
|
21
|
-
|
26
|
+
# @raise [StandardError] If a connection string isn't provided, either as a
|
27
|
+
# parameter or via the environment.
|
28
|
+
def initialize(connection_string = nil)
|
29
|
+
connection_string ||= ENV['WGIT_CONNECTION_STRING']
|
30
|
+
raise "connection_string and ENV['WGIT_CONNECTION_STRING'] are nil" \
|
31
|
+
unless connection_string
|
32
|
+
|
33
|
+
@client = Database.establish_connection(connection_string)
|
34
|
+
@connection_string = connection_string
|
22
35
|
end
|
23
36
|
|
24
|
-
#
|
37
|
+
# A class alias for Database.new.
|
25
38
|
#
|
26
|
-
# @
|
27
|
-
def self.connect
|
28
|
-
|
29
|
-
|
30
|
-
:connection_string"
|
31
|
-
end
|
39
|
+
# @return [Wgit::Database] The connected database client.
|
40
|
+
def self.connect(connection_string = nil)
|
41
|
+
new(connection_string)
|
42
|
+
end
|
32
43
|
|
33
|
-
|
44
|
+
# Initializes a connected database client using the connection string.
|
45
|
+
#
|
46
|
+
# @raise [StandardError] If a connection cannot be established.
|
47
|
+
# @return [Mongo::Client] The connected MongoDB client.
|
48
|
+
def self.establish_connection(connection_string)
|
49
|
+
# Only log for error (and more severe) scenarios.
|
34
50
|
Mongo::Logger.logger = Wgit.logger.clone
|
35
51
|
Mongo::Logger.logger.progname = 'mongo'
|
36
52
|
Mongo::Logger.logger.level = Logger::ERROR
|
37
53
|
|
38
54
|
# Connects to the database here.
|
39
|
-
Mongo::Client.new(
|
55
|
+
Mongo::Client.new(connection_string)
|
40
56
|
end
|
41
57
|
|
42
58
|
### Create Data ###
|
43
59
|
|
44
60
|
# Insert one or more Url or Document objects into the DB.
|
45
61
|
#
|
46
|
-
# @param data [
|
47
|
-
# Wgit::
|
48
|
-
#
|
62
|
+
# @param data [Wgit::Url, Wgit::Document, Enumerable<Wgit::Url,
|
63
|
+
# Wgit::Document>] Hash(es) returned from Wgit::Model.url or
|
64
|
+
# Wgit::Model.document.
|
65
|
+
# @raise [StandardError] If data isn't valid.
|
49
66
|
def insert(data)
|
50
|
-
|
67
|
+
type = data.is_a?(Enumerable) ? data.first : data
|
68
|
+
case type
|
69
|
+
when Wgit::Url
|
51
70
|
insert_urls(data)
|
52
|
-
|
71
|
+
when Wgit::Document
|
53
72
|
insert_docs(data)
|
54
|
-
elsif data.respond_to?(:first)
|
55
|
-
if data.first.is_a?(Url)
|
56
|
-
insert_urls(data)
|
57
|
-
else
|
58
|
-
insert_docs(data)
|
59
|
-
end
|
60
73
|
else
|
61
|
-
raise "
|
74
|
+
raise "Unsupported type - #{data.class}: #{data}"
|
62
75
|
end
|
63
76
|
end
|
64
77
|
|
65
78
|
### Retrieve Data ###
|
66
79
|
|
67
|
-
# Returns Url records from the DB.
|
68
|
-
#
|
69
|
-
#
|
80
|
+
# Returns Url records from the DB.
|
81
|
+
#
|
82
|
+
# All Urls are sorted by date_added ascending, in other words the first url
|
83
|
+
# returned is the first one that was inserted into the DB.
|
70
84
|
#
|
71
85
|
# @param crawled [Boolean] Filter by Url#crawled value. nil returns all.
|
72
86
|
# @param limit [Integer] The max number of Url's to return. 0 returns all.
|
73
87
|
# @param skip [Integer] Skip n amount of Url's.
|
74
|
-
# @yield [url] Given each Url returned from the DB.
|
88
|
+
# @yield [url] Given each Url object (Wgit::Url) returned from the DB.
|
75
89
|
# @return [Array<Wgit::Url>] The Urls obtained from the DB.
|
76
|
-
def urls(crawled
|
90
|
+
def urls(crawled: nil, limit: 0, skip: 0)
|
77
91
|
query = crawled.nil? ? {} : { crawled: crawled }
|
78
|
-
|
79
92
|
sort = { date_added: 1 }
|
80
|
-
|
81
|
-
|
93
|
+
|
94
|
+
results = retrieve(:urls, query,
|
95
|
+
sort: sort, projection: {},
|
96
|
+
limit: limit, skip: skip)
|
97
|
+
return [] if results.count < 1 # results#empty? doesn't exist.
|
82
98
|
|
83
99
|
# results.respond_to? :map! is false so we use map and overwrite the var.
|
84
100
|
results = results.map { |url_doc| Wgit::Url.new(url_doc) }
|
@@ -91,54 +107,59 @@ module Wgit
|
|
91
107
|
#
|
92
108
|
# @param limit [Integer] The max number of Url's to return. 0 returns all.
|
93
109
|
# @param skip [Integer] Skip n amount of Url's.
|
94
|
-
# @yield [url] Given each Url returned from the DB.
|
110
|
+
# @yield [url] Given each Url object (Wgit::Url) returned from the DB.
|
95
111
|
# @return [Array<Wgit::Url>] The crawled Urls obtained from the DB.
|
96
|
-
def crawled_urls(limit
|
97
|
-
urls(true, limit, skip, &block)
|
112
|
+
def crawled_urls(limit: 0, skip: 0, &block)
|
113
|
+
urls(crawled: true, limit: limit, skip: skip, &block)
|
98
114
|
end
|
99
115
|
|
100
|
-
# Returned Url records that haven't been crawled.
|
101
|
-
# block, if given.
|
116
|
+
# Returns Url records that haven't yet been crawled.
|
102
117
|
#
|
103
118
|
# @param limit [Integer] The max number of Url's to return. 0 returns all.
|
104
119
|
# @param skip [Integer] Skip n amount of Url's.
|
105
|
-
# @yield [url] Given each Url returned from the DB.
|
120
|
+
# @yield [url] Given each Url object (Wgit::Url) returned from the DB.
|
106
121
|
# @return [Array<Wgit::Url>] The uncrawled Urls obtained from the DB.
|
107
|
-
def uncrawled_urls(limit
|
108
|
-
urls(false, limit, skip, &block)
|
122
|
+
def uncrawled_urls(limit: 0, skip: 0, &block)
|
123
|
+
urls(crawled: false, limit: limit, skip: skip, &block)
|
109
124
|
end
|
110
125
|
|
111
|
-
# Searches
|
126
|
+
# Searches the database's Documents for the given query.
|
112
127
|
#
|
113
|
-
#
|
114
|
-
#
|
115
|
-
# The searched fields are decided by the text index setup against the
|
128
|
+
# The searched fields are decided by the text index setup on the
|
116
129
|
# documents collection. Currently we search against the following fields:
|
117
|
-
# "author", "keywords", "title" and "text".
|
130
|
+
# "author", "keywords", "title" and "text" by default.
|
118
131
|
#
|
119
|
-
# The MongoDB search ranks/sorts the results in order (highest
|
120
|
-
#
|
121
|
-
#
|
122
|
-
# elsewhere if needed.
|
132
|
+
# The MongoDB search algorithm ranks/sorts the results in order (highest
|
133
|
+
# first) based on each document's "textScore" (which records the number of
|
134
|
+
# query hits). The "textScore" is then stored in each Document result
|
135
|
+
# object for use elsewhere if needed; accessed via Wgit::Document#score.
|
123
136
|
#
|
124
137
|
# @param query [String] The text query to search with.
|
138
|
+
# @param case_sensitive [Boolean] Whether character case must match.
|
125
139
|
# @param whole_sentence [Boolean] Whether multiple words should be searched
|
126
140
|
# for separately.
|
127
141
|
# @param limit [Integer] The max number of results to return.
|
128
|
-
# @param skip [Integer] The number of
|
129
|
-
# @yield [doc] Given each search result (Wgit::Document)
|
142
|
+
# @param skip [Integer] The number of results to skip.
|
143
|
+
# @yield [doc] Given each search result (Wgit::Document) returned from the
|
144
|
+
# DB.
|
130
145
|
# @return [Array<Wgit::Document>] The search results obtained from the DB.
|
131
|
-
def search(
|
146
|
+
def search(
|
147
|
+
query, case_sensitive: false, whole_sentence: false, limit: 10, skip: 0
|
148
|
+
)
|
132
149
|
query.strip!
|
133
150
|
query.replace('"' + query + '"') if whole_sentence
|
134
151
|
|
135
|
-
#
|
152
|
+
# Sort based on the most search hits (aka "textScore").
|
136
153
|
# We use the sort_proj hash as both a sort and a projection below.
|
137
|
-
# :$caseSensitive => case_sensitive, 3.2+ only.
|
138
154
|
sort_proj = { score: { :$meta => 'textScore' } }
|
139
|
-
query = { :$text => {
|
140
|
-
|
141
|
-
|
155
|
+
query = { :$text => {
|
156
|
+
:$search => query,
|
157
|
+
:$caseSensitive => case_sensitive
|
158
|
+
} }
|
159
|
+
|
160
|
+
results = retrieve(:documents, query,
|
161
|
+
sort: sort_proj, projection: sort_proj,
|
162
|
+
limit: limit, skip: skip)
|
142
163
|
return [] if results.count < 1 # respond_to? :empty? == false
|
143
164
|
|
144
165
|
# results.respond_to? :map! is false so we use map and overwrite the var.
|
@@ -152,7 +173,7 @@ module Wgit
|
|
152
173
|
#
|
153
174
|
# @return [BSON::Document#[]#fetch] Similar to a Hash instance.
|
154
175
|
def stats
|
155
|
-
|
176
|
+
@client.command(dbStats: 0).documents[0]
|
156
177
|
end
|
157
178
|
|
158
179
|
# Returns the current size of the database.
|
@@ -166,14 +187,14 @@ module Wgit
|
|
166
187
|
#
|
167
188
|
# @return [Integer] The current number of URL records.
|
168
189
|
def num_urls
|
169
|
-
|
190
|
+
@client[:urls].count
|
170
191
|
end
|
171
192
|
|
172
193
|
# Returns the total number of Document records in the DB.
|
173
194
|
#
|
174
195
|
# @return [Integer] The current number of Document records.
|
175
196
|
def num_docs
|
176
|
-
|
197
|
+
@client[:documents].count
|
177
198
|
end
|
178
199
|
|
179
200
|
# Returns the total number of records (urls + docs) in the DB.
|
@@ -183,209 +204,212 @@ module Wgit
|
|
183
204
|
num_urls + num_docs
|
184
205
|
end
|
185
206
|
|
186
|
-
# Returns whether or not a record with the given url (which is
|
187
|
-
# exists in the database's 'urls' collection.
|
207
|
+
# Returns whether or not a record with the given 'url' field (which is
|
208
|
+
# unique) exists in the database's 'urls' collection.
|
188
209
|
#
|
189
210
|
# @param url [Wgit::Url] The Url to search the DB for.
|
190
211
|
# @return [Boolean] True if url exists, otherwise false.
|
191
212
|
def url?(url)
|
192
213
|
h = { 'url' => url }
|
193
|
-
|
214
|
+
@client[:urls].find(h).any?
|
194
215
|
end
|
195
216
|
|
196
|
-
# Returns whether or not a record with the given doc
|
197
|
-
# exists in the database's 'documents' collection.
|
217
|
+
# Returns whether or not a record with the given doc 'url' field (which is
|
218
|
+
# unique) exists in the database's 'documents' collection.
|
198
219
|
#
|
199
220
|
# @param doc [Wgit::Document] The Document to search the DB for.
|
200
221
|
# @return [Boolean] True if doc exists, otherwise false.
|
201
222
|
def doc?(doc)
|
202
223
|
url = doc.respond_to?(:url) ? doc.url : doc
|
203
224
|
h = { 'url' => url }
|
204
|
-
|
225
|
+
@client[:documents].find(h).any?
|
205
226
|
end
|
206
227
|
|
207
228
|
### Update Data ###
|
208
229
|
|
209
230
|
# Update a Url or Document object in the DB.
|
210
231
|
#
|
211
|
-
# @param data [
|
212
|
-
#
|
213
|
-
# @raise [RuntimeError] If the data is not valid.
|
232
|
+
# @param data [Wgit::Url, Wgit::Document] The data to update.
|
233
|
+
# @raise [StandardError] If the data is not valid.
|
214
234
|
def update(data)
|
215
|
-
|
235
|
+
case data
|
236
|
+
when Wgit::Url
|
216
237
|
update_url(data)
|
217
|
-
|
238
|
+
when Wgit::Document
|
218
239
|
update_doc(data)
|
219
240
|
else
|
220
|
-
raise "
|
241
|
+
raise "Unsupported type - #{data.class}: #{data}"
|
221
242
|
end
|
222
243
|
end
|
223
244
|
|
224
245
|
protected
|
225
246
|
|
226
|
-
#
|
247
|
+
# Insert one or more Url objects into the DB.
|
227
248
|
#
|
228
|
-
# @param
|
229
|
-
# @
|
230
|
-
# @
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
# Single create result.
|
236
|
-
when 'Mongo::Operation::Insert::Result'
|
237
|
-
result.documents.first[:err].nil?
|
238
|
-
# Multiple create result.
|
239
|
-
when 'Mongo::BulkWrite::Result'
|
240
|
-
result.inserted_count == count
|
241
|
-
# Single and multiple update result.
|
242
|
-
when 'Mongo::Operation::Update::Result'
|
243
|
-
if multi
|
244
|
-
result.n == count
|
245
|
-
else
|
246
|
-
result.documents.first[:err].nil?
|
247
|
-
end
|
248
|
-
# Class no longer used, have you upgraded the 'mongo' gem?
|
249
|
+
# @param data [Wgit::Url, Array<Wgit::Url>] One or more Urls to insert.
|
250
|
+
# @raise [StandardError] If data type isn't supported.
|
251
|
+
# @return [Integer] The number of inserted Urls.
|
252
|
+
def insert_urls(data)
|
253
|
+
if data.respond_to?(:map)
|
254
|
+
assert_arr_type(data, Wgit::Url)
|
255
|
+
data.map! { |url| Wgit::Model.url(url) }
|
249
256
|
else
|
250
|
-
|
257
|
+
assert_type(data, Wgit::Url)
|
258
|
+
data = Wgit::Model.url(data)
|
251
259
|
end
|
260
|
+
|
261
|
+
create(:urls, data)
|
252
262
|
end
|
253
263
|
|
254
|
-
# Insert one or more
|
264
|
+
# Insert one or more Document objects into the DB.
|
255
265
|
#
|
256
|
-
# @param
|
257
|
-
# insert.
|
258
|
-
# @raise [
|
259
|
-
# @return [Integer] The number of inserted
|
260
|
-
def
|
261
|
-
if
|
262
|
-
assert_arr_types(
|
263
|
-
|
264
|
-
Wgit::Model.url(url)
|
265
|
-
end
|
266
|
+
# @param data [Wgit::Document, Array<Wgit::Document>] One or more Documents
|
267
|
+
# to insert.
|
268
|
+
# @raise [StandardError] If data type isn't supported.
|
269
|
+
# @return [Integer] The number of inserted Documents.
|
270
|
+
def insert_docs(data)
|
271
|
+
if data.respond_to?(:map)
|
272
|
+
assert_arr_types(data, Wgit::Document)
|
273
|
+
data.map! { |doc| Wgit::Model.document(doc) }
|
266
274
|
else
|
267
|
-
|
268
|
-
|
275
|
+
assert_types(data, Wgit::Document)
|
276
|
+
data = Wgit::Model.document(data)
|
269
277
|
end
|
270
|
-
|
278
|
+
|
279
|
+
create(:documents, data)
|
271
280
|
end
|
272
281
|
|
273
|
-
#
|
282
|
+
# Update a Url record in the DB.
|
274
283
|
#
|
275
|
-
# @param
|
276
|
-
#
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
284
|
+
# @param url [Wgit::Url] The Url to update.
|
285
|
+
# @return [Integer] The number of updated records.
|
286
|
+
def update_url(url)
|
287
|
+
assert_type(url, Wgit::Url)
|
288
|
+
selection = { url: url }
|
289
|
+
url_hash = Wgit::Model.url(url).merge(Wgit::Model.common_update_data)
|
290
|
+
update = { '$set' => url_hash }
|
291
|
+
mutate(true, :urls, selection, update)
|
292
|
+
end
|
293
|
+
|
294
|
+
# Update a Document record in the DB.
|
295
|
+
#
|
296
|
+
# @param doc [Wgit::Document] The Document to update.
|
297
|
+
# @return [Integer] The number of updated records.
|
298
|
+
def update_doc(doc)
|
299
|
+
assert_type(doc, Wgit::Document)
|
300
|
+
selection = { url: doc.url }
|
301
|
+
doc_hash = Wgit::Model.document(doc).merge(Wgit::Model.common_update_data)
|
302
|
+
update = { '$set' => doc_hash }
|
303
|
+
mutate(true, :documents, selection, update)
|
304
|
+
end
|
305
|
+
|
306
|
+
private
|
307
|
+
|
308
|
+
# Return if the write to the DB succeeded or not.
|
309
|
+
#
|
310
|
+
# @param result [Mongo::Object] The operation result.
|
311
|
+
# @param records [Integer] The number of records written to.
|
312
|
+
# @param multi [Boolean] Whether several records are being written to.
|
313
|
+
# @raise [StandardError] If the result type isn't supported.
|
314
|
+
# @return [Boolean] True if the write was successful, false otherwise.
|
315
|
+
def write_succeeded?(result, records: 1, multi: false)
|
316
|
+
case result
|
317
|
+
# Single create result.
|
318
|
+
when Mongo::Operation::Insert::Result
|
319
|
+
result.documents.first[:err].nil?
|
320
|
+
# Multiple create result.
|
321
|
+
when Mongo::BulkWrite::Result
|
322
|
+
result.inserted_count == records
|
323
|
+
# Single and multiple update result.
|
324
|
+
when Mongo::Operation::Update::Result
|
325
|
+
multi ? result.n == records : result.documents.first[:err].nil?
|
326
|
+
# Class no longer used, have you upgraded the 'mongo' gem?
|
285
327
|
else
|
286
|
-
|
287
|
-
unless doc_or_docs.is_a?(Hash)
|
288
|
-
doc_or_docs = Wgit::Model.document(doc_or_docs)
|
289
|
-
end
|
328
|
+
raise "Result class not currently supported: #{result.class}"
|
290
329
|
end
|
291
|
-
create(:documents, doc_or_docs)
|
292
330
|
end
|
293
331
|
|
294
332
|
# Create/insert one or more Url or Document records into the DB.
|
295
333
|
#
|
296
334
|
# @param collection [Symbol] Either :urls or :documents.
|
297
|
-
# @param data [Hash, Array<
|
298
|
-
# @raise [
|
299
|
-
#
|
300
|
-
# @return [Integer] The number of inserted Objects.
|
335
|
+
# @param data [Hash, Array<Hash>] The data to insert.
|
336
|
+
# @raise [StandardError] If data type is unsupported or the write fails.
|
337
|
+
# @return [Integer] The number of inserted records.
|
301
338
|
def create(collection, data)
|
302
|
-
|
339
|
+
assert_types(data, [Hash, Array])
|
340
|
+
|
303
341
|
# Single doc.
|
304
|
-
|
342
|
+
case data
|
343
|
+
when Hash
|
305
344
|
data.merge!(Wgit::Model.common_insert_data)
|
306
|
-
result =
|
345
|
+
result = @client[collection.to_sym].insert_one(data)
|
307
346
|
raise 'DB write (insert) failed' unless write_succeeded?(result)
|
308
347
|
|
309
348
|
result.n
|
310
349
|
# Multiple docs.
|
311
|
-
|
350
|
+
when Array
|
312
351
|
assert_arr_types(data, Hash)
|
313
|
-
data.map!
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
352
|
+
data.map! { |hash| hash.merge(Wgit::Model.common_insert_data) }
|
353
|
+
result = @client[collection.to_sym].insert_many(data)
|
354
|
+
raise 'DB write(s) (insert) failed' unless write_succeeded?(
|
355
|
+
result, records: data.length
|
356
|
+
)
|
318
357
|
|
319
358
|
result.inserted_count
|
320
359
|
else
|
321
|
-
raise
|
360
|
+
raise 'data must be a Hash or an Array of Hashes'
|
322
361
|
end
|
323
362
|
end
|
324
363
|
|
325
364
|
# Retrieve Url or Document records from the DB.
|
326
365
|
#
|
327
366
|
# @param collection [Symbol] Either :urls or :documents.
|
328
|
-
# @param query [Hash] The query used
|
367
|
+
# @param query [Hash] The query used for the retrieval.
|
329
368
|
# @param sort [Hash] The sort to use.
|
330
369
|
# @param projection [Hash] The projection to use.
|
331
370
|
# @param limit [Integer] The limit to use.
|
332
371
|
# @param skip [Integer] The skip to use.
|
333
|
-
# @
|
372
|
+
# @raise [StandardError] If query type isn't valid.
|
373
|
+
# @return [Mongo::Object] The Mongo client operation result.
|
334
374
|
def retrieve(collection, query,
|
335
|
-
sort
|
336
|
-
limit
|
375
|
+
sort: {}, projection: {},
|
376
|
+
limit: 0, skip: 0)
|
337
377
|
assert_type(query, Hash)
|
338
|
-
|
339
|
-
|
378
|
+
@client[collection.to_sym].find(query).projection(projection)
|
379
|
+
.skip(skip).limit(limit).sort(sort)
|
340
380
|
end
|
341
381
|
|
342
|
-
#
|
382
|
+
# Mutate/update one or more Url or Document records in the DB.
|
343
383
|
#
|
344
|
-
#
|
345
|
-
#
|
346
|
-
def update_url(url)
|
347
|
-
assert_type(url, Url)
|
348
|
-
selection = { url: url }
|
349
|
-
url_hash = Wgit::Model.url(url).merge(Wgit::Model.common_update_data)
|
350
|
-
update = { '$set' => url_hash }
|
351
|
-
_update(true, :urls, selection, update)
|
352
|
-
end
|
353
|
-
|
354
|
-
# Update a Document object in the DB.
|
384
|
+
# This method expects Model.common_update_data to have been merged in
|
385
|
+
# already by the calling method.
|
355
386
|
#
|
356
|
-
# @param
|
357
|
-
# @
|
358
|
-
def
|
359
|
-
|
360
|
-
selection = { url: doc.url }
|
361
|
-
doc_hash = Wgit::Model.document(doc).merge(Wgit::Model.common_update_data)
|
362
|
-
update = { '$set' => doc_hash }
|
363
|
-
_update(true, :documents, selection, update)
|
364
|
-
end
|
387
|
+
# @param single [Boolean] Whether or not a single record is being updated.
|
388
|
+
# @param collection [Symbol] Either :urls or :documents.
|
389
|
+
def mutate(single, collection, selection, update)
|
390
|
+
assert_arr_types([selection, update], Hash)
|
365
391
|
|
366
|
-
|
392
|
+
collection = collection.to_sym
|
393
|
+
unless %i[urls documents].include?(collection)
|
394
|
+
raise "Invalid collection: #{collection}"
|
395
|
+
end
|
367
396
|
|
368
|
-
# Update one or more Url or Document records in the DB.
|
369
|
-
# NOTE: The Model.common_update_data should be merged in the calling
|
370
|
-
# method as the update param can be bespoke, due to its nature.
|
371
|
-
def _update(single, collection, selection, update)
|
372
|
-
assert_arr_types([selection, update], Hash)
|
373
397
|
result = if single
|
374
|
-
|
398
|
+
@client[collection].update_one(selection, update)
|
375
399
|
else
|
376
|
-
|
400
|
+
@client[collection].update_many(selection, update)
|
377
401
|
end
|
378
402
|
raise 'DB write (update) failed' unless write_succeeded?(result)
|
379
403
|
|
380
404
|
result.n
|
381
405
|
end
|
382
406
|
|
383
|
-
alias count
|
384
|
-
alias length
|
407
|
+
alias count size
|
408
|
+
alias length size
|
385
409
|
alias num_documents num_docs
|
386
|
-
alias document?
|
387
|
-
alias insert_url
|
388
|
-
alias insert_doc
|
389
|
-
alias num_objects
|
410
|
+
alias document? doc?
|
411
|
+
alias insert_url insert_urls
|
412
|
+
alias insert_doc insert_docs
|
413
|
+
alias num_objects num_records
|
390
414
|
end
|
391
415
|
end
|