wgit 0.10.2 → 0.10.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/lib/wgit/database/database.rb +82 -18
- data/lib/wgit/document.rb +1 -1
- data/lib/wgit/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a359a5011cce717a84c0fa353cba658a4abcd11d2e8575da701b39eef35f641c
|
4
|
+
data.tar.gz: 730b2eee3c88d9cd99c1d9754744aa4bae71bc10019113e840f2914b3a7909e5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4807d488cf03aa3dcf624249bd5169871bc17dd7e2de273cb841801147c040843912edc225a1f7346e427c022ecb2ce5360581323509221560ee99b31ea6a72b
|
7
|
+
data.tar.gz: ea3f1237116d05bbb24b2e85fdba1b6447821377a07ff5c5e6afea859afa5fb36d4e2c3f708c6f5dc201e0802aa61f0eaa56e63a62882330b5225dbbaa08721d
|
data/CHANGELOG.md
CHANGED
@@ -9,6 +9,33 @@
|
|
9
9
|
- ...
|
10
10
|
---
|
11
11
|
|
12
|
+
## v0.10.5
|
13
|
+
### Added
|
14
|
+
- `Database#last_result` getter method to return the most recent raw mongo result.
|
15
|
+
### Changed/Removed
|
16
|
+
- ...
|
17
|
+
### Fixed
|
18
|
+
- ...
|
19
|
+
---
|
20
|
+
|
21
|
+
## v0.10.4
|
22
|
+
### Added
|
23
|
+
- `Database#search_text` method which returns a Hash of `url => text_results` instead of `Wgit::Documents` (like `#search`).
|
24
|
+
### Changed/Removed
|
25
|
+
- ...
|
26
|
+
### Fixed
|
27
|
+
- ...
|
28
|
+
---
|
29
|
+
|
30
|
+
## v0.10.3
|
31
|
+
### Added
|
32
|
+
- ...
|
33
|
+
### Changed/Removed
|
34
|
+
- Changed `Database#create_collections` and `#create_unique_indexes` by removing `rescue nil` from their database operations. Now any underlying errors with the database client are not masked.
|
35
|
+
### Fixed
|
36
|
+
- ...
|
37
|
+
---
|
38
|
+
|
12
39
|
## v0.10.2
|
13
40
|
### Added
|
14
41
|
- `Wgit::Base#setup` and `#teardown` methods (lifecycle hooks) that can be overridden by subclasses.
|
@@ -45,6 +45,9 @@ module Wgit
|
|
45
45
|
# A custom setter method is also provided for changing the search logic.
|
46
46
|
attr_reader :text_index
|
47
47
|
|
48
|
+
# The raw MongoDB result of the most recent operation.
|
49
|
+
attr_reader :last_result
|
50
|
+
|
48
51
|
# Initializes a connected database client using the provided
|
49
52
|
# connection_string or ENV['WGIT_CONNECTION_STRING'].
|
50
53
|
#
|
@@ -91,29 +94,27 @@ module Wgit
|
|
91
94
|
|
92
95
|
### DDL ###
|
93
96
|
|
94
|
-
# Creates the urls and documents collections
|
95
|
-
# This method is therefore idempotent.
|
97
|
+
# Creates the 'urls' and 'documents' collections.
|
96
98
|
#
|
97
99
|
# @return [nil] Always returns nil.
|
98
100
|
def create_collections
|
99
|
-
|
100
|
-
|
101
|
+
@client[URLS_COLLECTION].create
|
102
|
+
@client[DOCUMENTS_COLLECTION].create
|
101
103
|
|
102
104
|
nil
|
103
105
|
end
|
104
106
|
|
105
|
-
# Creates the urls and documents unique 'url' indexes
|
106
|
-
# exist. This method is therefore idempotent.
|
107
|
+
# Creates the urls and documents unique 'url' indexes.
|
107
108
|
#
|
108
109
|
# @return [nil] Always returns nil.
|
109
110
|
def create_unique_indexes
|
110
111
|
@client[URLS_COLLECTION].indexes.create_one(
|
111
112
|
{ url: 1 }, name: UNIQUE_INDEX, unique: true
|
112
|
-
)
|
113
|
+
)
|
113
114
|
|
114
115
|
@client[DOCUMENTS_COLLECTION].indexes.create_one(
|
115
116
|
{ 'url.url' => 1 }, name: UNIQUE_INDEX, unique: true
|
116
|
-
)
|
117
|
+
)
|
117
118
|
|
118
119
|
nil
|
119
120
|
end
|
@@ -187,6 +188,8 @@ module Wgit
|
|
187
188
|
result = @client[collection].replace_one(query, data_hash, upsert: true)
|
188
189
|
|
189
190
|
result.matched_count.zero?
|
191
|
+
ensure
|
192
|
+
@last_result = result
|
190
193
|
end
|
191
194
|
|
192
195
|
### Retrieve Data ###
|
@@ -296,16 +299,12 @@ module Wgit
|
|
296
299
|
results = retrieve(DOCUMENTS_COLLECTION, query,
|
297
300
|
sort: sort_proj, projection: sort_proj,
|
298
301
|
limit: limit, skip: skip)
|
299
|
-
return [] if results.count < 1 # respond_to? :empty? == false
|
300
302
|
|
301
|
-
|
302
|
-
results = results.map do |mongo_doc|
|
303
|
+
results.map do |mongo_doc|
|
303
304
|
doc = Wgit::Document.new(mongo_doc)
|
304
305
|
yield(doc) if block_given?
|
305
306
|
doc
|
306
307
|
end
|
307
|
-
|
308
|
-
results
|
309
308
|
end
|
310
309
|
|
311
310
|
# Searches the database's Documents for the given query and then searches
|
@@ -350,6 +349,58 @@ module Wgit
|
|
350
349
|
results
|
351
350
|
end
|
352
351
|
|
352
|
+
# Searches the database's Documents for the given query and then searches
|
353
|
+
# each result in turn using `doc.search`. Instead of an Array of Documents,
|
354
|
+
# this method returns a Hash of the docs url => search_results creating a
|
355
|
+
# search engine like result set for quick access to text matches.
|
356
|
+
#
|
357
|
+
# @param query [String] The text query to search with.
|
358
|
+
# @param case_sensitive [Boolean] Whether character case must match.
|
359
|
+
# @param whole_sentence [Boolean] Whether the query should be matched as a
|
360
|
+
# whole sentence; when false, each word is searched for separately.
|
361
|
+
# @param limit [Integer] The max number of results to return.
|
362
|
+
# @param skip [Integer] The number of results to skip.
|
363
|
+
# @param sentence_limit [Integer] The max length of each search result
|
364
|
+
# sentence.
|
365
|
+
# @param top_result_only [Boolean] Whether to return all of the documents
|
366
|
+
# search results or just the top (most relevant) result.
|
367
|
+
# @yield [doc] Given each search result (Wgit::Document) returned from the
|
368
|
+
# DB.
|
369
|
+
# @return [Hash<String, String | Array<String>>] The search results obtained
|
370
|
+
# from the DB having mapped the docs url => search_results. The format of
|
371
|
+
# search_results depends on the value of `top_result_only`.
|
372
|
+
def search_text(
|
373
|
+
query, case_sensitive: false, whole_sentence: true,
|
374
|
+
limit: 10, skip: 0, sentence_limit: 80, top_result_only: false
|
375
|
+
)
|
376
|
+
results = search(
|
377
|
+
query,
|
378
|
+
case_sensitive: case_sensitive,
|
379
|
+
whole_sentence: whole_sentence,
|
380
|
+
limit: limit,
|
381
|
+
skip: skip
|
382
|
+
)
|
383
|
+
|
384
|
+
results
|
385
|
+
.map do |doc|
|
386
|
+
yield(doc) if block_given?
|
387
|
+
|
388
|
+
results = doc.search(
|
389
|
+
query,
|
390
|
+
case_sensitive: case_sensitive,
|
391
|
+
whole_sentence: whole_sentence,
|
392
|
+
sentence_limit: sentence_limit
|
393
|
+
)
|
394
|
+
|
395
|
+
# Only return result if its text has a match - compact is called below.
|
396
|
+
next nil if results.empty?
|
397
|
+
|
398
|
+
[doc.url, (top_result_only ? results.first : results)]
|
399
|
+
end
|
400
|
+
.compact
|
401
|
+
.to_h
|
402
|
+
end
|
403
|
+
|
353
404
|
# Returns statistics about the database.
|
354
405
|
#
|
355
406
|
# @return [BSON::Document#[]#fetch] Similar to a Hash instance.
|
@@ -456,21 +507,30 @@ module Wgit
|
|
456
507
|
# 0 or 1 because urls are unique.
|
457
508
|
def delete(obj)
|
458
509
|
collection, query = get_type_info(obj)
|
459
|
-
@client[collection].delete_one(query)
|
510
|
+
result = @client[collection].delete_one(query)
|
511
|
+
result.n
|
512
|
+
ensure
|
513
|
+
@last_result = result
|
460
514
|
end
|
461
515
|
|
462
516
|
# Deletes everything in the urls collection.
|
463
517
|
#
|
464
518
|
# @return [Integer] The number of deleted records.
|
465
519
|
def clear_urls
|
466
|
-
@client[URLS_COLLECTION].delete_many({})
|
520
|
+
result = @client[URLS_COLLECTION].delete_many({})
|
521
|
+
result.n
|
522
|
+
ensure
|
523
|
+
@last_result = result
|
467
524
|
end
|
468
525
|
|
469
526
|
# Deletes everything in the documents collection.
|
470
527
|
#
|
471
528
|
# @return [Integer] The number of deleted records.
|
472
529
|
def clear_docs
|
473
|
-
@client[DOCUMENTS_COLLECTION].delete_many({})
|
530
|
+
result = @client[DOCUMENTS_COLLECTION].delete_many({})
|
531
|
+
result.n
|
532
|
+
ensure
|
533
|
+
@last_result = result
|
474
534
|
end
|
475
535
|
|
476
536
|
# Deletes everything in the urls and documents collections. This will nuke
|
@@ -538,6 +598,8 @@ module Wgit
|
|
538
598
|
else
|
539
599
|
raise 'data must be a Hash or an Array of Hashes'
|
540
600
|
end
|
601
|
+
ensure
|
602
|
+
@last_result = result
|
541
603
|
end
|
542
604
|
|
543
605
|
# Return if the write to the DB succeeded or not.
|
@@ -574,8 +636,8 @@ module Wgit
|
|
574
636
|
sort: {}, projection: {},
|
575
637
|
limit: 0, skip: 0)
|
576
638
|
assert_type(query, Hash)
|
577
|
-
@client[collection.to_sym].find(query).projection(projection)
|
578
|
-
|
639
|
+
@last_result = @client[collection.to_sym].find(query).projection(projection)
|
640
|
+
.skip(skip).limit(limit).sort(sort)
|
579
641
|
end
|
580
642
|
|
581
643
|
# Mutate/update one or more Url or Document records in the DB.
|
@@ -595,6 +657,8 @@ module Wgit
|
|
595
657
|
raise 'DB write(s) (update) failed' unless write_succeeded?(result)
|
596
658
|
|
597
659
|
result.n
|
660
|
+
ensure
|
661
|
+
@last_result = result
|
598
662
|
end
|
599
663
|
|
600
664
|
alias num_objects num_records
|
data/lib/wgit/document.rb
CHANGED
@@ -453,7 +453,7 @@ be relative"
|
|
453
453
|
|
454
454
|
if query.is_a?(Regexp)
|
455
455
|
regex = query
|
456
|
-
else # respond_to?
|
456
|
+
else # query.respond_to? :to_s == true
|
457
457
|
query = query.to_s
|
458
458
|
query = query.gsub(' ', '|') unless whole_sentence
|
459
459
|
regex = Regexp.new(query, !case_sensitive)
|
data/lib/wgit/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wgit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.2
|
4
|
+
version: 0.10.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Telford
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-07-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|