wgit 0.10.2 → 0.10.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/lib/wgit/database/database.rb +82 -18
- data/lib/wgit/document.rb +1 -1
- data/lib/wgit/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a359a5011cce717a84c0fa353cba658a4abcd11d2e8575da701b39eef35f641c
|
4
|
+
data.tar.gz: 730b2eee3c88d9cd99c1d9754744aa4bae71bc10019113e840f2914b3a7909e5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4807d488cf03aa3dcf624249bd5169871bc17dd7e2de273cb841801147c040843912edc225a1f7346e427c022ecb2ce5360581323509221560ee99b31ea6a72b
|
7
|
+
data.tar.gz: ea3f1237116d05bbb24b2e85fdba1b6447821377a07ff5c5e6afea859afa5fb36d4e2c3f708c6f5dc201e0802aa61f0eaa56e63a62882330b5225dbbaa08721d
|
data/CHANGELOG.md
CHANGED
@@ -9,6 +9,33 @@
|
|
9
9
|
- ...
|
10
10
|
---
|
11
11
|
|
12
|
+
## v0.10.5
|
13
|
+
### Added
|
14
|
+
- `Database#last_result` getter method to return the most recent raw mongo result.
|
15
|
+
### Changed/Removed
|
16
|
+
- ...
|
17
|
+
### Fixed
|
18
|
+
- ...
|
19
|
+
---
|
20
|
+
|
21
|
+
## v0.10.4
|
22
|
+
### Added
|
23
|
+
- `Database#search_text` method which returns a Hash of `url => text_results` instead of `Wgit::Documents` (like `#search`).
|
24
|
+
### Changed/Removed
|
25
|
+
- ...
|
26
|
+
### Fixed
|
27
|
+
- ...
|
28
|
+
---
|
29
|
+
|
30
|
+
## v0.10.3
|
31
|
+
### Added
|
32
|
+
- ...
|
33
|
+
### Changed/Removed
|
34
|
+
- Changed `Database#create_collections` and `#create_unique_indexes` by removing `rescue nil` from their database operations. Now any underlying errors with the database client are not masked.
|
35
|
+
### Fixed
|
36
|
+
- ...
|
37
|
+
---
|
38
|
+
|
12
39
|
## v0.10.2
|
13
40
|
### Added
|
14
41
|
- `Wgit::Base#setup` and `#teardown` methods (lifecycle hooks) that can be overridden by subclasses.
|
@@ -45,6 +45,9 @@ module Wgit
|
|
45
45
|
# A custom setter method is also provided for changing the search logic.
|
46
46
|
attr_reader :text_index
|
47
47
|
|
48
|
+
# The raw MongoDB result of the most recent operation.
|
49
|
+
attr_reader :last_result
|
50
|
+
|
48
51
|
# Initializes a connected database client using the provided
|
49
52
|
# connection_string or ENV['WGIT_CONNECTION_STRING'].
|
50
53
|
#
|
@@ -91,29 +94,27 @@ module Wgit
|
|
91
94
|
|
92
95
|
### DDL ###
|
93
96
|
|
94
|
-
# Creates the urls and documents collections
|
95
|
-
# This method is therefore idempotent.
|
97
|
+
# Creates the 'urls' and 'documents' collections.
|
96
98
|
#
|
97
99
|
# @return [nil] Always returns nil.
|
98
100
|
def create_collections
|
99
|
-
|
100
|
-
|
101
|
+
@client[URLS_COLLECTION].create
|
102
|
+
@client[DOCUMENTS_COLLECTION].create
|
101
103
|
|
102
104
|
nil
|
103
105
|
end
|
104
106
|
|
105
|
-
# Creates the urls and documents unique 'url' indexes if they don't already
|
106
|
-
# exist. This method is therefore idempotent.
|
107
|
+
# Creates the urls and documents unique 'url' indexes.
|
107
108
|
#
|
108
109
|
# @return [nil] Always returns nil.
|
109
110
|
def create_unique_indexes
|
110
111
|
@client[URLS_COLLECTION].indexes.create_one(
|
111
112
|
{ url: 1 }, name: UNIQUE_INDEX, unique: true
|
112
|
-
) rescue nil
|
113
|
+
)
|
113
114
|
|
114
115
|
@client[DOCUMENTS_COLLECTION].indexes.create_one(
|
115
116
|
{ 'url.url' => 1 }, name: UNIQUE_INDEX, unique: true
|
116
|
-
) rescue nil
|
117
|
+
)
|
117
118
|
|
118
119
|
nil
|
119
120
|
end
|
@@ -187,6 +188,8 @@ module Wgit
|
|
187
188
|
result = @client[collection].replace_one(query, data_hash, upsert: true)
|
188
189
|
|
189
190
|
result.matched_count.zero?
|
191
|
+
ensure
|
192
|
+
@last_result = result
|
190
193
|
end
|
191
194
|
|
192
195
|
### Retrieve Data ###
|
@@ -296,16 +299,12 @@ module Wgit
|
|
296
299
|
results = retrieve(DOCUMENTS_COLLECTION, query,
|
297
300
|
sort: sort_proj, projection: sort_proj,
|
298
301
|
limit: limit, skip: skip)
|
299
|
-
return [] if results.count < 1 # respond_to? :empty? == false
|
300
302
|
|
301
|
-
|
302
|
-
results = results.map do |mongo_doc|
|
303
|
+
results.map do |mongo_doc|
|
303
304
|
doc = Wgit::Document.new(mongo_doc)
|
304
305
|
yield(doc) if block_given?
|
305
306
|
doc
|
306
307
|
end
|
307
|
-
|
308
|
-
results
|
309
308
|
end
|
310
309
|
|
311
310
|
# Searches the database's Documents for the given query and then searches
|
@@ -350,6 +349,58 @@ module Wgit
|
|
350
349
|
results
|
351
350
|
end
|
352
351
|
|
352
|
+
# Searches the database's Documents for the given query and then searches
|
353
|
+
# each result in turn using `doc.search`. Instead of an Array of Documents,
|
354
|
+
# this method returns a Hash of the docs url => search_results creating a
|
355
|
+
# search engine like result set for quick access to text matches.
|
356
|
+
#
|
357
|
+
# @param query [String] The text query to search with.
|
358
|
+
# @param case_sensitive [Boolean] Whether character case must match.
|
359
|
+
# @param whole_sentence [Boolean] If false, multiple words are searched
|
360
|
+
# for separately.
|
361
|
+
# @param limit [Integer] The max number of results to return.
|
362
|
+
# @param skip [Integer] The number of results to skip.
|
363
|
+
# @param sentence_limit [Integer] The max length of each search result
|
364
|
+
# sentence.
|
365
|
+
# @param top_result_only [Boolean] Whether to return all of the documents
|
366
|
+
# search results or just the top (most relevant) result.
|
367
|
+
# @yield [doc] Given each search result (Wgit::Document) returned from the
|
368
|
+
# DB.
|
369
|
+
# @return [Hash<String, String | Array<String>>] The search results obtained
|
370
|
+
# from the DB having mapped the docs url => search_results. The format of
|
371
|
+
# search_results depends on the value of `top_result_only`.
|
372
|
+
def search_text(
|
373
|
+
query, case_sensitive: false, whole_sentence: true,
|
374
|
+
limit: 10, skip: 0, sentence_limit: 80, top_result_only: false
|
375
|
+
)
|
376
|
+
results = search(
|
377
|
+
query,
|
378
|
+
case_sensitive: case_sensitive,
|
379
|
+
whole_sentence: whole_sentence,
|
380
|
+
limit: limit,
|
381
|
+
skip: skip
|
382
|
+
)
|
383
|
+
|
384
|
+
results
|
385
|
+
.map do |doc|
|
386
|
+
yield(doc) if block_given?
|
387
|
+
|
388
|
+
results = doc.search(
|
389
|
+
query,
|
390
|
+
case_sensitive: case_sensitive,
|
391
|
+
whole_sentence: whole_sentence,
|
392
|
+
sentence_limit: sentence_limit
|
393
|
+
)
|
394
|
+
|
395
|
+
# Only return result if its text has a match - compact is called below.
|
396
|
+
next nil if results.empty?
|
397
|
+
|
398
|
+
[doc.url, (top_result_only ? results.first : results)]
|
399
|
+
end
|
400
|
+
.compact
|
401
|
+
.to_h
|
402
|
+
end
|
403
|
+
|
353
404
|
# Returns statistics about the database.
|
354
405
|
#
|
355
406
|
# @return [BSON::Document#[]#fetch] Similar to a Hash instance.
|
@@ -456,21 +507,30 @@ module Wgit
|
|
456
507
|
# 0 or 1 because urls are unique.
|
457
508
|
def delete(obj)
|
458
509
|
collection, query = get_type_info(obj)
|
459
|
-
@client[collection].delete_one(query)
|
510
|
+
result = @client[collection].delete_one(query)
|
511
|
+
result.n
|
512
|
+
ensure
|
513
|
+
@last_result = result
|
460
514
|
end
|
461
515
|
|
462
516
|
# Deletes everything in the urls collection.
|
463
517
|
#
|
464
518
|
# @return [Integer] The number of deleted records.
|
465
519
|
def clear_urls
|
466
|
-
@client[URLS_COLLECTION].delete_many({})
|
520
|
+
result = @client[URLS_COLLECTION].delete_many({})
|
521
|
+
result.n
|
522
|
+
ensure
|
523
|
+
@last_result = result
|
467
524
|
end
|
468
525
|
|
469
526
|
# Deletes everything in the documents collection.
|
470
527
|
#
|
471
528
|
# @return [Integer] The number of deleted records.
|
472
529
|
def clear_docs
|
473
|
-
@client[DOCUMENTS_COLLECTION].delete_many({})
|
530
|
+
result = @client[DOCUMENTS_COLLECTION].delete_many({})
|
531
|
+
result.n
|
532
|
+
ensure
|
533
|
+
@last_result = result
|
474
534
|
end
|
475
535
|
|
476
536
|
# Deletes everything in the urls and documents collections. This will nuke
|
@@ -538,6 +598,8 @@ module Wgit
|
|
538
598
|
else
|
539
599
|
raise 'data must be a Hash or an Array of Hashes'
|
540
600
|
end
|
601
|
+
ensure
|
602
|
+
@last_result = result
|
541
603
|
end
|
542
604
|
|
543
605
|
# Return if the write to the DB succeeded or not.
|
@@ -574,8 +636,8 @@ module Wgit
|
|
574
636
|
sort: {}, projection: {},
|
575
637
|
limit: 0, skip: 0)
|
576
638
|
assert_type(query, Hash)
|
577
|
-
@client[collection.to_sym].find(query).projection(projection)
|
578
|
-
|
639
|
+
@last_result = @client[collection.to_sym].find(query).projection(projection)
|
640
|
+
.skip(skip).limit(limit).sort(sort)
|
579
641
|
end
|
580
642
|
|
581
643
|
# Mutate/update one or more Url or Document records in the DB.
|
@@ -595,6 +657,8 @@ module Wgit
|
|
595
657
|
raise 'DB write(s) (update) failed' unless write_succeeded?(result)
|
596
658
|
|
597
659
|
result.n
|
660
|
+
ensure
|
661
|
+
@last_result = result
|
598
662
|
end
|
599
663
|
|
600
664
|
alias num_objects num_records
|
data/lib/wgit/document.rb
CHANGED
@@ -453,7 +453,7 @@ be relative"
|
|
453
453
|
|
454
454
|
if query.is_a?(Regexp)
|
455
455
|
regex = query
|
456
|
-
else # respond_to?
|
456
|
+
else # query.respond_to? :to_s == true
|
457
457
|
query = query.to_s
|
458
458
|
query = query.gsub(' ', '|') unless whole_sentence
|
459
459
|
regex = Regexp.new(query, !case_sensitive)
|
data/lib/wgit/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wgit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.2
|
4
|
+
version: 0.10.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Telford
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-07-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|