wgit 0.10.1 → 0.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/lib/wgit/base.rb +10 -1
- data/lib/wgit/indexer.rb +4 -4
- data/lib/wgit/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c712169a7a2cf41bebb38b2c798aa883c9f685e4d3671929ab7a3ead55da0134
|
4
|
+
data.tar.gz: 0fb789510761c01f3d0459415b653f4eb896d194e31c610300ef94141dfab63a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e175d2d23877fbb5aefc89aca1f98c31d9fee0ba78ec810bb3cc2260fdd97208c35ba9840b8cdc92ea49bdd8f6fd198910443fd61efdd563767048206f5a9788
|
7
|
+
data.tar.gz: 7dad82e66a10228ba0bce8e7d4cf8d17bdab734f90315813854c0c7e17807fa4c7f500711290a3e5eb28c431bab1c22ba4d25d7b585c1516c8f5470636bf37b4
|
data/CHANGELOG.md
CHANGED
@@ -9,6 +9,15 @@
|
|
9
9
|
- ...
|
10
10
|
---
|
11
11
|
|
12
|
+
## v0.10.2
|
13
|
+
### Added
|
14
|
+
- `Wgit::Base#setup` and `#teardown` methods (lifecycle hooks) that can be overridden by subclasses.
|
15
|
+
### Changed/Removed
|
16
|
+
- ...
|
17
|
+
### Fixed
|
18
|
+
- ...
|
19
|
+
---
|
20
|
+
|
12
21
|
## v0.10.1
|
13
22
|
### Added
|
14
23
|
- Support for Ruby 3.
|
data/lib/wgit/base.rb
CHANGED
@@ -4,16 +4,25 @@ module Wgit
|
|
4
4
|
class Base
|
5
5
|
extend Wgit::DSL
|
6
6
|
|
7
|
+
# Runs once before the crawl/index is run. Override as needed.
|
8
|
+
def setup; end
|
9
|
+
|
10
|
+
# Runs once after the crawl/index is complete. Override as needed.
|
11
|
+
def teardown; end
|
12
|
+
|
7
13
|
# Runs the crawl/index passing each crawled `Wgit::Document` and the given
|
8
14
|
# block to the subclass's `#parse` method.
|
9
15
|
def self.run(&block)
|
16
|
+
crawl_method = @method || :crawl
|
10
17
|
obj = new
|
18
|
+
|
11
19
|
unless obj.respond_to?(:parse)
|
12
20
|
raise "#{obj.class} must respond_to? #parse(doc, &block)"
|
13
21
|
end
|
14
22
|
|
15
|
-
|
23
|
+
obj.setup
|
16
24
|
send(crawl_method) { |doc| obj.parse(doc, &block) }
|
25
|
+
obj.teardown
|
17
26
|
|
18
27
|
obj
|
19
28
|
end
|
data/lib/wgit/indexer.rb
CHANGED
@@ -80,8 +80,8 @@ database capacity, exiting.")
|
|
80
80
|
urls_count += write_urls_to_db(ext_links)
|
81
81
|
end
|
82
82
|
|
83
|
-
Wgit.logger.info("Crawled and indexed
|
84
|
-
overall for this iteration.")
|
83
|
+
Wgit.logger.info("Crawled and indexed documents for #{docs_count} \
|
84
|
+
url(s) overall for this iteration.")
|
85
85
|
Wgit.logger.info("Found and saved #{urls_count} external url(s) for \
|
86
86
|
the next iteration.")
|
87
87
|
|
@@ -136,8 +136,8 @@ the next iteration.")
|
|
136
136
|
Wgit.logger.info("Found and saved #{num_inserted_urls} external url(s)")
|
137
137
|
end
|
138
138
|
|
139
|
-
Wgit.logger.info("Crawled and indexed #{total_pages_indexed}
|
140
|
-
the site: #{url}")
|
139
|
+
Wgit.logger.info("Crawled and indexed #{total_pages_indexed} documents \
|
140
|
+
for the site: #{url}")
|
141
141
|
|
142
142
|
total_pages_indexed
|
143
143
|
end
|
data/lib/wgit/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wgit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.1
|
4
|
+
version: 0.10.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Telford
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-11-
|
11
|
+
date: 2021-11-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|