wgit 0.10.1 → 0.10.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/lib/wgit/base.rb +10 -1
- data/lib/wgit/indexer.rb +4 -4
- data/lib/wgit/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c712169a7a2cf41bebb38b2c798aa883c9f685e4d3671929ab7a3ead55da0134
|
4
|
+
data.tar.gz: 0fb789510761c01f3d0459415b653f4eb896d194e31c610300ef94141dfab63a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e175d2d23877fbb5aefc89aca1f98c31d9fee0ba78ec810bb3cc2260fdd97208c35ba9840b8cdc92ea49bdd8f6fd198910443fd61efdd563767048206f5a9788
|
7
|
+
data.tar.gz: 7dad82e66a10228ba0bce8e7d4cf8d17bdab734f90315813854c0c7e17807fa4c7f500711290a3e5eb28c431bab1c22ba4d25d7b585c1516c8f5470636bf37b4
|
data/CHANGELOG.md
CHANGED
@@ -9,6 +9,15 @@
|
|
9
9
|
- ...
|
10
10
|
---
|
11
11
|
|
12
|
+
## v0.10.2
|
13
|
+
### Added
|
14
|
+
- `Wgit::Base#setup` and `#teardown` methods (lifecycle hooks) that can be overridden by subclasses.
|
15
|
+
### Changed/Removed
|
16
|
+
- ...
|
17
|
+
### Fixed
|
18
|
+
- ...
|
19
|
+
---
|
20
|
+
|
12
21
|
## v0.10.1
|
13
22
|
### Added
|
14
23
|
- Support for Ruby 3.
|
data/lib/wgit/base.rb
CHANGED
@@ -4,16 +4,25 @@ module Wgit
|
|
4
4
|
class Base
|
5
5
|
extend Wgit::DSL
|
6
6
|
|
7
|
+
# Runs once before the crawl/index is run. Override as needed.
|
8
|
+
def setup; end
|
9
|
+
|
10
|
+
# Runs once after the crawl/index is complete. Override as needed.
|
11
|
+
def teardown; end
|
12
|
+
|
7
13
|
# Runs the crawl/index passing each crawled `Wgit::Document` and the given
|
8
14
|
# block to the subclass's `#parse` method.
|
9
15
|
def self.run(&block)
|
16
|
+
crawl_method = @method || :crawl
|
10
17
|
obj = new
|
18
|
+
|
11
19
|
unless obj.respond_to?(:parse)
|
12
20
|
raise "#{obj.class} must respond_to? #parse(doc, &block)"
|
13
21
|
end
|
14
22
|
|
15
|
-
|
23
|
+
obj.setup
|
16
24
|
send(crawl_method) { |doc| obj.parse(doc, &block) }
|
25
|
+
obj.teardown
|
17
26
|
|
18
27
|
obj
|
19
28
|
end
|
data/lib/wgit/indexer.rb
CHANGED
@@ -80,8 +80,8 @@ database capacity, exiting.")
|
|
80
80
|
urls_count += write_urls_to_db(ext_links)
|
81
81
|
end
|
82
82
|
|
83
|
-
Wgit.logger.info("Crawled and indexed
|
84
|
-
overall for this iteration.")
|
83
|
+
Wgit.logger.info("Crawled and indexed documents for #{docs_count} \
|
84
|
+
url(s) overall for this iteration.")
|
85
85
|
Wgit.logger.info("Found and saved #{urls_count} external url(s) for \
|
86
86
|
the next iteration.")
|
87
87
|
|
@@ -136,8 +136,8 @@ the next iteration.")
|
|
136
136
|
Wgit.logger.info("Found and saved #{num_inserted_urls} external url(s)")
|
137
137
|
end
|
138
138
|
|
139
|
-
Wgit.logger.info("Crawled and indexed #{total_pages_indexed}
|
140
|
-
the site: #{url}")
|
139
|
+
Wgit.logger.info("Crawled and indexed #{total_pages_indexed} documents \
|
140
|
+
for the site: #{url}")
|
141
141
|
|
142
142
|
total_pages_indexed
|
143
143
|
end
|
data/lib/wgit/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wgit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.1
|
4
|
+
version: 0.10.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Telford
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-11-
|
11
|
+
date: 2021-11-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|