datahen 1.0.2 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: affc49d8b9ed853c138aba0ec2af224b7f3065de8d06f9395ab49ff91332408b
|
4
|
+
data.tar.gz: b2668aa8fddba8c71b1b05b5b3e492deca25e82cd8f5dd9b53428a16cd3b4d1f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d1c22a97ad04ce972f5c41dc1749e3613a45da9f43f31c8512703e67f2aaa9862bdb1b373eed6e8bfd2150471598e44054424b93612fd18241db1f78398230d4
|
7
|
+
data.tar.gz: e27c597ddc83e250bcb5edc7db8379f5498ea5636cd599fa7bfa4de8aabd6925e664ffd11aabb4389e5906e45bbca4dbf807db48094070e1dd99abffab1bd874
|
@@ -131,13 +131,14 @@ module Datahen
|
|
131
131
|
Reparse pages in a scraper's current job. You need to specify either a --gid or --parse-fail or --status or --page-type.\x5
|
132
132
|
LONGDESC
|
133
133
|
option :gid, :aliases => :g, type: :string, desc: 'Reparse a specific GID'
|
134
|
+
option :fetch_fail, type: :boolean, desc: 'Reparse only pages that fails fetching.'
|
134
135
|
option :parse_fail, type: :boolean, desc: 'Reparse only pages that fails parsing.'
|
135
136
|
option :status, type: :string, desc: 'Reparse only pages with a specific status.'
|
136
137
|
option :page_type, type: :string, desc: 'Refetches only pages with a specific page type.'
|
137
138
|
option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
|
138
139
|
def reparse(scraper_name)
|
139
|
-
if !options.key?(:gid) && !options.key?(:parse_fail) && !options.key?(:status) && !options.key?(:page_type)
|
140
|
-
puts "Must specify either a --gid, --parse-fail, --status or --page-type"
|
140
|
+
if !options.key?(:gid) && !options.key?(:fetch_fail) && !options.key?(:parse_fail) && !options.key?(:status) && !options.key?(:page_type)
|
141
|
+
puts "Must specify either a --gid, --fetch-fail, --parse-fail, --status or --page-type"
|
141
142
|
return
|
142
143
|
end
|
143
144
|
|
@@ -155,11 +156,14 @@ module Datahen
|
|
155
156
|
Move pages in a scraper's current job to limbo. You need to specify either a --gid or --status.\x5
|
156
157
|
LONGDESC
|
157
158
|
option :gid, :aliases => :g, type: :string, desc: 'Move a specific GID to limbo'
|
159
|
+
option :fetch_fail, type: :boolean, desc: 'Move pages that fails fetching to limbo.'
|
160
|
+
option :parse_fail, type: :boolean, desc: 'Move pages that fails parsing to limbo.'
|
158
161
|
option :status, type: :string, desc: 'Move pages with a specific status to limbo.'
|
162
|
+
option :page_type, type: :string, desc: 'Move pages with a specific page type to limbo.'
|
159
163
|
option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
|
160
164
|
def limbo(scraper_name)
|
161
|
-
if !options.key?(:gid) && !options.key?(:status)
|
162
|
-
puts "Must specify either a --gid or --status"
|
165
|
+
if !options.key?(:gid) && !options.key?(:fetch_fail) && !options.key?(:parse_fail) && !options.key?(:status) && !options.key?(:page_type)
|
166
|
+
puts "Must specify either a --gid, --fetch-fail, --parse-fail, --status or --page-type"
|
163
167
|
return
|
164
168
|
end
|
165
169
|
|
@@ -27,6 +27,8 @@ module Datahen
|
|
27
27
|
|
28
28
|
def self.exposed_methods
|
29
29
|
[
|
30
|
+
:get_content,
|
31
|
+
:get_failed_content,
|
30
32
|
:content,
|
31
33
|
:failed_content,
|
32
34
|
:outputs,
|
@@ -38,7 +40,8 @@ module Datahen
|
|
38
40
|
:find_outputs,
|
39
41
|
:refetch,
|
40
42
|
:reparse,
|
41
|
-
:limbo
|
43
|
+
:limbo,
|
44
|
+
:finish
|
42
45
|
].freeze
|
43
46
|
end
|
44
47
|
|
data/lib/datahen/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datahen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.2
|
4
|
+
version: 1.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Parama Danoesubroto
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-08-
|
11
|
+
date: 2022-08-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|