answersengine 0.3.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f2515f220abb2835bf815a55a62382f6929e32a2629fda0cc7504f49704ef984
|
4
|
+
data.tar.gz: f898220e6b2dc947316915531c6ffc6faa7011de27f1a574d92f832984d85d8c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d8a750d0d2c7099b3e8d46a39d0a5f0b874296491a5ade11d6a56c8ca7611011f7d156e39e785b19bf0c735d109bf494c20738d14db2f6aa114668422485cf32
|
7
|
+
data.tar.gz: 0bd4e8cf8fbdec06a034769635272ff4f295d3741704930bf0f0209f7cf055ba267080cc61f65f139e760484dc4dda5984d3d6ad8066b16f42d46b1deb1df00b
|
@@ -94,7 +94,7 @@ module AnswersEngine
|
|
94
94
|
puts "Error: #{options[:vars]} on vars is not a valid JSON"
|
95
95
|
end
|
96
96
|
end
|
97
|
-
end
|
97
|
+
end
|
98
98
|
|
99
99
|
desc "refetch <scraper_name>", "Refetch Pages on a scraper's current job"
|
100
100
|
long_desc <<-LONGDESC
|
@@ -106,35 +106,36 @@ module AnswersEngine
|
|
106
106
|
def refetch(scraper_name)
|
107
107
|
if !options.key?(:gid) && !options.key?(:fetch_fail) && !options.key?(:parse_fail)
|
108
108
|
puts "Must specify either a --gid or --fetch-fail or --parse-fail"
|
109
|
-
|
110
|
-
client = Client::ScraperJobPage.new(options)
|
111
|
-
puts "#{client.refetch(scraper_name)}"
|
109
|
+
return
|
112
110
|
end
|
111
|
+
client = Client::ScraperJobPage.new(options)
|
112
|
+
puts "#{client.refetch(scraper_name)}"
|
113
113
|
end
|
114
114
|
|
115
|
-
desc "
|
115
|
+
desc "reparse <scraper_name>", "Reparse Pages on a scraper's current job"
|
116
116
|
long_desc <<-LONGDESC
|
117
|
-
|
118
|
-
|
119
|
-
option :
|
120
|
-
|
117
|
+
Reparse pages in a scraper's current job. You need to specify either a --gid or --parse-fail.\x5
|
118
|
+
LONGDESC
|
119
|
+
option :gid, :aliases => :g, type: :string, desc: 'Reparse a specific GID'
|
120
|
+
option :parse_fail, type: :boolean, desc: 'Reparse only pages that fails parsing.'
|
121
|
+
def reparse(scraper_name)
|
121
122
|
begin
|
122
123
|
options[:vars] = JSON.parse(options[:vars]) if options[:vars]
|
123
124
|
|
124
|
-
if options
|
125
|
-
|
126
|
-
|
127
|
-
else
|
128
|
-
client = Client::ScraperJobPage.new(options)
|
129
|
-
puts "#{client.reset(scraper_name, gid, options)}"
|
125
|
+
if !options.key?(:gid) && !options.key?(:parse_fail)
|
126
|
+
puts "Must specify either a --gid or --parse-fail"
|
127
|
+
return
|
130
128
|
end
|
131
129
|
|
130
|
+
client = Client::ScraperJobPage.new(options)
|
131
|
+
puts "#{client.reparse(scraper_name)}"
|
132
|
+
|
132
133
|
rescue JSON::ParserError
|
133
134
|
if options[:vars]
|
134
135
|
puts "Error: #{options[:vars]} on vars is not a valid JSON"
|
135
136
|
end
|
136
137
|
end
|
137
|
-
end
|
138
|
+
end
|
138
139
|
|
139
140
|
desc "show <scraper_name> <gid>", "Show a page in scraper's current job"
|
140
141
|
long_desc <<-LONGDESC
|
@@ -166,7 +167,7 @@ module AnswersEngine
|
|
166
167
|
query = {}
|
167
168
|
query["order"] = options.delete(:head) if options[:head]
|
168
169
|
query["job_type"] = "parsing" if options[:parsing]
|
169
|
-
|
170
|
+
|
170
171
|
query["page_token"] = options.delete(:more) if options[:more]
|
171
172
|
query["per_page"] = options.delete(:per_page) if options[:per_page]
|
172
173
|
|
@@ -177,7 +178,7 @@ module AnswersEngine
|
|
177
178
|
else
|
178
179
|
result = client.scraper_all_job_page_log(scraper_name, gid, {query: query})
|
179
180
|
end
|
180
|
-
|
181
|
+
|
181
182
|
if result['entries'].nil? || result["entries"].length == 0
|
182
183
|
puts "No logs yet, please try again later."
|
183
184
|
else
|
@@ -40,7 +40,7 @@ module AnswersEngine
|
|
40
40
|
body[:pages] = opts.fetch(:pages) {[]}
|
41
41
|
body[:seeding_status] = opts.fetch(:seeding_status){ nil }
|
42
42
|
body[:log_error] = opts[:log_error] if opts[:log_error]
|
43
|
-
|
43
|
+
|
44
44
|
@options.merge!({body: body.to_json})
|
45
45
|
|
46
46
|
self.class.put("/jobs/#{job_id}/seeding_update", @options)
|
@@ -50,4 +50,3 @@ module AnswersEngine
|
|
50
50
|
|
51
51
|
end
|
52
52
|
end
|
53
|
-
|
@@ -10,22 +10,18 @@ module AnswersEngine
|
|
10
10
|
end
|
11
11
|
|
12
12
|
def update(job_id, gid, opts={})
|
13
|
-
body = {}
|
13
|
+
body = {}
|
14
14
|
body[:page_type] = opts[:page_type] if opts[:page_type]
|
15
15
|
body[:priority] = opts[:priority] if opts[:priority]
|
16
16
|
body[:vars] = opts[:vars] if opts[:vars]
|
17
|
-
|
17
|
+
|
18
18
|
@options.merge!({body: body.to_json})
|
19
19
|
|
20
20
|
self.class.put("/jobs/#{job_id}/pages/#{gid}", @options)
|
21
21
|
end
|
22
22
|
|
23
|
-
def reset(job_id, gid, opts={})
|
24
|
-
self.class.put("/jobs/#{job_id}/pages/#{gid}/reset", @options)
|
25
|
-
end
|
26
|
-
|
27
23
|
def enqueue(job_id, method, url, opts={})
|
28
|
-
body = {}
|
24
|
+
body = {}
|
29
25
|
body[:method] = method != "" ? method : "GET"
|
30
26
|
body[:url] = url
|
31
27
|
body[:page_type] = opts[:page_type] if opts[:page_type]
|
@@ -39,7 +35,7 @@ module AnswersEngine
|
|
39
35
|
body[:ua_type] = opts[:ua_type] if opts[:ua_type]
|
40
36
|
body[:no_redirect] = opts[:no_redirect] if opts[:no_redirect]
|
41
37
|
body[:cookie] = opts[:cookie] if opts[:cookie]
|
42
|
-
|
38
|
+
|
43
39
|
@options.merge!({body: body.to_json})
|
44
40
|
|
45
41
|
self.class.post("/jobs/#{job_id}/pages", @options)
|
@@ -51,7 +47,7 @@ module AnswersEngine
|
|
51
47
|
body[:pages] = opts.fetch(:pages) {[]}
|
52
48
|
body[:parsing_status] = opts.fetch(:parsing_status){ nil }
|
53
49
|
body[:log_error] = opts[:log_error] if opts[:log_error]
|
54
|
-
|
50
|
+
|
55
51
|
@options.merge!({body: body.to_json})
|
56
52
|
|
57
53
|
self.class.put("/jobs/#{job_id}/pages/#{gid}/parsing_update", @options)
|
@@ -59,4 +55,3 @@ module AnswersEngine
|
|
59
55
|
end
|
60
56
|
end
|
61
57
|
end
|
62
|
-
|
@@ -10,26 +10,28 @@ module AnswersEngine
|
|
10
10
|
end
|
11
11
|
|
12
12
|
def update(scraper_name, gid, opts={})
|
13
|
-
body = {}
|
13
|
+
body = {}
|
14
14
|
body[:page_type] = opts[:page_type] if opts[:page_type]
|
15
15
|
body[:priority] = opts[:priority] if opts[:priority]
|
16
16
|
body[:vars] = opts[:vars] if opts[:vars]
|
17
|
-
|
17
|
+
|
18
18
|
@options.merge!({body: body.to_json})
|
19
19
|
|
20
20
|
self.class.put("/scrapers/#{scraper_name}/current_job/pages/#{gid}", @options)
|
21
21
|
end
|
22
22
|
|
23
|
-
def refetch(scraper_name, opts=
|
24
|
-
|
23
|
+
def refetch(scraper_name, opts = nil)
|
24
|
+
opts ||= @options
|
25
|
+
self.class.put("/scrapers/#{scraper_name}/current_job/pages/refetch", opts)
|
25
26
|
end
|
26
27
|
|
27
|
-
def
|
28
|
-
|
28
|
+
def reparse(scraper_name, opts = nil)
|
29
|
+
opts ||= @options
|
30
|
+
self.class.put("/scrapers/#{scraper_name}/current_job/pages/reparse", opts)
|
29
31
|
end
|
30
32
|
|
31
33
|
def enqueue(scraper_name, method, url, opts={})
|
32
|
-
body = {}
|
34
|
+
body = {}
|
33
35
|
body[:method] = method != "" ? method : "GET"
|
34
36
|
body[:url] = url
|
35
37
|
body[:page_type] = opts[:page_type] if opts[:page_type]
|
@@ -43,7 +45,7 @@ module AnswersEngine
|
|
43
45
|
body[:ua_type] = opts[:ua_type] if opts[:ua_type]
|
44
46
|
body[:no_redirect] = opts[:no_redirect] if opts[:no_redirect]
|
45
47
|
body[:cookie] = opts[:cookie] if opts[:cookie]
|
46
|
-
|
48
|
+
|
47
49
|
@options.merge!({body: body.to_json})
|
48
50
|
|
49
51
|
self.class.post("/scrapers/#{scraper_name}/current_job/pages", @options)
|
@@ -52,4 +54,3 @@ module AnswersEngine
|
|
52
54
|
end
|
53
55
|
end
|
54
56
|
end
|
55
|
-
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: answersengine
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Parama Danoesubroto
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-07-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|