answersengine 0.3.3 → 0.4.0
checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f2515f220abb2835bf815a55a62382f6929e32a2629fda0cc7504f49704ef984
+  data.tar.gz: f898220e6b2dc947316915531c6ffc6faa7011de27f1a574d92f832984d85d8c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d8a750d0d2c7099b3e8d46a39d0a5f0b874296491a5ade11d6a56c8ca7611011f7d156e39e785b19bf0c735d109bf494c20738d14db2f6aa114668422485cf32
+  data.tar.gz: 0bd4e8cf8fbdec06a034769635272ff4f295d3741704930bf0f0209f7cf055ba267080cc61f65f139e760484dc4dda5984d3d6ad8066b16f42d46b1deb1df00b
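The SHA256 and SHA512 digests above cover the metadata.gz and data.tar.gz archives packed inside the released .gem file. As a quick aside (not part of this diff), the new SHA256 values can be checked against a downloaded copy of the gem with plain Ruby; the local file name below is a placeholder.

# Sketch: compare checksums.yaml's SHA256 digests with a locally downloaded gem.
require "digest"
require "rubygems/package"

expected = {
  "metadata.gz" => "f2515f220abb2835bf815a55a62382f6929e32a2629fda0cc7504f49704ef984",
  "data.tar.gz" => "f898220e6b2dc947316915531c6ffc6faa7011de27f1a574d92f832984d85d8c"
}

File.open("answersengine-0.4.0.gem", "rb") do |file|
  Gem::Package::TarReader.new(file).each do |entry|
    next unless expected.key?(entry.full_name)
    actual = Digest::SHA256.hexdigest(entry.read)
    puts "#{entry.full_name}: #{actual == expected[entry.full_name] ? 'OK' : 'MISMATCH'}"
  end
end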
@@ -94,7 +94,7 @@ module AnswersEngine
           puts "Error: #{options[:vars]} on vars is not a valid JSON"
         end
       end
-    end
+    end
 
     desc "refetch <scraper_name>", "Refetch Pages on a scraper's current job"
     long_desc <<-LONGDESC
@@ -106,35 +106,36 @@ module AnswersEngine
     def refetch(scraper_name)
       if !options.key?(:gid) && !options.key?(:fetch_fail) && !options.key?(:parse_fail)
         puts "Must specify either a --gid or --fetch-fail or --parse-fail"
-
-        client = Client::ScraperJobPage.new(options)
-        puts "#{client.refetch(scraper_name)}"
+        return
       end
+      client = Client::ScraperJobPage.new(options)
+      puts "#{client.refetch(scraper_name)}"
     end
 
-    desc "
+    desc "reparse <scraper_name>", "Reparse Pages on a scraper's current job"
     long_desc <<-LONGDESC
-
-
-    option :
-
+      Reparse pages in a scraper's current job. You need to specify either a --gid or --parse-fail.\x5
+    LONGDESC
+    option :gid, :aliases => :g, type: :string, desc: 'Reparse a specific GID'
+    option :parse_fail, type: :boolean, desc: 'Reparse only pages that fails parsing.'
+    def reparse(scraper_name)
       begin
         options[:vars] = JSON.parse(options[:vars]) if options[:vars]
 
-        if options
-
-
-        else
-          client = Client::ScraperJobPage.new(options)
-          puts "#{client.reset(scraper_name, gid, options)}"
+        if !options.key?(:gid) && !options.key?(:parse_fail)
+          puts "Must specify either a --gid or --parse-fail"
+          return
         end
 
+        client = Client::ScraperJobPage.new(options)
+        puts "#{client.reparse(scraper_name)}"
+
       rescue JSON::ParserError
         if options[:vars]
           puts "Error: #{options[:vars]} on vars is not a valid JSON"
         end
       end
-    end
+    end
 
     desc "show <scraper_name> <gid>", "Show a page in scraper's current job"
     long_desc <<-LONGDESC
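Both new CLI commands validate their flags and then delegate straight to Client::ScraperJobPage, printing whatever the API returns. Below is a minimal sketch of driving the same calls from Ruby rather than the CLI; the scraper name and GID are placeholders, and it is assumed the client picks up its API token and base URL from the gem's usual configuration, which is outside this diff.

require 'answersengine'

# Refetch every fetch-failed page on the scraper's current job
# (mirrors the refetch command with --fetch-fail).
refetch_client = AnswersEngine::Client::ScraperJobPage.new(fetch_fail: true)
puts refetch_client.refetch("my-scraper")

# Reparse one specific page by GID (mirrors the reparse command with --gid).
reparse_client = AnswersEngine::Client::ScraperJobPage.new(gid: "www.example.com-0123abcd")
puts reparse_client.reparse("my-scraper")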
@@ -166,7 +167,7 @@ module AnswersEngine
       query = {}
       query["order"] = options.delete(:head) if options[:head]
       query["job_type"] = "parsing" if options[:parsing]
-
+
       query["page_token"] = options.delete(:more) if options[:more]
       query["per_page"] = options.delete(:per_page) if options[:per_page]
 
@@ -177,7 +178,7 @@ module AnswersEngine
       else
         result = client.scraper_all_job_page_log(scraper_name, gid, {query: query})
       end
-
+
       if result['entries'].nil? || result["entries"].length == 0
         puts "No logs yet, please try again later."
       else
@@ -40,7 +40,7 @@ module AnswersEngine
         body[:pages] = opts.fetch(:pages) {[]}
         body[:seeding_status] = opts.fetch(:seeding_status){ nil }
         body[:log_error] = opts[:log_error] if opts[:log_error]
-
+
         @options.merge!({body: body.to_json})
 
         self.class.put("/jobs/#{job_id}/seeding_update", @options)
@@ -50,4 +50,3 @@ module AnswersEngine
 
   end
 end
-
@@ -10,22 +10,18 @@ module AnswersEngine
       end
 
       def update(job_id, gid, opts={})
-        body = {}
+        body = {}
         body[:page_type] = opts[:page_type] if opts[:page_type]
         body[:priority] = opts[:priority] if opts[:priority]
         body[:vars] = opts[:vars] if opts[:vars]
-
+
         @options.merge!({body: body.to_json})
 
         self.class.put("/jobs/#{job_id}/pages/#{gid}", @options)
       end
 
-      def reset(job_id, gid, opts={})
-        self.class.put("/jobs/#{job_id}/pages/#{gid}/reset", @options)
-      end
-
       def enqueue(job_id, method, url, opts={})
-        body = {}
+        body = {}
         body[:method] = method != "" ? method : "GET"
         body[:url] = url
         body[:page_type] = opts[:page_type] if opts[:page_type]
@@ -39,7 +35,7 @@ module AnswersEngine
         body[:ua_type] = opts[:ua_type] if opts[:ua_type]
         body[:no_redirect] = opts[:no_redirect] if opts[:no_redirect]
         body[:cookie] = opts[:cookie] if opts[:cookie]
-
+
         @options.merge!({body: body.to_json})
 
         self.class.post("/jobs/#{job_id}/pages", @options)
@@ -51,7 +47,7 @@ module AnswersEngine
         body[:pages] = opts.fetch(:pages) {[]}
         body[:parsing_status] = opts.fetch(:parsing_status){ nil }
         body[:log_error] = opts[:log_error] if opts[:log_error]
-
+
         @options.merge!({body: body.to_json})
 
         self.class.put("/jobs/#{job_id}/pages/#{gid}/parsing_update", @options)
@@ -59,4 +55,3 @@ module AnswersEngine
     end
   end
 end
-
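The job-scoped page client above (its class name is not visible in this excerpt; AnswersEngine::Client::JobPage is assumed here) keeps the same enqueue signature in 0.4.0; only whitespace changed and the reset method was dropped. A small usage sketch with placeholder values, limited to the body fields visible in these hunks:

require 'answersengine'

client = AnswersEngine::Client::JobPage.new({})

# POSTs to /jobs/1234/pages with a JSON body built from the options below.
response = client.enqueue(1234, "GET", "https://www.example.com/listings",
                          page_type: "listings",   # placeholder page type
                          ua_type: "desktop")      # placeholder user-agent class
puts response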
@@ -10,26 +10,28 @@ module AnswersEngine
       end
 
       def update(scraper_name, gid, opts={})
-        body = {}
+        body = {}
         body[:page_type] = opts[:page_type] if opts[:page_type]
         body[:priority] = opts[:priority] if opts[:priority]
         body[:vars] = opts[:vars] if opts[:vars]
-
+
         @options.merge!({body: body.to_json})
 
         self.class.put("/scrapers/#{scraper_name}/current_job/pages/#{gid}", @options)
       end
 
-      def refetch(scraper_name, opts=
-
+      def refetch(scraper_name, opts = nil)
+        opts ||= @options
+        self.class.put("/scrapers/#{scraper_name}/current_job/pages/refetch", opts)
       end
 
-      def
-
+      def reparse(scraper_name, opts = nil)
+        opts ||= @options
+        self.class.put("/scrapers/#{scraper_name}/current_job/pages/reparse", opts)
       end
 
       def enqueue(scraper_name, method, url, opts={})
-        body = {}
+        body = {}
         body[:method] = method != "" ? method : "GET"
         body[:url] = url
         body[:page_type] = opts[:page_type] if opts[:page_type]
@@ -43,7 +45,7 @@ module AnswersEngine
         body[:ua_type] = opts[:ua_type] if opts[:ua_type]
         body[:no_redirect] = opts[:no_redirect] if opts[:no_redirect]
         body[:cookie] = opts[:cookie] if opts[:cookie]
-
+
         @options.merge!({body: body.to_json})
 
         self.class.post("/scrapers/#{scraper_name}/current_job/pages", @options)
@@ -52,4 +54,3 @@ module AnswersEngine
     end
   end
 end
-
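The new refetch and reparse methods accept an optional opts argument and fall back to the instance's @options when it is nil, so a single call can substitute its own request options. The sketch below shows both forms; the :query layout of the explicit hash follows HTTParty conventions and is an assumption about the shared client base class, which is not part of this diff.

require 'answersengine'

client = AnswersEngine::Client::ScraperJobPage.new(parse_fail: true)

# Default form: opts is nil, so the options built in #initialize are reused.
puts client.reparse("my-scraper")   # PUT /scrapers/my-scraper/current_job/pages/reparse

# Explicit form: hand the call its own options hash (assumed HTTParty-style).
puts client.refetch("my-scraper", query: { fetch_fail: true })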
metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: answersengine
 version: !ruby/object:Gem::Version
-  version: 0.3.3
+  version: 0.4.0
 platform: ruby
 authors:
 - Parama Danoesubroto
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-
+date: 2019-07-21 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: thor