answersengine 0.3.3 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9ee80d67995d7fd6b3fac3b64b4d20abdc4c2dee40919863bac3a29b7aad0553
4
- data.tar.gz: 6dc0454f1bb29ec09f59e69c2085f00c1eb2e2eea5bfc397669be424148964a9
3
+ metadata.gz: f2515f220abb2835bf815a55a62382f6929e32a2629fda0cc7504f49704ef984
4
+ data.tar.gz: f898220e6b2dc947316915531c6ffc6faa7011de27f1a574d92f832984d85d8c
5
5
  SHA512:
6
- metadata.gz: cb52e957ac46ea26ce2c43c1daad6568e00e40aa617ec764a450837372d737abf1bd4fde706cad543972535d3c8b041826c4b2cc1ef7fb3adc3f932ce4936320
7
- data.tar.gz: 2f9db966d3f56efbbcfa54e79fa57798ea36baf937a2df6872905027cfb324a110958e31a11291861be39e90d9c472bafdc8579f68a44321fb29728229acdc97
6
+ metadata.gz: d8a750d0d2c7099b3e8d46a39d0a5f0b874296491a5ade11d6a56c8ca7611011f7d156e39e785b19bf0c735d109bf494c20738d14db2f6aa114668422485cf32
7
+ data.tar.gz: 0bd4e8cf8fbdec06a034769635272ff4f295d3741704930bf0f0209f7cf055ba267080cc61f65f139e760484dc4dda5984d3d6ad8066b16f42d46b1deb1df00b
@@ -94,7 +94,7 @@ module AnswersEngine
94
94
  puts "Error: #{options[:vars]} on vars is not a valid JSON"
95
95
  end
96
96
  end
97
- end
97
+ end
98
98
 
99
99
  desc "refetch <scraper_name>", "Refetch Pages on a scraper's current job"
100
100
  long_desc <<-LONGDESC
@@ -106,35 +106,36 @@ module AnswersEngine
106
106
  def refetch(scraper_name)
107
107
  if !options.key?(:gid) && !options.key?(:fetch_fail) && !options.key?(:parse_fail)
108
108
  puts "Must specify either a --gid or --fetch-fail or --parse-fail"
109
- else
110
- client = Client::ScraperJobPage.new(options)
111
- puts "#{client.refetch(scraper_name)}"
109
+ return
112
110
  end
111
+ client = Client::ScraperJobPage.new(options)
112
+ puts "#{client.refetch(scraper_name)}"
113
113
  end
114
114
 
115
- desc "reset <scraper_name> <gid>", "Reset fetching and parsing of a page in a scraper's current job"
115
+ desc "reparse <scraper_name>", "Reparse Pages on a scraper's current job"
116
116
  long_desc <<-LONGDESC
117
- Reset fetching and parsing of a page in a scraper's current job.\x5
118
- LONGDESC
119
- option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
120
- def reset(scraper_name, gid)
117
+ Reparse pages in a scraper's current job. You need to specify either a --gid or --parse-fail.\x5
118
+ LONGDESC
119
+ option :gid, :aliases => :g, type: :string, desc: 'Reparse a specific GID'
120
+ option :parse_fail, type: :boolean, desc: 'Reparse only pages that fails parsing.'
121
+ def reparse(scraper_name)
121
122
  begin
122
123
  options[:vars] = JSON.parse(options[:vars]) if options[:vars]
123
124
 
124
- if options[:job]
125
- client = Client::JobPage.new(options)
126
- puts "#{client.reset(options[:job], gid, options)}"
127
- else
128
- client = Client::ScraperJobPage.new(options)
129
- puts "#{client.reset(scraper_name, gid, options)}"
125
+ if !options.key?(:gid) && !options.key?(:parse_fail)
126
+ puts "Must specify either a --gid or --parse-fail"
127
+ return
130
128
  end
131
129
 
130
+ client = Client::ScraperJobPage.new(options)
131
+ puts "#{client.reparse(scraper_name)}"
132
+
132
133
  rescue JSON::ParserError
133
134
  if options[:vars]
134
135
  puts "Error: #{options[:vars]} on vars is not a valid JSON"
135
136
  end
136
137
  end
137
- end
138
+ end
138
139
 
139
140
  desc "show <scraper_name> <gid>", "Show a page in scraper's current job"
140
141
  long_desc <<-LONGDESC
@@ -166,7 +167,7 @@ module AnswersEngine
166
167
  query = {}
167
168
  query["order"] = options.delete(:head) if options[:head]
168
169
  query["job_type"] = "parsing" if options[:parsing]
169
-
170
+
170
171
  query["page_token"] = options.delete(:more) if options[:more]
171
172
  query["per_page"] = options.delete(:per_page) if options[:per_page]
172
173
 
@@ -177,7 +178,7 @@ module AnswersEngine
177
178
  else
178
179
  result = client.scraper_all_job_page_log(scraper_name, gid, {query: query})
179
180
  end
180
-
181
+
181
182
  if result['entries'].nil? || result["entries"].length == 0
182
183
  puts "No logs yet, please try again later."
183
184
  else
@@ -40,7 +40,7 @@ module AnswersEngine
40
40
  body[:pages] = opts.fetch(:pages) {[]}
41
41
  body[:seeding_status] = opts.fetch(:seeding_status){ nil }
42
42
  body[:log_error] = opts[:log_error] if opts[:log_error]
43
-
43
+
44
44
  @options.merge!({body: body.to_json})
45
45
 
46
46
  self.class.put("/jobs/#{job_id}/seeding_update", @options)
@@ -50,4 +50,3 @@ module AnswersEngine
50
50
 
51
51
  end
52
52
  end
53
-
@@ -10,22 +10,18 @@ module AnswersEngine
10
10
  end
11
11
 
12
12
  def update(job_id, gid, opts={})
13
- body = {}
13
+ body = {}
14
14
  body[:page_type] = opts[:page_type] if opts[:page_type]
15
15
  body[:priority] = opts[:priority] if opts[:priority]
16
16
  body[:vars] = opts[:vars] if opts[:vars]
17
-
17
+
18
18
  @options.merge!({body: body.to_json})
19
19
 
20
20
  self.class.put("/jobs/#{job_id}/pages/#{gid}", @options)
21
21
  end
22
22
 
23
- def reset(job_id, gid, opts={})
24
- self.class.put("/jobs/#{job_id}/pages/#{gid}/reset", @options)
25
- end
26
-
27
23
  def enqueue(job_id, method, url, opts={})
28
- body = {}
24
+ body = {}
29
25
  body[:method] = method != "" ? method : "GET"
30
26
  body[:url] = url
31
27
  body[:page_type] = opts[:page_type] if opts[:page_type]
@@ -39,7 +35,7 @@ module AnswersEngine
39
35
  body[:ua_type] = opts[:ua_type] if opts[:ua_type]
40
36
  body[:no_redirect] = opts[:no_redirect] if opts[:no_redirect]
41
37
  body[:cookie] = opts[:cookie] if opts[:cookie]
42
-
38
+
43
39
  @options.merge!({body: body.to_json})
44
40
 
45
41
  self.class.post("/jobs/#{job_id}/pages", @options)
@@ -51,7 +47,7 @@ module AnswersEngine
51
47
  body[:pages] = opts.fetch(:pages) {[]}
52
48
  body[:parsing_status] = opts.fetch(:parsing_status){ nil }
53
49
  body[:log_error] = opts[:log_error] if opts[:log_error]
54
-
50
+
55
51
  @options.merge!({body: body.to_json})
56
52
 
57
53
  self.class.put("/jobs/#{job_id}/pages/#{gid}/parsing_update", @options)
@@ -59,4 +55,3 @@ module AnswersEngine
59
55
  end
60
56
  end
61
57
  end
62
-
@@ -10,26 +10,28 @@ module AnswersEngine
10
10
  end
11
11
 
12
12
  def update(scraper_name, gid, opts={})
13
- body = {}
13
+ body = {}
14
14
  body[:page_type] = opts[:page_type] if opts[:page_type]
15
15
  body[:priority] = opts[:priority] if opts[:priority]
16
16
  body[:vars] = opts[:vars] if opts[:vars]
17
-
17
+
18
18
  @options.merge!({body: body.to_json})
19
19
 
20
20
  self.class.put("/scrapers/#{scraper_name}/current_job/pages/#{gid}", @options)
21
21
  end
22
22
 
23
- def refetch(scraper_name, opts={})
24
- self.class.put("/scrapers/#{scraper_name}/current_job/pages/refetch", @options)
23
+ def refetch(scraper_name, opts = nil)
24
+ opts ||= @options
25
+ self.class.put("/scrapers/#{scraper_name}/current_job/pages/refetch", opts)
25
26
  end
26
27
 
27
- def reset(scraper_name, gid, opts={})
28
- self.class.put("/scrapers/#{scraper_name}/current_job/pages/#{gid}/reset", @options)
28
+ def reparse(scraper_name, opts = nil)
29
+ opts ||= @options
30
+ self.class.put("/scrapers/#{scraper_name}/current_job/pages/reparse", opts)
29
31
  end
30
32
 
31
33
  def enqueue(scraper_name, method, url, opts={})
32
- body = {}
34
+ body = {}
33
35
  body[:method] = method != "" ? method : "GET"
34
36
  body[:url] = url
35
37
  body[:page_type] = opts[:page_type] if opts[:page_type]
@@ -43,7 +45,7 @@ module AnswersEngine
43
45
  body[:ua_type] = opts[:ua_type] if opts[:ua_type]
44
46
  body[:no_redirect] = opts[:no_redirect] if opts[:no_redirect]
45
47
  body[:cookie] = opts[:cookie] if opts[:cookie]
46
-
48
+
47
49
  @options.merge!({body: body.to_json})
48
50
 
49
51
  self.class.post("/scrapers/#{scraper_name}/current_job/pages", @options)
@@ -52,4 +54,3 @@ module AnswersEngine
52
54
  end
53
55
  end
54
56
  end
55
-
@@ -1,3 +1,3 @@
1
1
  module AnswersEngine
2
- VERSION = "0.3.3"
2
+ VERSION = "0.4.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: answersengine
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-03-24 00:00:00.000000000 Z
11
+ date: 2019-07-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor