answersengine 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9ee80d67995d7fd6b3fac3b64b4d20abdc4c2dee40919863bac3a29b7aad0553
4
- data.tar.gz: 6dc0454f1bb29ec09f59e69c2085f00c1eb2e2eea5bfc397669be424148964a9
3
+ metadata.gz: f2515f220abb2835bf815a55a62382f6929e32a2629fda0cc7504f49704ef984
4
+ data.tar.gz: f898220e6b2dc947316915531c6ffc6faa7011de27f1a574d92f832984d85d8c
5
5
  SHA512:
6
- metadata.gz: cb52e957ac46ea26ce2c43c1daad6568e00e40aa617ec764a450837372d737abf1bd4fde706cad543972535d3c8b041826c4b2cc1ef7fb3adc3f932ce4936320
7
- data.tar.gz: 2f9db966d3f56efbbcfa54e79fa57798ea36baf937a2df6872905027cfb324a110958e31a11291861be39e90d9c472bafdc8579f68a44321fb29728229acdc97
6
+ metadata.gz: d8a750d0d2c7099b3e8d46a39d0a5f0b874296491a5ade11d6a56c8ca7611011f7d156e39e785b19bf0c735d109bf494c20738d14db2f6aa114668422485cf32
7
+ data.tar.gz: 0bd4e8cf8fbdec06a034769635272ff4f295d3741704930bf0f0209f7cf055ba267080cc61f65f139e760484dc4dda5984d3d6ad8066b16f42d46b1deb1df00b
@@ -94,7 +94,7 @@ module AnswersEngine
94
94
  puts "Error: #{options[:vars]} on vars is not a valid JSON"
95
95
  end
96
96
  end
97
- end
97
+ end
98
98
 
99
99
  desc "refetch <scraper_name>", "Refetch Pages on a scraper's current job"
100
100
  long_desc <<-LONGDESC
@@ -106,35 +106,36 @@ module AnswersEngine
106
106
  def refetch(scraper_name)
107
107
  if !options.key?(:gid) && !options.key?(:fetch_fail) && !options.key?(:parse_fail)
108
108
  puts "Must specify either a --gid or --fetch-fail or --parse-fail"
109
- else
110
- client = Client::ScraperJobPage.new(options)
111
- puts "#{client.refetch(scraper_name)}"
109
+ return
112
110
  end
111
+ client = Client::ScraperJobPage.new(options)
112
+ puts "#{client.refetch(scraper_name)}"
113
113
  end
114
114
 
115
- desc "reset <scraper_name> <gid>", "Reset fetching and parsing of a page in a scraper's current job"
115
+ desc "reparse <scraper_name>", "Reparse Pages on a scraper's current job"
116
116
  long_desc <<-LONGDESC
117
- Reset fetching and parsing of a page in a scraper's current job.\x5
118
- LONGDESC
119
- option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
120
- def reset(scraper_name, gid)
117
+ Reparse pages in a scraper's current job. You need to specify either a --gid or --parse-fail.\x5
118
+ LONGDESC
119
+ option :gid, :aliases => :g, type: :string, desc: 'Reparse a specific GID'
120
+ option :parse_fail, type: :boolean, desc: 'Reparse only pages that fails parsing.'
121
+ def reparse(scraper_name)
121
122
  begin
122
123
  options[:vars] = JSON.parse(options[:vars]) if options[:vars]
123
124
 
124
- if options[:job]
125
- client = Client::JobPage.new(options)
126
- puts "#{client.reset(options[:job], gid, options)}"
127
- else
128
- client = Client::ScraperJobPage.new(options)
129
- puts "#{client.reset(scraper_name, gid, options)}"
125
+ if !options.key?(:gid) && !options.key?(:parse_fail)
126
+ puts "Must specify either a --gid or --parse-fail"
127
+ return
130
128
  end
131
129
 
130
+ client = Client::ScraperJobPage.new(options)
131
+ puts "#{client.reparse(scraper_name)}"
132
+
132
133
  rescue JSON::ParserError
133
134
  if options[:vars]
134
135
  puts "Error: #{options[:vars]} on vars is not a valid JSON"
135
136
  end
136
137
  end
137
- end
138
+ end
138
139
 
139
140
  desc "show <scraper_name> <gid>", "Show a page in scraper's current job"
140
141
  long_desc <<-LONGDESC
@@ -166,7 +167,7 @@ module AnswersEngine
166
167
  query = {}
167
168
  query["order"] = options.delete(:head) if options[:head]
168
169
  query["job_type"] = "parsing" if options[:parsing]
169
-
170
+
170
171
  query["page_token"] = options.delete(:more) if options[:more]
171
172
  query["per_page"] = options.delete(:per_page) if options[:per_page]
172
173
 
@@ -177,7 +178,7 @@ module AnswersEngine
177
178
  else
178
179
  result = client.scraper_all_job_page_log(scraper_name, gid, {query: query})
179
180
  end
180
-
181
+
181
182
  if result['entries'].nil? || result["entries"].length == 0
182
183
  puts "No logs yet, please try again later."
183
184
  else
@@ -40,7 +40,7 @@ module AnswersEngine
40
40
  body[:pages] = opts.fetch(:pages) {[]}
41
41
  body[:seeding_status] = opts.fetch(:seeding_status){ nil }
42
42
  body[:log_error] = opts[:log_error] if opts[:log_error]
43
-
43
+
44
44
  @options.merge!({body: body.to_json})
45
45
 
46
46
  self.class.put("/jobs/#{job_id}/seeding_update", @options)
@@ -50,4 +50,3 @@ module AnswersEngine
50
50
 
51
51
  end
52
52
  end
53
-
@@ -10,22 +10,18 @@ module AnswersEngine
10
10
  end
11
11
 
12
12
  def update(job_id, gid, opts={})
13
- body = {}
13
+ body = {}
14
14
  body[:page_type] = opts[:page_type] if opts[:page_type]
15
15
  body[:priority] = opts[:priority] if opts[:priority]
16
16
  body[:vars] = opts[:vars] if opts[:vars]
17
-
17
+
18
18
  @options.merge!({body: body.to_json})
19
19
 
20
20
  self.class.put("/jobs/#{job_id}/pages/#{gid}", @options)
21
21
  end
22
22
 
23
- def reset(job_id, gid, opts={})
24
- self.class.put("/jobs/#{job_id}/pages/#{gid}/reset", @options)
25
- end
26
-
27
23
  def enqueue(job_id, method, url, opts={})
28
- body = {}
24
+ body = {}
29
25
  body[:method] = method != "" ? method : "GET"
30
26
  body[:url] = url
31
27
  body[:page_type] = opts[:page_type] if opts[:page_type]
@@ -39,7 +35,7 @@ module AnswersEngine
39
35
  body[:ua_type] = opts[:ua_type] if opts[:ua_type]
40
36
  body[:no_redirect] = opts[:no_redirect] if opts[:no_redirect]
41
37
  body[:cookie] = opts[:cookie] if opts[:cookie]
42
-
38
+
43
39
  @options.merge!({body: body.to_json})
44
40
 
45
41
  self.class.post("/jobs/#{job_id}/pages", @options)
@@ -51,7 +47,7 @@ module AnswersEngine
51
47
  body[:pages] = opts.fetch(:pages) {[]}
52
48
  body[:parsing_status] = opts.fetch(:parsing_status){ nil }
53
49
  body[:log_error] = opts[:log_error] if opts[:log_error]
54
-
50
+
55
51
  @options.merge!({body: body.to_json})
56
52
 
57
53
  self.class.put("/jobs/#{job_id}/pages/#{gid}/parsing_update", @options)
@@ -59,4 +55,3 @@ module AnswersEngine
59
55
  end
60
56
  end
61
57
  end
62
-
@@ -10,26 +10,28 @@ module AnswersEngine
10
10
  end
11
11
 
12
12
  def update(scraper_name, gid, opts={})
13
- body = {}
13
+ body = {}
14
14
  body[:page_type] = opts[:page_type] if opts[:page_type]
15
15
  body[:priority] = opts[:priority] if opts[:priority]
16
16
  body[:vars] = opts[:vars] if opts[:vars]
17
-
17
+
18
18
  @options.merge!({body: body.to_json})
19
19
 
20
20
  self.class.put("/scrapers/#{scraper_name}/current_job/pages/#{gid}", @options)
21
21
  end
22
22
 
23
- def refetch(scraper_name, opts={})
24
- self.class.put("/scrapers/#{scraper_name}/current_job/pages/refetch", @options)
23
+ def refetch(scraper_name, opts = nil)
24
+ opts ||= @options
25
+ self.class.put("/scrapers/#{scraper_name}/current_job/pages/refetch", opts)
25
26
  end
26
27
 
27
- def reset(scraper_name, gid, opts={})
28
- self.class.put("/scrapers/#{scraper_name}/current_job/pages/#{gid}/reset", @options)
28
+ def reparse(scraper_name, opts = nil)
29
+ opts ||= @options
30
+ self.class.put("/scrapers/#{scraper_name}/current_job/pages/reparse", opts)
29
31
  end
30
32
 
31
33
  def enqueue(scraper_name, method, url, opts={})
32
- body = {}
34
+ body = {}
33
35
  body[:method] = method != "" ? method : "GET"
34
36
  body[:url] = url
35
37
  body[:page_type] = opts[:page_type] if opts[:page_type]
@@ -43,7 +45,7 @@ module AnswersEngine
43
45
  body[:ua_type] = opts[:ua_type] if opts[:ua_type]
44
46
  body[:no_redirect] = opts[:no_redirect] if opts[:no_redirect]
45
47
  body[:cookie] = opts[:cookie] if opts[:cookie]
46
-
48
+
47
49
  @options.merge!({body: body.to_json})
48
50
 
49
51
  self.class.post("/scrapers/#{scraper_name}/current_job/pages", @options)
@@ -52,4 +54,3 @@ module AnswersEngine
52
54
  end
53
55
  end
54
56
  end
55
-
@@ -1,3 +1,3 @@
1
1
  module AnswersEngine
2
- VERSION = "0.3.3"
2
+ VERSION = "0.4.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: answersengine
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-03-24 00:00:00.000000000 Z
11
+ date: 2019-07-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor