datahen 0.14.1 → 0.14.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f53bb8631bee37890dddae7045b89342fcb01611f693fe454a2d001ea9cdfe30
4
- data.tar.gz: fe2ab54f12f5865ea60bb7d2f447e09e7eb9cf5dff8a74bfbdced103f8bd03e1
3
+ metadata.gz: 63ae3d8aba6bb70a89033d592db3242905d70d7e0ea6a90455c370cf3b8a2dde
4
+ data.tar.gz: 3f4644be8702eb789f8d4c3e099750dade2e9d9f83ab6a359f222972d11938bc
5
5
  SHA512:
6
- metadata.gz: 59a642fe8ea6274bfb378e60ae4e8e730bb76cc58a188f0c267636d063afa45eb202c635b05b3a906a67f278e03e9f2e5637750f79ef5b8edf8e5ad3a42668d7
7
- data.tar.gz: b86f95465887321c090724a65db613a57350050ccdd5c1fb4c407701ac560d6bf6a2ef2964b7c44722578205672483d713b8358e5e3e68c29f59076602391f84
6
+ metadata.gz: 20f50d8b4a52d360fe07f32aaa0350a190b80c30157d0d2c1e33ca745013ed8ab1a9a97d81acfc400fa6ff3250394ea44e69bfe894318d4798ec8d99567ab736
7
+ data.tar.gz: 2724495723be6e2d249e8697e28102674c3685ce4cb384bcefaeca03d606361bf2e0e24c098954ad5b872b7e067a0cc5d0c1ed84a10b7b703144ad3a2ba16b4f
@@ -16,6 +16,7 @@ require 'datahen/cli/parser'
16
16
  require 'datahen/cli/seeder'
17
17
  require 'datahen/cli/finisher'
18
18
  require 'datahen/cli/env_var'
19
+ require 'datahen/cli/account'
19
20
 
20
21
 
21
22
 
@@ -41,5 +42,8 @@ module Datahen
41
42
 
42
43
  desc "var SUBCOMMAND ...ARGS", "for environment variable related activities"
43
44
  subcommand "var", EnvVar
45
+
46
+ desc "account SUBCOMMAND ...ARGS", "for account related activities"
47
+ subcommand "account", Account
44
48
  end
45
49
  end
@@ -0,0 +1,17 @@
1
+ module Datahen
2
+ class CLI < Thor
3
+ class Account < Thor
4
+
5
+ desc "profile", "displays the account applied profile"
6
+ long_desc <<-LONGDESC
7
+ Displays the account applied profile
8
+ LONGDESC
9
+ def profile()
10
+ client = Client::Account.new(options)
11
+ puts "#{client.profile()}"
12
+ end
13
+
14
+ end
15
+ end
16
+
17
+ end
@@ -2,7 +2,7 @@ module Datahen
2
2
  class CLI < Thor
3
3
  class EnvVar < Thor
4
4
  desc "list", "List environment variables on the account"
5
-
5
+
6
6
  long_desc <<-LONGDESC
7
7
  List all environment variables on the account.
8
8
  LONGDESC
@@ -19,7 +19,7 @@ module Datahen
19
19
  <name>: Var name can only consist of alphabets, numbers, underscores. Name must be unique to your account, otherwise it will be overwritten.\x5
20
20
  <value>: Value of variable.\x5
21
21
  LONGDESC
22
- option :secret, type: :boolean, desc: 'Set true to make it decrypt the value. Default: false'
22
+ option :secret, type: :boolean, desc: 'Set true to make it decrypt the value. Default: false'
23
23
  def set(name, value)
24
24
  # puts "options #{options}"
25
25
  client = Client::EnvVar.new(options)
@@ -38,10 +38,6 @@ module Datahen
38
38
  puts "#{client.unset(name)}"
39
39
  end
40
40
 
41
-
42
-
43
-
44
-
45
41
  end
46
42
  end
47
43
 
@@ -1,8 +1,8 @@
1
1
  module Datahen
2
2
  class CLI < Thor
3
3
  class Scraper < Thor
4
- desc "list", "List scrapers"
5
4
 
5
+ desc "list", "List scrapers"
6
6
  long_desc <<-LONGDESC
7
7
  List all scrapers.
8
8
  LONGDESC
@@ -29,6 +29,7 @@ module Datahen
29
29
  option :cancel_current_job, type: :boolean, desc: 'Set true to cancel currently active job if scheduler starts. Default: false'
30
30
  option :schedule, type: :string, desc: 'Set the schedule of the scraper to run. Must be in CRON format.'
31
31
  option :timezone, type: :string, desc: "Set the scheduler's timezone. Must be in IANA Timezone format. Defaults to \"America/Toronto\""
32
+ option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
32
33
  def create(scraper_name, git_repository)
33
34
  # puts "options #{options}"
34
35
  client = Client::Scraper.new(options)
@@ -51,6 +52,7 @@ module Datahen
51
52
  option :cancel_current_job, type: :boolean, desc: 'Set true to cancel currently active job if scheduler starts. Default: false'
52
53
  option :schedule, type: :string, desc: 'Set the schedule of the scraper to run. Must be in CRON format.'
53
54
  option :timezone, type: :string, desc: "Set the scheduler's timezone. Must be in IANA Timezone format. Defaults to \"America/Toronto\""
55
+ option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
54
56
  def update(scraper_name)
55
57
  client = Client::Scraper.new(options)
56
58
  puts "#{client.update(scraper_name, options)}"
@@ -164,15 +166,25 @@ module Datahen
164
166
  option :"max-timestamp", type: :string, desc: 'Ending timestamp point in time to query historic stats (inclusive)'
165
167
  option :"limit", type: :numeric, desc: 'Limit stats retrieved'
166
168
  option :"order", type: :numeric, desc: 'Order stats by timestamp [DESC]'
169
+ option :live, type: :boolean, desc: 'Get data from the live stats history, not cached stats history.'
170
+ option :filter, type: :string, desc: 'Filter results on `day` or `hour`, if not specified will return all records.'
167
171
  def history(scraper_name)
168
172
  client = Client::JobStat.new(options)
169
173
  if options[:job]
170
- puts "#{client.job_stats_history(options[:job])}"
174
+ puts "#{client.job_stats_history(options[:job], options)}"
171
175
  else
172
- puts "#{client.scraper_job_stats_history(scraper_name)}"
176
+ puts "#{client.scraper_job_stats_history(scraper_name, options)}"
173
177
  end
174
178
  end
175
179
 
180
+ desc "profile <scraper_name>", "displays the scraper applied profile"
181
+ long_desc <<-LONGDESC
182
+ Displays the account applied profile
183
+ LONGDESC
184
+ def profile(scraper_name)
185
+ client = Client::Scraper.new(options)
186
+ puts "#{client.profile(scraper_name)}"
187
+ end
176
188
 
177
189
  desc "job SUBCOMMAND ...ARGS", "manage scrapers jobs"
178
190
  subcommand "job", ScraperJob
@@ -7,10 +7,16 @@ module Datahen
7
7
  end
8
8
 
9
9
  desc "show <scraper_name>", "Show a scraper's current job (Defaults to showing data from cached job)"
10
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
10
11
  option :live, type: :boolean, desc: 'Get data from the live job, not cached job.'
11
12
  def show(scraper_name)
12
- client = Client::ScraperJob.new(options)
13
- puts "#{client.find(scraper_name, options)}"
13
+ if options[:job]
14
+ client = Client::Job.new(options)
15
+ puts "#{client.find(options[:job], options)}"
16
+ else
17
+ client = Client::ScraperJob.new(options)
18
+ puts "#{client.find(scraper_name, options)}"
19
+ end
14
20
  end
15
21
 
16
22
 
@@ -58,7 +64,7 @@ module Datahen
58
64
 
59
65
  desc "pause <scraper_name>", "pauses a scraper's current job"
60
66
  long_desc <<-LONGDESC
61
- pauses a scraper's current job
67
+ Pauses a scraper's current job
62
68
  LONGDESC
63
69
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
64
70
  def pause(scraper_name)
@@ -79,6 +85,7 @@ module Datahen
79
85
  option :workers, :aliases => :w, type: :numeric, desc: 'Set how many standard workers to use. Scraper job must be restarted(paused then resumed, or cancelled then resumed) for it to take effect. Default: 1. '
80
86
  option :browsers, type: :numeric, desc: 'Set how many browser workers to use. Scraper job must be restarted(paused then resumed, or cancelled then resumed) for it to take effect. Default: 0. '
81
87
  option :proxy_type, desc: 'Set the Proxy type. Default: standard'
88
+ option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
82
89
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
83
90
  def update(scraper_name)
84
91
  if options[:job]
@@ -90,6 +97,21 @@ module Datahen
90
97
  end
91
98
  end
92
99
 
100
+ desc "profile <scraper_name>", "displays a scraper's current job applied profile"
101
+ long_desc <<-LONGDESC
102
+ Displays a scraper's current job applied profile
103
+ LONGDESC
104
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
105
+ def profile(scraper_name)
106
+ if options[:job]
107
+ client = Client::Job.new(options)
108
+ puts "#{client.profile(options[:job])}"
109
+ else
110
+ client = Client::ScraperJob.new(options)
111
+ puts "#{client.profile(scraper_name)}"
112
+ end
113
+ end
114
+
93
115
  desc "var SUBCOMMAND ...ARGS", "for managing scraper's job variables"
94
116
  subcommand "var", ScraperJobVar
95
117
 
@@ -230,7 +230,7 @@ module Datahen
230
230
  end
231
231
  end
232
232
 
233
- desc "failedcontent <gid>", "Show a page's failed content in scraper's current job"
233
+ desc "failedcontent <scraper_name> <gid>", "Show a page's failed content in scraper's current job"
234
234
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
235
235
  def failedcontent(scraper_name, gid)
236
236
  result = nil
@@ -1,4 +1,5 @@
1
1
  require "datahen/client/base"
2
+ require "datahen/client/account"
2
3
  require "datahen/client/auth_token"
3
4
  require "datahen/client/deploy_key"
4
5
  require 'datahen/client/export'
@@ -24,7 +25,6 @@ require "datahen/client/job_var"
24
25
  require "datahen/client/scraper_job_var"
25
26
  require "datahen/client/job_finisher"
26
27
 
27
-
28
28
  module Datahen
29
29
  module Client
30
30
  end
@@ -0,0 +1,13 @@
1
+ module Datahen
2
+ module Client
3
+ class Account < Datahen::Client::Base
4
+
5
+ def profile(opts={})
6
+ params = @options.merge(opts)
7
+
8
+ self.class.get("/profile", params)
9
+ end
10
+
11
+ end
12
+ end
13
+ end
@@ -5,6 +5,8 @@ module Datahen
5
5
  class Base
6
6
  include HTTParty
7
7
 
8
+ default_timeout 60
9
+
8
10
  def self.env_auth_token
9
11
  ENV['DATAHEN_TOKEN']
10
12
  end
@@ -55,6 +57,7 @@ module Datahen
55
57
  query[:"max-timestamp"] = opts[:"max-timestamp"] if opts[:"max-timestamp"]
56
58
  query[:limit] = opts[:limit] if opts[:limit]
57
59
  query[:order] = opts[:order] if opts[:order]
60
+ query[:filter] = opts[:filter] if opts[:filter]
58
61
 
59
62
  if opts[:query]
60
63
  if opts[:query].is_a?(Hash)
@@ -20,6 +20,7 @@ module Datahen
20
20
  body[:standard_worker_count] = opts[:workers] if opts[:workers]
21
21
  body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
22
22
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
23
+ body[:profile] = opts[:profile] if opts[:profile]
23
24
  params = @options.merge({body: body.to_json})
24
25
 
25
26
  self.class.put("/jobs/#{job_id}", params)
@@ -64,6 +65,12 @@ module Datahen
64
65
  self.class.put("/jobs/#{job_id}/finisher_update", params)
65
66
  end
66
67
 
68
+ def profile(job_id, opts={})
69
+ params = @options.merge(opts)
70
+
71
+ self.class.get("/jobs/#{job_id}/profile", params)
72
+ end
73
+
67
74
  end
68
75
 
69
76
  end
@@ -18,12 +18,20 @@ module Datahen
18
18
  end
19
19
  end
20
20
 
21
- def job_stats_history(job_id)
22
- self.class.get("/jobs/#{job_id}/stats/history", @options)
21
+ def job_stats_history(job_id, opts={})
22
+ if opts[:live]
23
+ self.class.get("/jobs/#{job_id}/stats/history", @options)
24
+ else
25
+ self.class.get("/cached/jobs/#{job_id}/stats/history", @options)
26
+ end
23
27
  end
24
28
 
25
- def scraper_job_stats_history(scraper_name)
26
- self.class.get("/scrapers/#{scraper_name}/current_job/stats/history", @options)
29
+ def scraper_job_stats_history(scraper_name, opts={})
30
+ if opts[:live]
31
+ self.class.get("/scrapers/#{scraper_name}/current_job/stats/history", @options)
32
+ else
33
+ self.class.get("/cached/scrapers/#{scraper_name}/current_job/stats/history", @options)
34
+ end
27
35
  end
28
36
 
29
37
  end
@@ -25,6 +25,7 @@ module Datahen
25
25
  body[:cancel_current_job] = opts[:cancel_current_job] if opts[:cancel_current_job]
26
26
  body[:schedule] = opts[:schedule] if opts[:schedule]
27
27
  body[:timezone] = opts[:timezone] if opts[:timezone]
28
+ body[:profile] = opts[:profile] if opts[:profile]
28
29
  params = @options.merge({body: body.to_json})
29
30
  self.class.post("/scrapers", params)
30
31
  end
@@ -43,6 +44,7 @@ module Datahen
43
44
  body[:cancel_current_job] = opts[:cancel_current_job] if opts.has_key?("cancel_current_job") || opts.has_key?(:cancel_current_job)
44
45
  body[:schedule] = opts[:schedule] if opts[:schedule]
45
46
  body[:timezone] = opts[:timezone] if opts[:timezone]
47
+ body[:profile] = opts[:profile] if opts[:profile]
46
48
  params = @options.merge({body: body.to_json})
47
49
 
48
50
  self.class.put("/scrapers/#{scraper_name}", params)
@@ -52,6 +54,12 @@ module Datahen
52
54
  params = @options.merge(opts)
53
55
  self.class.delete("/scrapers/#{scraper_name}", params)
54
56
  end
57
+
58
+ def profile(scraper_name, opts={})
59
+ params = @options.merge(opts)
60
+
61
+ self.class.get("/scrapers/#{scraper_name}/profile", params)
62
+ end
55
63
  end
56
64
  end
57
65
  end
@@ -29,6 +29,7 @@ module Datahen
29
29
  body[:standard_worker_count] = opts[:workers] if opts[:workers]
30
30
  body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
31
31
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
32
+ body[:profile] = opts[:profile] if opts[:profile]
32
33
  params = @options.merge({body: body.to_json})
33
34
 
34
35
  self.class.put("/scrapers/#{scraper_name}/current_job", params)
@@ -48,6 +49,12 @@ module Datahen
48
49
  opts[:status] = 'paused'
49
50
  update(scraper_name, opts)
50
51
  end
52
+
53
+ def profile(scraper_name, opts={})
54
+ params = @options.merge(opts)
55
+
56
+ self.class.get("/scrapers/#{scraper_name}/current_job/profile", params)
57
+ end
51
58
  end
52
59
  end
53
60
  end
@@ -60,7 +60,12 @@ module Datahen
60
60
 
61
61
  def init_global_page()
62
62
  client = Client::GlobalPage.new()
63
- client.find(gid)
63
+ global_page = client.find(gid)
64
+ unless global_page.code == 200
65
+ raise "GID #{gid} not found. Aborting execution!"
66
+ else
67
+ global_page
68
+ end
64
69
  end
65
70
 
66
71
  def get_content(job_id, gid)
@@ -287,11 +292,12 @@ module Datahen
287
292
  end
288
293
 
289
294
  # behave differently if it is a real save
295
+ save_status = status
290
296
  if save
291
297
  log_msg = "Saving #{log_msgs.join(' and ')}."
292
298
  puts "#{log_msg}"
293
299
  else
294
- status = "#{status}_try"
300
+ save_status = "#{status}_try"
295
301
  end
296
302
 
297
303
  # saving to server
@@ -300,7 +306,7 @@ module Datahen
300
306
  gid: gid,
301
307
  pages: pages_slice,
302
308
  outputs: outputs_slice,
303
- status: status)
309
+ status: save_status)
304
310
 
305
311
  if response.code == 200
306
312
  if save
@@ -1,3 +1,3 @@
1
1
  module Datahen
2
- VERSION = "0.14.1"
2
+ VERSION = "0.14.9"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datahen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.1
4
+ version: 0.14.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-07-26 00:00:00.000000000 Z
11
+ date: 2020-08-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -189,6 +189,7 @@ files:
189
189
  - exe/hen
190
190
  - lib/datahen.rb
191
191
  - lib/datahen/cli.rb
192
+ - lib/datahen/cli/account.rb
192
193
  - lib/datahen/cli/env_var.rb
193
194
  - lib/datahen/cli/finisher.rb
194
195
  - lib/datahen/cli/global_page.rb
@@ -206,6 +207,7 @@ files:
206
207
  - lib/datahen/cli/scraper_var.rb
207
208
  - lib/datahen/cli/seeder.rb
208
209
  - lib/datahen/client.rb
210
+ - lib/datahen/client/account.rb
209
211
  - lib/datahen/client/auth_token.rb
210
212
  - lib/datahen/client/backblaze_content.rb
211
213
  - lib/datahen/client/base.rb
@@ -264,7 +266,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
264
266
  - !ruby/object:Gem::Version
265
267
  version: '0'
266
268
  requirements: []
267
- rubygems_version: 3.1.2
269
+ rubygems_version: 3.0.3
268
270
  signing_key:
269
271
  specification_version: 4
270
272
  summary: DataHen toolbelt for developers