datahen 0.14.1 → 0.14.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f53bb8631bee37890dddae7045b89342fcb01611f693fe454a2d001ea9cdfe30
4
- data.tar.gz: fe2ab54f12f5865ea60bb7d2f447e09e7eb9cf5dff8a74bfbdced103f8bd03e1
3
+ metadata.gz: 63ae3d8aba6bb70a89033d592db3242905d70d7e0ea6a90455c370cf3b8a2dde
4
+ data.tar.gz: 3f4644be8702eb789f8d4c3e099750dade2e9d9f83ab6a359f222972d11938bc
5
5
  SHA512:
6
- metadata.gz: 59a642fe8ea6274bfb378e60ae4e8e730bb76cc58a188f0c267636d063afa45eb202c635b05b3a906a67f278e03e9f2e5637750f79ef5b8edf8e5ad3a42668d7
7
- data.tar.gz: b86f95465887321c090724a65db613a57350050ccdd5c1fb4c407701ac560d6bf6a2ef2964b7c44722578205672483d713b8358e5e3e68c29f59076602391f84
6
+ metadata.gz: 20f50d8b4a52d360fe07f32aaa0350a190b80c30157d0d2c1e33ca745013ed8ab1a9a97d81acfc400fa6ff3250394ea44e69bfe894318d4798ec8d99567ab736
7
+ data.tar.gz: 2724495723be6e2d249e8697e28102674c3685ce4cb384bcefaeca03d606361bf2e0e24c098954ad5b872b7e067a0cc5d0c1ed84a10b7b703144ad3a2ba16b4f
@@ -16,6 +16,7 @@ require 'datahen/cli/parser'
16
16
  require 'datahen/cli/seeder'
17
17
  require 'datahen/cli/finisher'
18
18
  require 'datahen/cli/env_var'
19
+ require 'datahen/cli/account'
19
20
 
20
21
 
21
22
 
@@ -41,5 +42,8 @@ module Datahen
41
42
 
42
43
  desc "var SUBCOMMAND ...ARGS", "for environment variable related activities"
43
44
  subcommand "var", EnvVar
45
+
46
+ desc "account SUBCOMMAND ...ARGS", "for account related activities"
47
+ subcommand "account", Account
44
48
  end
45
49
  end
@@ -0,0 +1,17 @@
1
+ module Datahen
2
+ class CLI < Thor
3
+ class Account < Thor
4
+
5
+ desc "profile", "displays the account applied profile"
6
+ long_desc <<-LONGDESC
7
+ Displays the account applied profile
8
+ LONGDESC
9
+ def profile()
10
+ client = Client::Account.new(options)
11
+ puts "#{client.profile()}"
12
+ end
13
+
14
+ end
15
+ end
16
+
17
+ end
@@ -2,7 +2,7 @@ module Datahen
2
2
  class CLI < Thor
3
3
  class EnvVar < Thor
4
4
  desc "list", "List environment variables on the account"
5
-
5
+
6
6
  long_desc <<-LONGDESC
7
7
  List all environment variables on the account.
8
8
  LONGDESC
@@ -19,7 +19,7 @@ module Datahen
19
19
  <name>: Var name can only consist of alphabets, numbers, underscores. Name must be unique to your account, otherwise it will be overwritten.\x5
20
20
  <value>: Value of variable.\x5
21
21
  LONGDESC
22
- option :secret, type: :boolean, desc: 'Set true to make it decrypt the value. Default: false'
22
+ option :secret, type: :boolean, desc: 'Set true to make it decrypt the value. Default: false'
23
23
  def set(name, value)
24
24
  # puts "options #{options}"
25
25
  client = Client::EnvVar.new(options)
@@ -38,10 +38,6 @@ module Datahen
38
38
  puts "#{client.unset(name)}"
39
39
  end
40
40
 
41
-
42
-
43
-
44
-
45
41
  end
46
42
  end
47
43
 
@@ -1,8 +1,8 @@
1
1
  module Datahen
2
2
  class CLI < Thor
3
3
  class Scraper < Thor
4
- desc "list", "List scrapers"
5
4
 
5
+ desc "list", "List scrapers"
6
6
  long_desc <<-LONGDESC
7
7
  List all scrapers.
8
8
  LONGDESC
@@ -29,6 +29,7 @@ module Datahen
29
29
  option :cancel_current_job, type: :boolean, desc: 'Set true to cancel currently active job if scheduler starts. Default: false'
30
30
  option :schedule, type: :string, desc: 'Set the schedule of the scraper to run. Must be in CRON format.'
31
31
  option :timezone, type: :string, desc: "Set the scheduler's timezone. Must be in IANA Timezone format. Defaults to \"America/Toronto\""
32
+ option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
32
33
  def create(scraper_name, git_repository)
33
34
  # puts "options #{options}"
34
35
  client = Client::Scraper.new(options)
@@ -51,6 +52,7 @@ module Datahen
51
52
  option :cancel_current_job, type: :boolean, desc: 'Set true to cancel currently active job if scheduler starts. Default: false'
52
53
  option :schedule, type: :string, desc: 'Set the schedule of the scraper to run. Must be in CRON format.'
53
54
  option :timezone, type: :string, desc: "Set the scheduler's timezone. Must be in IANA Timezone format. Defaults to \"America/Toronto\""
55
+ option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
54
56
  def update(scraper_name)
55
57
  client = Client::Scraper.new(options)
56
58
  puts "#{client.update(scraper_name, options)}"
@@ -164,15 +166,25 @@ module Datahen
164
166
  option :"max-timestamp", type: :string, desc: 'Ending timestamp point in time to query historic stats (inclusive)'
165
167
  option :"limit", type: :numeric, desc: 'Limit stats retrieved'
166
168
  option :"order", type: :numeric, desc: 'Order stats by timestamp [DESC]'
169
+ option :live, type: :boolean, desc: 'Get data from the live stats history, not cached stats history.'
170
+ option :filter, type: :string, desc: 'Filter results on `day` or `hour`, if not specified will return all records.'
167
171
  def history(scraper_name)
168
172
  client = Client::JobStat.new(options)
169
173
  if options[:job]
170
- puts "#{client.job_stats_history(options[:job])}"
174
+ puts "#{client.job_stats_history(options[:job], options)}"
171
175
  else
172
- puts "#{client.scraper_job_stats_history(scraper_name)}"
176
+ puts "#{client.scraper_job_stats_history(scraper_name, options)}"
173
177
  end
174
178
  end
175
179
 
180
+ desc "profile <scraper_name>", "displays the scraper applied profile"
181
+ long_desc <<-LONGDESC
182
+ Displays the account applied profile
183
+ LONGDESC
184
+ def profile(scraper_name)
185
+ client = Client::Scraper.new(options)
186
+ puts "#{client.profile(scraper_name)}"
187
+ end
176
188
 
177
189
  desc "job SUBCOMMAND ...ARGS", "manage scrapers jobs"
178
190
  subcommand "job", ScraperJob
@@ -7,10 +7,16 @@ module Datahen
7
7
  end
8
8
 
9
9
  desc "show <scraper_name>", "Show a scraper's current job (Defaults to showing data from cached job)"
10
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
10
11
  option :live, type: :boolean, desc: 'Get data from the live job, not cached job.'
11
12
  def show(scraper_name)
12
- client = Client::ScraperJob.new(options)
13
- puts "#{client.find(scraper_name, options)}"
13
+ if options[:job]
14
+ client = Client::Job.new(options)
15
+ puts "#{client.find(options[:job], options)}"
16
+ else
17
+ client = Client::ScraperJob.new(options)
18
+ puts "#{client.find(scraper_name, options)}"
19
+ end
14
20
  end
15
21
 
16
22
 
@@ -58,7 +64,7 @@ module Datahen
58
64
 
59
65
  desc "pause <scraper_name>", "pauses a scraper's current job"
60
66
  long_desc <<-LONGDESC
61
- pauses a scraper's current job
67
+ Pauses a scraper's current job
62
68
  LONGDESC
63
69
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
64
70
  def pause(scraper_name)
@@ -79,6 +85,7 @@ module Datahen
79
85
  option :workers, :aliases => :w, type: :numeric, desc: 'Set how many standard workers to use. Scraper job must be restarted(paused then resumed, or cancelled then resumed) for it to take effect. Default: 1. '
80
86
  option :browsers, type: :numeric, desc: 'Set how many browser workers to use. Scraper job must be restarted(paused then resumed, or cancelled then resumed) for it to take effect. Default: 0. '
81
87
  option :proxy_type, desc: 'Set the Proxy type. Default: standard'
88
+ option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
82
89
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
83
90
  def update(scraper_name)
84
91
  if options[:job]
@@ -90,6 +97,21 @@ module Datahen
90
97
  end
91
98
  end
92
99
 
100
+ desc "profile <scraper_name>", "displays a scraper's current job applied profile"
101
+ long_desc <<-LONGDESC
102
+ Displays a scraper's current job applied profile
103
+ LONGDESC
104
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
105
+ def profile(scraper_name)
106
+ if options[:job]
107
+ client = Client::Job.new(options)
108
+ puts "#{client.profile(options[:job])}"
109
+ else
110
+ client = Client::ScraperJob.new(options)
111
+ puts "#{client.profile(scraper_name)}"
112
+ end
113
+ end
114
+
93
115
  desc "var SUBCOMMAND ...ARGS", "for managing scraper's job variables"
94
116
  subcommand "var", ScraperJobVar
95
117
 
@@ -230,7 +230,7 @@ module Datahen
230
230
  end
231
231
  end
232
232
 
233
- desc "failedcontent <gid>", "Show a page's failed content in scraper's current job"
233
+ desc "failedcontent <scraper_name> <gid>", "Show a page's failed content in scraper's current job"
234
234
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
235
235
  def failedcontent(scraper_name, gid)
236
236
  result = nil
@@ -1,4 +1,5 @@
1
1
  require "datahen/client/base"
2
+ require "datahen/client/account"
2
3
  require "datahen/client/auth_token"
3
4
  require "datahen/client/deploy_key"
4
5
  require 'datahen/client/export'
@@ -24,7 +25,6 @@ require "datahen/client/job_var"
24
25
  require "datahen/client/scraper_job_var"
25
26
  require "datahen/client/job_finisher"
26
27
 
27
-
28
28
  module Datahen
29
29
  module Client
30
30
  end
@@ -0,0 +1,13 @@
1
+ module Datahen
2
+ module Client
3
+ class Account < Datahen::Client::Base
4
+
5
+ def profile(opts={})
6
+ params = @options.merge(opts)
7
+
8
+ self.class.get("/profile", params)
9
+ end
10
+
11
+ end
12
+ end
13
+ end
@@ -5,6 +5,8 @@ module Datahen
5
5
  class Base
6
6
  include HTTParty
7
7
 
8
+ default_timeout 60
9
+
8
10
  def self.env_auth_token
9
11
  ENV['DATAHEN_TOKEN']
10
12
  end
@@ -55,6 +57,7 @@ module Datahen
55
57
  query[:"max-timestamp"] = opts[:"max-timestamp"] if opts[:"max-timestamp"]
56
58
  query[:limit] = opts[:limit] if opts[:limit]
57
59
  query[:order] = opts[:order] if opts[:order]
60
+ query[:filter] = opts[:filter] if opts[:filter]
58
61
 
59
62
  if opts[:query]
60
63
  if opts[:query].is_a?(Hash)
@@ -20,6 +20,7 @@ module Datahen
20
20
  body[:standard_worker_count] = opts[:workers] if opts[:workers]
21
21
  body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
22
22
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
23
+ body[:profile] = opts[:profile] if opts[:profile]
23
24
  params = @options.merge({body: body.to_json})
24
25
 
25
26
  self.class.put("/jobs/#{job_id}", params)
@@ -64,6 +65,12 @@ module Datahen
64
65
  self.class.put("/jobs/#{job_id}/finisher_update", params)
65
66
  end
66
67
 
68
+ def profile(job_id, opts={})
69
+ params = @options.merge(opts)
70
+
71
+ self.class.get("/jobs/#{job_id}/profile", params)
72
+ end
73
+
67
74
  end
68
75
 
69
76
  end
@@ -18,12 +18,20 @@ module Datahen
18
18
  end
19
19
  end
20
20
 
21
- def job_stats_history(job_id)
22
- self.class.get("/jobs/#{job_id}/stats/history", @options)
21
+ def job_stats_history(job_id, opts={})
22
+ if opts[:live]
23
+ self.class.get("/jobs/#{job_id}/stats/history", @options)
24
+ else
25
+ self.class.get("/cached/jobs/#{job_id}/stats/history", @options)
26
+ end
23
27
  end
24
28
 
25
- def scraper_job_stats_history(scraper_name)
26
- self.class.get("/scrapers/#{scraper_name}/current_job/stats/history", @options)
29
+ def scraper_job_stats_history(scraper_name, opts={})
30
+ if opts[:live]
31
+ self.class.get("/scrapers/#{scraper_name}/current_job/stats/history", @options)
32
+ else
33
+ self.class.get("/cached/scrapers/#{scraper_name}/current_job/stats/history", @options)
34
+ end
27
35
  end
28
36
 
29
37
  end
@@ -25,6 +25,7 @@ module Datahen
25
25
  body[:cancel_current_job] = opts[:cancel_current_job] if opts[:cancel_current_job]
26
26
  body[:schedule] = opts[:schedule] if opts[:schedule]
27
27
  body[:timezone] = opts[:timezone] if opts[:timezone]
28
+ body[:profile] = opts[:profile] if opts[:profile]
28
29
  params = @options.merge({body: body.to_json})
29
30
  self.class.post("/scrapers", params)
30
31
  end
@@ -43,6 +44,7 @@ module Datahen
43
44
  body[:cancel_current_job] = opts[:cancel_current_job] if opts.has_key?("cancel_current_job") || opts.has_key?(:cancel_current_job)
44
45
  body[:schedule] = opts[:schedule] if opts[:schedule]
45
46
  body[:timezone] = opts[:timezone] if opts[:timezone]
47
+ body[:profile] = opts[:profile] if opts[:profile]
46
48
  params = @options.merge({body: body.to_json})
47
49
 
48
50
  self.class.put("/scrapers/#{scraper_name}", params)
@@ -52,6 +54,12 @@ module Datahen
52
54
  params = @options.merge(opts)
53
55
  self.class.delete("/scrapers/#{scraper_name}", params)
54
56
  end
57
+
58
+ def profile(scraper_name, opts={})
59
+ params = @options.merge(opts)
60
+
61
+ self.class.get("/scrapers/#{scraper_name}/profile", params)
62
+ end
55
63
  end
56
64
  end
57
65
  end
@@ -29,6 +29,7 @@ module Datahen
29
29
  body[:standard_worker_count] = opts[:workers] if opts[:workers]
30
30
  body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
31
31
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
32
+ body[:profile] = opts[:profile] if opts[:profile]
32
33
  params = @options.merge({body: body.to_json})
33
34
 
34
35
  self.class.put("/scrapers/#{scraper_name}/current_job", params)
@@ -48,6 +49,12 @@ module Datahen
48
49
  opts[:status] = 'paused'
49
50
  update(scraper_name, opts)
50
51
  end
52
+
53
+ def profile(scraper_name, opts={})
54
+ params = @options.merge(opts)
55
+
56
+ self.class.get("/scrapers/#{scraper_name}/current_job/profile", params)
57
+ end
51
58
  end
52
59
  end
53
60
  end
@@ -60,7 +60,12 @@ module Datahen
60
60
 
61
61
  def init_global_page()
62
62
  client = Client::GlobalPage.new()
63
- client.find(gid)
63
+ global_page = client.find(gid)
64
+ unless global_page.code == 200
65
+ raise "GID #{gid} not found. Aborting execution!"
66
+ else
67
+ global_page
68
+ end
64
69
  end
65
70
 
66
71
  def get_content(job_id, gid)
@@ -287,11 +292,12 @@ module Datahen
287
292
  end
288
293
 
289
294
  # behave differently if it is a real save
295
+ save_status = status
290
296
  if save
291
297
  log_msg = "Saving #{log_msgs.join(' and ')}."
292
298
  puts "#{log_msg}"
293
299
  else
294
- status = "#{status}_try"
300
+ save_status = "#{status}_try"
295
301
  end
296
302
 
297
303
  # saving to server
@@ -300,7 +306,7 @@ module Datahen
300
306
  gid: gid,
301
307
  pages: pages_slice,
302
308
  outputs: outputs_slice,
303
- status: status)
309
+ status: save_status)
304
310
 
305
311
  if response.code == 200
306
312
  if save
@@ -1,3 +1,3 @@
1
1
  module Datahen
2
- VERSION = "0.14.1"
2
+ VERSION = "0.14.9"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datahen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.1
4
+ version: 0.14.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-07-26 00:00:00.000000000 Z
11
+ date: 2020-08-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -189,6 +189,7 @@ files:
189
189
  - exe/hen
190
190
  - lib/datahen.rb
191
191
  - lib/datahen/cli.rb
192
+ - lib/datahen/cli/account.rb
192
193
  - lib/datahen/cli/env_var.rb
193
194
  - lib/datahen/cli/finisher.rb
194
195
  - lib/datahen/cli/global_page.rb
@@ -206,6 +207,7 @@ files:
206
207
  - lib/datahen/cli/scraper_var.rb
207
208
  - lib/datahen/cli/seeder.rb
208
209
  - lib/datahen/client.rb
210
+ - lib/datahen/client/account.rb
209
211
  - lib/datahen/client/auth_token.rb
210
212
  - lib/datahen/client/backblaze_content.rb
211
213
  - lib/datahen/client/base.rb
@@ -264,7 +266,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
264
266
  - !ruby/object:Gem::Version
265
267
  version: '0'
266
268
  requirements: []
267
- rubygems_version: 3.1.2
269
+ rubygems_version: 3.0.3
268
270
  signing_key:
269
271
  specification_version: 4
270
272
  summary: DataHen toolbelt for developers