datahen 0.18.0 → 1.0.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0e1fcf7422236924fd818a1527337a6089cd444b1f35510b72fe140facbed7b0
4
- data.tar.gz: 05be57d3e058ee9969d210ded0b1d043b388390d5f2ac834ece490691683f39d
3
+ metadata.gz: ea12b1c12b5a5db4a650b35869de91b9b2ccc8c0c5b4e35da904fc77bfee5ebc
4
+ data.tar.gz: bd96345cc669816cc281d76065cf64d150268aa8f14659e6395796d2aebd52ec
5
5
  SHA512:
6
- metadata.gz: 4e076509fa8a0fa7fa78406916530bfe2c1b6075ac1007baab43a447911d1c7d8e90bddd8a1438c339a4fadef4e05e629e4907f1f1bae3f4c1f283dba63c25c9
7
- data.tar.gz: e7ceb1208c87cd75fa7202f55549c6b2f2ce24980f7642827aab6f721107ca8ddb59829b93742126e465d6930c6c5574de2d045d3a968a7f3a826bf099ee3c4b
6
+ metadata.gz: 763c11bb6d96fdd92c8d2eb8c7965729b3812dbc0dfa9abb47151a61175f695870369d98cac0663ebdf2c644eda028833be313fb8e7924a353f82049c6430c22
7
+ data.tar.gz: 43e074b6acde5a0367fc11f74c0a3dab0c7e1aecfc781c1e927c8e55bb6e367701ec0dcec2aa90d63c988eca16af90577a63e5f8191a5c7c055e9d0fb9e5bbea
@@ -11,6 +11,9 @@ module Datahen
11
11
  puts "#{client.profile()}"
12
12
  end
13
13
 
14
+ desc "deploy_key SUBCOMMAND ...ARGS", "manage deploy key"
15
+ subcommand "deploy_key", AccountDeployKey
16
+
14
17
  end
15
18
  end
16
19
 
@@ -0,0 +1,26 @@
1
+ module Datahen
2
+ class CLI < Thor
3
+ class AccountDeployKey < Thor
4
+ package_name "account deploy_key"
5
+ def self.banner(command, namespace = nil, subcommand = false)
6
+ "#{basename} #{@package_name} #{command.usage}"
7
+ end
8
+
9
+ desc "show", "Show public deploy key"
10
+ def show()
11
+ client = Client::DeployKey.new()
12
+ puts "#{client.find()}"
13
+ end
14
+
15
+ desc "recreate", "Recreate public deploy key"
16
+ long_desc <<-LONGDESC
17
+ Recreate public deploy key.
18
+ LONGDESC
19
+ def recreate()
20
+ client = Client::DeployKey.new()
21
+ puts "#{client.create()}"
22
+ end
23
+ end
24
+ end
25
+
26
+ end
@@ -8,6 +8,7 @@ module Datahen
8
8
  LONGDESC
9
9
  option :page, :aliases => :p, type: :numeric, desc: 'Get the next set of records by page.'
10
10
  option :per_page, :aliases => :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
11
+ option :status, :aliases => :s, type: :string, desc: 'Scraper status. Status can be: done, cancelled, paused, finishing.'
11
12
  def list
12
13
  client = Client::Scraper.new(options)
13
14
  puts "#{client.all}"
@@ -23,8 +24,9 @@ module Datahen
23
24
  option :freshness_type, :aliases => :t, desc: 'Set how fresh the page cache is. Possible values: day, week, month, year. Default: any'
24
25
  option :proxy_type, desc: 'Set the Proxy type. Default: standard'
25
26
  option :force_fetch, :aliases => :f, type: :boolean, desc: 'Set true to force fetch page that is not within freshness criteria. Default: false'
26
- option :workers, :aliases => :w, type: :numeric, desc: 'Set how many standard workers to use. Default: 1'
27
- option :browsers, type: :numeric, desc: 'Set how many browser workers to use. Default: 0'
27
+ option :parsers, :aliases => :pw, type: :numeric, desc: 'Set how many parser workers to use. Default: 1'
28
+ option :fetchers, :aliases => :fw, type: :numeric, desc: 'Set how many fetcher workers to use. Default: 1'
29
+ option :browsers, :aliases => :bw, type: :numeric, desc: 'Set how many browser workers to use. Default: 0'
28
30
  option :disable_scheduler, type: :boolean, desc: 'Set true to disable scheduler. Default: false'
29
31
  option :cancel_current_job, type: :boolean, desc: 'Set true to cancel currently active job if scheduler starts. Default: false'
30
32
  option :schedule, type: :string, desc: 'Set the schedule of the scraper to run. Must be in CRON format.'
@@ -51,8 +53,9 @@ module Datahen
51
53
  option :freshness_type, :aliases => :t, desc: 'Set how fresh the page cache is. Possible values: day, week, month, year. Default: any'
52
54
  option :proxy_type, desc: 'Set the Proxy type. Default: standard'
53
55
  option :force_fetch, :aliases => :f, type: :boolean, desc: 'Set true to force fetch page that is not within freshness criteria. Default: false'
54
- option :workers, :aliases => :w, type: :numeric, desc: 'Set how many standard workers to use. Default: 1'
55
- option :browsers, type: :numeric, desc: 'Set how many browser workers to use. Default: 0'
56
+ option :parsers, :aliases => :pw, type: :numeric, desc: 'Set how many parser workers to use. Default: 1'
57
+ option :fetchers, :aliases => :fw, type: :numeric, desc: 'Set how many fetcher workers to use. Default: 1'
58
+ option :browsers, :aliases => :bw, type: :numeric, desc: 'Set how many browser workers to use. Default: 0'
56
59
  option :disable_scheduler, type: :boolean, desc: 'Set true to disable scheduler. Default: false'
57
60
  option :cancel_current_job, type: :boolean, desc: 'Set true to cancel currently active job if scheduler starts. Default: false'
58
61
  option :schedule, type: :string, desc: 'Set the schedule of the scraper to run. Must be in CRON format.'
@@ -96,8 +99,9 @@ module Datahen
96
99
  long_desc <<-LONGDESC
97
100
  Starts a scraper by creating an active scrape job\x5
98
101
  LONGDESC
99
- option :workers, :aliases => :w, type: :numeric, desc: 'Set how many standard workers to use. Default: 1'
100
- option :browsers, type: :numeric, desc: 'Set how many browser workers to use. Default: 0'
102
+ option :parsers, :aliases => :pw, type: :numeric, desc: 'Set how many parser workers to use. Default: 1'
103
+ option :fetchers, :aliases => :fw, type: :numeric, desc: 'Set how many fetcher workers to use. Default: 1'
104
+ option :browsers, :aliases => :bw, type: :numeric, desc: 'Set how many browser workers to use. Default: 0'
101
105
  option :proxy_type, desc: 'Set the Proxy type. Default: standard'
102
106
  option :vars, type: :string, banner: :JSON, desc: 'Set input vars. Must be in json format. i.e: [{"name":"foo", "value":"bar", "secret":false}] '
103
107
  option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
@@ -99,8 +99,9 @@ module Datahen
99
99
  long_desc <<-LONGDESC
100
100
  Updates a scraper's current job.
101
101
  LONGDESC
102
- option :workers, :aliases => :w, type: :numeric, desc: 'Set how many standard workers to use. Scraper job must be restarted(paused then resumed, or cancelled then resumed) for it to take effect. Default: 1. '
103
- option :browsers, type: :numeric, desc: 'Set how many browser workers to use. Scraper job must be restarted(paused then resumed, or cancelled then resumed) for it to take effect. Default: 0. '
102
+ option :parsers, :aliases => :pw, type: :numeric, desc: 'Set how many parser workers to use. Scraper job must be restarted (paused then resumed) for it to take effect. Default: 1. '
103
+ option :fetchers, :aliases => :fw, type: :numeric, desc: 'Set how many fetcher workers to use. Scraper job must be restarted (paused then resumed) for it to take effect. Default: 1. '
104
+ option :browsers, :aliases => :bw, type: :numeric, desc: 'Set how many browser workers to use. Scraper job must be restarted (paused then resumed) for it to take effect. Default: 0. '
104
105
  option :proxy_type, desc: 'Set the Proxy type. Default: standard'
105
106
  option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
106
107
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
data/lib/datahen/cli.rb CHANGED
@@ -16,10 +16,9 @@ require 'datahen/cli/parser'
16
16
  require 'datahen/cli/seeder'
17
17
  require 'datahen/cli/finisher'
18
18
  require 'datahen/cli/env_var'
19
+ require 'datahen/cli/account_deploy_key'
19
20
  require 'datahen/cli/account'
20
21
 
21
-
22
-
23
22
  module Datahen
24
23
  class CLI < Thor
25
24
  desc "scraper SUBCOMMAND ...ARGS", "manage scrapers"
@@ -17,7 +17,8 @@ module Datahen
17
17
  def update(job_id, opts={})
18
18
  body = {}
19
19
  body[:status] = opts[:status] if opts[:status]
20
- body[:standard_worker_count] = opts[:workers] if opts[:workers]
20
+ body[:parser_worker_count] = opts[:parsers] if opts[:parsers]
21
+ body[:fetcher_worker_count] = opts[:fetchers] if opts[:fetchers]
21
22
  body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
22
23
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
23
24
  body[:profile] = opts[:profile] if opts[:profile]
@@ -18,7 +18,8 @@ module Datahen
18
18
  body[:git_branch] = opts[:branch] || opts[:git_branch] || "master" if opts[:branch] || opts[:git_branch]
19
19
  body[:freshness_type] = opts[:freshness_type] if opts[:freshness_type]
20
20
  body[:force_fetch] = opts[:force_fetch] if opts[:force_fetch]
21
- body[:standard_worker_count] = opts[:workers] || opts[:standard_worker_count] if opts[:workers] || opts[:standard_worker_count]
21
+ body[:parser_worker_count] = opts[:parsers] || opts[:parser_worker_count] if opts[:parsers] || opts[:parser_worker_count]
22
+ body[:fetcher_worker_count] = opts[:fetchers] || opts[:fetcher_worker_count] if opts[:fetchers] || opts[:fetcher_worker_count]
22
23
  body[:browser_worker_count] = opts[:browsers] || opts[:browser_worker_count] if opts[:browsers] || opts[:browser_worker_count]
23
24
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
24
25
  body[:disable_scheduler] = opts[:disable_scheduler] if opts[:disable_scheduler]
@@ -42,7 +43,8 @@ module Datahen
42
43
  body[:git_branch] = opts[:branch] || opts[:git_branch] if opts[:branch] || opts[:git_branch]
43
44
  body[:freshness_type] = opts[:freshness_type] if opts[:freshness_type]
44
45
  body[:force_fetch] = opts[:force_fetch] if opts.has_key?("force_fetch") || opts.has_key?(:force_fetch)
45
- body[:standard_worker_count] = opts[:workers] || opts[:standard_worker_count] if opts[:workers] || opts[:standard_worker_count]
46
+ body[:parser_worker_count] = opts[:parsers] || opts[:parser_worker_count] if opts[:parsers] || opts[:parser_worker_count]
47
+ body[:fetcher_worker_count] = opts[:fetchers] || opts[:fetcher_worker_count] if opts[:fetchers] || opts[:fetcher_worker_count]
46
48
  body[:browser_worker_count] = opts[:browsers] || opts[:browser_worker_count] if opts[:browsers] || opts[:browser_worker_count]
47
49
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
48
50
  body[:disable_scheduler] = opts[:disable_scheduler] if opts.has_key?("disable_scheduler") || opts.has_key?(:disable_scheduler)
@@ -8,7 +8,8 @@ module Datahen
8
8
 
9
9
  def create(scraper_name, opts={})
10
10
  body = {}
11
- body[:standard_worker_count] = opts[:workers] if opts[:workers]
11
+ body[:parser_worker_count] = opts[:parsers] if opts[:parsers]
12
+ body[:fetcher_worker_count] = opts[:fetchers] if opts[:fetchers]
12
13
  body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
13
14
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
14
15
  body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
@@ -36,7 +37,8 @@ module Datahen
36
37
  def update(scraper_name, opts={})
37
38
  body = {}
38
39
  body[:status] = opts[:status] if opts[:status]
39
- body[:standard_worker_count] = opts[:workers] if opts[:workers]
40
+ body[:parser_worker_count] = opts[:parsers] if opts[:parsers]
41
+ body[:fetcher_worker_count] = opts[:fetchers] if opts[:fetchers]
40
42
  body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
41
43
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
42
44
  body[:profile] = opts[:profile] if opts[:profile]
@@ -1,3 +1,3 @@
1
1
  module Datahen
2
- VERSION = "0.18.0"
2
+ VERSION = "1.0.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datahen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.18.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-09-09 00:00:00.000000000 Z
11
+ date: 2022-06-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -212,6 +212,7 @@ files:
212
212
  - lib/datahen.rb
213
213
  - lib/datahen/cli.rb
214
214
  - lib/datahen/cli/account.rb
215
+ - lib/datahen/cli/account_deploy_key.rb
215
216
  - lib/datahen/cli/env_var.rb
216
217
  - lib/datahen/cli/finisher.rb
217
218
  - lib/datahen/cli/global_page.rb