datahen 0.18.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0e1fcf7422236924fd818a1527337a6089cd444b1f35510b72fe140facbed7b0
4
- data.tar.gz: 05be57d3e058ee9969d210ded0b1d043b388390d5f2ac834ece490691683f39d
3
+ metadata.gz: ea12b1c12b5a5db4a650b35869de91b9b2ccc8c0c5b4e35da904fc77bfee5ebc
4
+ data.tar.gz: bd96345cc669816cc281d76065cf64d150268aa8f14659e6395796d2aebd52ec
5
5
  SHA512:
6
- metadata.gz: 4e076509fa8a0fa7fa78406916530bfe2c1b6075ac1007baab43a447911d1c7d8e90bddd8a1438c339a4fadef4e05e629e4907f1f1bae3f4c1f283dba63c25c9
7
- data.tar.gz: e7ceb1208c87cd75fa7202f55549c6b2f2ce24980f7642827aab6f721107ca8ddb59829b93742126e465d6930c6c5574de2d045d3a968a7f3a826bf099ee3c4b
6
+ metadata.gz: 763c11bb6d96fdd92c8d2eb8c7965729b3812dbc0dfa9abb47151a61175f695870369d98cac0663ebdf2c644eda028833be313fb8e7924a353f82049c6430c22
7
+ data.tar.gz: 43e074b6acde5a0367fc11f74c0a3dab0c7e1aecfc781c1e927c8e55bb6e367701ec0dcec2aa90d63c988eca16af90577a63e5f8191a5c7c055e9d0fb9e5bbea
@@ -11,6 +11,9 @@ module Datahen
11
11
  puts "#{client.profile()}"
12
12
  end
13
13
 
14
+ desc "deploy_key SUBCOMMAND ...ARGS", "manage deploy key"
15
+ subcommand "deploy_key", AccountDeployKey
16
+
14
17
  end
15
18
  end
16
19
 
@@ -0,0 +1,26 @@
1
+ module Datahen
2
+ class CLI < Thor
3
+ class AccountDeployKey < Thor
4
+ package_name "account deploy_key"
5
+ def self.banner(command, namespace = nil, subcommand = false)
6
+ "#{basename} #{@package_name} #{command.usage}"
7
+ end
8
+
9
+ desc "show", "Show public deploy key"
10
+ def show()
11
+ client = Client::DeployKey.new()
12
+ puts "#{client.find()}"
13
+ end
14
+
15
+ desc "recreate", "Recreate public deploy key"
16
+ long_desc <<-LONGDESC
17
+ Recreate public deploy key.
18
+ LONGDESC
19
+ def recreate()
20
+ client = Client::DeployKey.new()
21
+ puts "#{client.create()}"
22
+ end
23
+ end
24
+ end
25
+
26
+ end
@@ -8,6 +8,7 @@ module Datahen
8
8
  LONGDESC
9
9
  option :page, :aliases => :p, type: :numeric, desc: 'Get the next set of records by page.'
10
10
  option :per_page, :aliases => :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
11
+ option :status, :aliases => :s, type: :string, desc: 'Scraper status. Status can be: done, cancelled, paused, finishing.'
11
12
  def list
12
13
  client = Client::Scraper.new(options)
13
14
  puts "#{client.all}"
@@ -23,8 +24,9 @@ module Datahen
23
24
  option :freshness_type, :aliases => :t, desc: 'Set how fresh the page cache is. Possible values: day, week, month, year. Default: any'
24
25
  option :proxy_type, desc: 'Set the Proxy type. Default: standard'
25
26
  option :force_fetch, :aliases => :f, type: :boolean, desc: 'Set true to force fetch page that is not within freshness criteria. Default: false'
26
- option :workers, :aliases => :w, type: :numeric, desc: 'Set how many standard workers to use. Default: 1'
27
- option :browsers, type: :numeric, desc: 'Set how many browser workers to use. Default: 0'
27
+ option :parsers, :aliases => :pw, type: :numeric, desc: 'Set how many parser workers to use. Default: 1'
28
+ option :fetchers, :aliases => :fw, type: :numeric, desc: 'Set how many fetcher workers to use. Default: 1'
29
+ option :browsers, :aliases => :bw, type: :numeric, desc: 'Set how many browser workers to use. Default: 0'
28
30
  option :disable_scheduler, type: :boolean, desc: 'Set true to disable scheduler. Default: false'
29
31
  option :cancel_current_job, type: :boolean, desc: 'Set true to cancel currently active job if scheduler starts. Default: false'
30
32
  option :schedule, type: :string, desc: 'Set the schedule of the scraper to run. Must be in CRON format.'
@@ -51,8 +53,9 @@ module Datahen
51
53
  option :freshness_type, :aliases => :t, desc: 'Set how fresh the page cache is. Possible values: day, week, month, year. Default: any'
52
54
  option :proxy_type, desc: 'Set the Proxy type. Default: standard'
53
55
  option :force_fetch, :aliases => :f, type: :boolean, desc: 'Set true to force fetch page that is not within freshness criteria. Default: false'
54
- option :workers, :aliases => :w, type: :numeric, desc: 'Set how many standard workers to use. Default: 1'
55
- option :browsers, type: :numeric, desc: 'Set how many browser workers to use. Default: 0'
56
+ option :parsers, :aliases => :pw, type: :numeric, desc: 'Set how many parser workers to use. Default: 1'
57
+ option :fetchers, :aliases => :fw, type: :numeric, desc: 'Set how many fetcher workers to use. Default: 1'
58
+ option :browsers, :aliases => :bw, type: :numeric, desc: 'Set how many browser workers to use. Default: 0'
56
59
  option :disable_scheduler, type: :boolean, desc: 'Set true to disable scheduler. Default: false'
57
60
  option :cancel_current_job, type: :boolean, desc: 'Set true to cancel currently active job if scheduler starts. Default: false'
58
61
  option :schedule, type: :string, desc: 'Set the schedule of the scraper to run. Must be in CRON format.'
@@ -96,8 +99,9 @@ module Datahen
96
99
  long_desc <<-LONGDESC
97
100
  Starts a scraper by creating an active scrape job\x5
98
101
  LONGDESC
99
- option :workers, :aliases => :w, type: :numeric, desc: 'Set how many standard workers to use. Default: 1'
100
- option :browsers, type: :numeric, desc: 'Set how many browser workers to use. Default: 0'
102
+ option :parsers, :aliases => :pw, type: :numeric, desc: 'Set how many parser workers to use. Default: 1'
103
+ option :fetchers, :aliases => :fw, type: :numeric, desc: 'Set how many fetcher workers to use. Default: 1'
104
+ option :browsers, :aliases => :bw, type: :numeric, desc: 'Set how many browser workers to use. Default: 0'
101
105
  option :proxy_type, desc: 'Set the Proxy type. Default: standard'
102
106
  option :vars, type: :string, banner: :JSON, desc: 'Set input vars. Must be in json format. i.e: [{"name":"foo", "value":"bar", "secret":false}] '
103
107
  option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
@@ -99,8 +99,9 @@ module Datahen
99
99
  long_desc <<-LONGDESC
100
100
  Updates a scraper's current job.
101
101
  LONGDESC
102
- option :workers, :aliases => :w, type: :numeric, desc: 'Set how many standard workers to use. Scraper job must be restarted(paused then resumed, or cancelled then resumed) for it to take effect. Default: 1. '
103
- option :browsers, type: :numeric, desc: 'Set how many browser workers to use. Scraper job must be restarted(paused then resumed, or cancelled then resumed) for it to take effect. Default: 0. '
102
+ option :parsers, :aliases => :pw, type: :numeric, desc: 'Set how many parser workers to use. Scraper job must be restarted (paused then resumed) for it to take effect. Default: 1. '
103
+ option :fetchers, :aliases => :fw, type: :numeric, desc: 'Set how many fetcher workers to use. Scraper job must be restarted (paused then resumed) for it to take effect. Default: 1. '
104
+ option :browsers, :aliases => :bw, type: :numeric, desc: 'Set how many browser workers to use. Scraper job must be restarted (paused then resumed) for it to take effect. Default: 0. '
104
105
  option :proxy_type, desc: 'Set the Proxy type. Default: standard'
105
106
  option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
106
107
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
data/lib/datahen/cli.rb CHANGED
@@ -16,10 +16,9 @@ require 'datahen/cli/parser'
16
16
  require 'datahen/cli/seeder'
17
17
  require 'datahen/cli/finisher'
18
18
  require 'datahen/cli/env_var'
19
+ require 'datahen/cli/account_deploy_key'
19
20
  require 'datahen/cli/account'
20
21
 
21
-
22
-
23
22
  module Datahen
24
23
  class CLI < Thor
25
24
  desc "scraper SUBCOMMAND ...ARGS", "manage scrapers"
@@ -17,7 +17,8 @@ module Datahen
17
17
  def update(job_id, opts={})
18
18
  body = {}
19
19
  body[:status] = opts[:status] if opts[:status]
20
- body[:standard_worker_count] = opts[:workers] if opts[:workers]
20
+ body[:parser_worker_count] = opts[:parsers] if opts[:parsers]
21
+ body[:fetcher_worker_count] = opts[:fetchers] if opts[:fetchers]
21
22
  body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
22
23
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
23
24
  body[:profile] = opts[:profile] if opts[:profile]
@@ -18,7 +18,8 @@ module Datahen
18
18
  body[:git_branch] = opts[:branch] || opts[:git_branch] || "master" if opts[:branch] || opts[:git_branch]
19
19
  body[:freshness_type] = opts[:freshness_type] if opts[:freshness_type]
20
20
  body[:force_fetch] = opts[:force_fetch] if opts[:force_fetch]
21
- body[:standard_worker_count] = opts[:workers] || opts[:standard_worker_count] if opts[:workers] || opts[:standard_worker_count]
21
+ body[:parser_worker_count] = opts[:parsers] || opts[:parser_worker_count] if opts[:parsers] || opts[:parser_worker_count]
22
+ body[:fetcher_worker_count] = opts[:fetchers] || opts[:fetcher_worker_count] if opts[:fetchers] || opts[:fetcher_worker_count]
22
23
  body[:browser_worker_count] = opts[:browsers] || opts[:browser_worker_count] if opts[:browsers] || opts[:browser_worker_count]
23
24
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
24
25
  body[:disable_scheduler] = opts[:disable_scheduler] if opts[:disable_scheduler]
@@ -42,7 +43,8 @@ module Datahen
42
43
  body[:git_branch] = opts[:branch] || opts[:git_branch] if opts[:branch] || opts[:git_branch]
43
44
  body[:freshness_type] = opts[:freshness_type] if opts[:freshness_type]
44
45
  body[:force_fetch] = opts[:force_fetch] if opts.has_key?("force_fetch") || opts.has_key?(:force_fetch)
45
- body[:standard_worker_count] = opts[:workers] || opts[:standard_worker_count] if opts[:workers] || opts[:standard_worker_count]
46
+ body[:parser_worker_count] = opts[:parsers] || opts[:parser_worker_count] if opts[:parsers] || opts[:parser_worker_count]
47
+ body[:fetcher_worker_count] = opts[:fetchers] || opts[:fetcher_worker_count] if opts[:fetchers] || opts[:fetcher_worker_count]
46
48
  body[:browser_worker_count] = opts[:browsers] || opts[:browser_worker_count] if opts[:browsers] || opts[:browser_worker_count]
47
49
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
48
50
  body[:disable_scheduler] = opts[:disable_scheduler] if opts.has_key?("disable_scheduler") || opts.has_key?(:disable_scheduler)
@@ -8,7 +8,8 @@ module Datahen
8
8
 
9
9
  def create(scraper_name, opts={})
10
10
  body = {}
11
- body[:standard_worker_count] = opts[:workers] if opts[:workers]
11
+ body[:parser_worker_count] = opts[:parsers] if opts[:parsers]
12
+ body[:fetcher_worker_count] = opts[:fetchers] if opts[:fetchers]
12
13
  body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
13
14
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
14
15
  body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
@@ -36,7 +37,8 @@ module Datahen
36
37
  def update(scraper_name, opts={})
37
38
  body = {}
38
39
  body[:status] = opts[:status] if opts[:status]
39
- body[:standard_worker_count] = opts[:workers] if opts[:workers]
40
+ body[:parser_worker_count] = opts[:parsers] if opts[:parsers]
41
+ body[:fetcher_worker_count] = opts[:fetchers] if opts[:fetchers]
40
42
  body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
41
43
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
42
44
  body[:profile] = opts[:profile] if opts[:profile]
@@ -1,3 +1,3 @@
1
1
  module Datahen
2
- VERSION = "0.18.0"
2
+ VERSION = "1.0.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datahen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.18.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-09-09 00:00:00.000000000 Z
11
+ date: 2022-06-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -212,6 +212,7 @@ files:
212
212
  - lib/datahen.rb
213
213
  - lib/datahen/cli.rb
214
214
  - lib/datahen/cli/account.rb
215
+ - lib/datahen/cli/account_deploy_key.rb
215
216
  - lib/datahen/cli/env_var.rb
216
217
  - lib/datahen/cli/finisher.rb
217
218
  - lib/datahen/cli/global_page.rb