datahen 0.18.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/datahen/cli/account.rb +3 -0
- data/lib/datahen/cli/account_deploy_key.rb +26 -0
- data/lib/datahen/cli/scraper.rb +10 -6
- data/lib/datahen/cli/scraper_job.rb +3 -2
- data/lib/datahen/cli.rb +1 -2
- data/lib/datahen/client/job.rb +2 -1
- data/lib/datahen/client/scraper.rb +4 -2
- data/lib/datahen/client/scraper_job.rb +4 -2
- data/lib/datahen/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ea12b1c12b5a5db4a650b35869de91b9b2ccc8c0c5b4e35da904fc77bfee5ebc
|
4
|
+
data.tar.gz: bd96345cc669816cc281d76065cf64d150268aa8f14659e6395796d2aebd52ec
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 763c11bb6d96fdd92c8d2eb8c7965729b3812dbc0dfa9abb47151a61175f695870369d98cac0663ebdf2c644eda028833be313fb8e7924a353f82049c6430c22
|
7
|
+
data.tar.gz: 43e074b6acde5a0367fc11f74c0a3dab0c7e1aecfc781c1e927c8e55bb6e367701ec0dcec2aa90d63c988eca16af90577a63e5f8191a5c7c055e9d0fb9e5bbea
|
data/lib/datahen/cli/account.rb
CHANGED
data/lib/datahen/cli/account_deploy_key.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
module Datahen
|
2
|
+
class CLI < Thor
|
3
|
+
class AccountDeployKey < Thor
|
4
|
+
package_name "account deploy_key"
|
5
|
+
def self.banner(command, namespace = nil, subcommand = false)
|
6
|
+
"#{basename} #{@package_name} #{command.usage}"
|
7
|
+
end
|
8
|
+
|
9
|
+
desc "show", "Show public deploy key"
|
10
|
+
def show()
|
11
|
+
client = Client::DeployKey.new()
|
12
|
+
puts "#{client.find()}"
|
13
|
+
end
|
14
|
+
|
15
|
+
desc "recreate", "Recreate public deploy key"
|
16
|
+
long_desc <<-LONGDESC
|
17
|
+
Recreate public deploy key.
|
18
|
+
LONGDESC
|
19
|
+
def recreate()
|
20
|
+
client = Client::DeployKey.new()
|
21
|
+
puts "#{client.create()}"
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
data/lib/datahen/cli/scraper.rb
CHANGED
@@ -8,6 +8,7 @@ module Datahen
|
|
8
8
|
LONGDESC
|
9
9
|
option :page, :aliases => :p, type: :numeric, desc: 'Get the next set of records by page.'
|
10
10
|
option :per_page, :aliases => :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
|
11
|
+
option :status, :aliases => :s, type: :string, desc: 'Scraper status. Status can be: done, cancelled, paused, finishing.'
|
11
12
|
def list
|
12
13
|
client = Client::Scraper.new(options)
|
13
14
|
puts "#{client.all}"
|
@@ -23,8 +24,9 @@ module Datahen
|
|
23
24
|
option :freshness_type, :aliases => :t, desc: 'Set how fresh the page cache is. Possible values: day, week, month, year. Default: any'
|
24
25
|
option :proxy_type, desc: 'Set the Proxy type. Default: standard'
|
25
26
|
option :force_fetch, :aliases => :f, type: :boolean, desc: 'Set true to force fetch page that is not within freshness criteria. Default: false'
|
26
|
-
option :
|
27
|
-
option :
|
27
|
+
option :parsers, :aliases => :pw, type: :numeric, desc: 'Set how many parser workers to use. Default: 1'
|
28
|
+
option :fetchers, :aliases => :fw, type: :numeric, desc: 'Set how many fetcher workers to use. Default: 1'
|
29
|
+
option :browsers, :aliases => :bw, type: :numeric, desc: 'Set how many browser workers to use. Default: 0'
|
28
30
|
option :disable_scheduler, type: :boolean, desc: 'Set true to disable scheduler. Default: false'
|
29
31
|
option :cancel_current_job, type: :boolean, desc: 'Set true to cancel currently active job if scheduler starts. Default: false'
|
30
32
|
option :schedule, type: :string, desc: 'Set the schedule of the scraper to run. Must be in CRON format.'
|
@@ -51,8 +53,9 @@ module Datahen
|
|
51
53
|
option :freshness_type, :aliases => :t, desc: 'Set how fresh the page cache is. Possible values: day, week, month, year. Default: any'
|
52
54
|
option :proxy_type, desc: 'Set the Proxy type. Default: standard'
|
53
55
|
option :force_fetch, :aliases => :f, type: :boolean, desc: 'Set true to force fetch page that is not within freshness criteria. Default: false'
|
54
|
-
option :
|
55
|
-
option :
|
56
|
+
option :parsers, :aliases => :pw, type: :numeric, desc: 'Set how many parser workers to use. Default: 1'
|
57
|
+
option :fetchers, :aliases => :fw, type: :numeric, desc: 'Set how many fetcher workers to use. Default: 1'
|
58
|
+
option :browsers, :aliases => :bw, type: :numeric, desc: 'Set how many browser workers to use. Default: 0'
|
56
59
|
option :disable_scheduler, type: :boolean, desc: 'Set true to disable scheduler. Default: false'
|
57
60
|
option :cancel_current_job, type: :boolean, desc: 'Set true to cancel currently active job if scheduler starts. Default: false'
|
58
61
|
option :schedule, type: :string, desc: 'Set the schedule of the scraper to run. Must be in CRON format.'
|
@@ -96,8 +99,9 @@ module Datahen
|
|
96
99
|
long_desc <<-LONGDESC
|
97
100
|
Starts a scraper by creating an active scrape job\x5
|
98
101
|
LONGDESC
|
99
|
-
option :
|
100
|
-
option :
|
102
|
+
option :parsers, :aliases => :pw, type: :numeric, desc: 'Set how many parser workers to use. Default: 1'
|
103
|
+
option :fetchers, :aliases => :fw, type: :numeric, desc: 'Set how many fetcher workers to use. Default: 1'
|
104
|
+
option :browsers, :aliases => :bw, type: :numeric, desc: 'Set how many browser workers to use. Default: 0'
|
101
105
|
option :proxy_type, desc: 'Set the Proxy type. Default: standard'
|
102
106
|
option :vars, type: :string, banner: :JSON, desc: 'Set input vars. Must be in json format. i.e: [{"name":"foo", "value":"bar", "secret":false}] '
|
103
107
|
option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
|
data/lib/datahen/cli/scraper_job.rb
CHANGED
@@ -99,8 +99,9 @@ module Datahen
|
|
99
99
|
long_desc <<-LONGDESC
|
100
100
|
Updates a scraper's current job.
|
101
101
|
LONGDESC
|
102
|
-
option :
|
103
|
-
option :
|
102
|
+
option :parsers, :aliases => :pw, type: :numeric, desc: 'Set how many parser workers to use. Scraper job must be restarted (paused then resumed) for it to take effect. Default: 1. '
|
103
|
+
option :fetchers, :aliases => :fw, type: :numeric, desc: 'Set how many fetcher workers to use. Scraper job must be restarted (paused then resumed) for it to take effect. Default: 1. '
|
104
|
+
option :browsers, :aliases => :bw, type: :numeric, desc: 'Set how many browser workers to use. Scraper job must be restarted (paused then resumed) for it to take effect. Default: 0. '
|
104
105
|
option :proxy_type, desc: 'Set the Proxy type. Default: standard'
|
105
106
|
option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
|
106
107
|
option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
|
data/lib/datahen/cli.rb
CHANGED
@@ -16,10 +16,9 @@ require 'datahen/cli/parser'
|
|
16
16
|
require 'datahen/cli/seeder'
|
17
17
|
require 'datahen/cli/finisher'
|
18
18
|
require 'datahen/cli/env_var'
|
19
|
+
require 'datahen/cli/account_deploy_key'
|
19
20
|
require 'datahen/cli/account'
|
20
21
|
|
21
|
-
|
22
|
-
|
23
22
|
module Datahen
|
24
23
|
class CLI < Thor
|
25
24
|
desc "scraper SUBCOMMAND ...ARGS", "manage scrapers"
|
data/lib/datahen/client/job.rb
CHANGED
@@ -17,7 +17,8 @@ module Datahen
|
|
17
17
|
def update(job_id, opts={})
|
18
18
|
body = {}
|
19
19
|
body[:status] = opts[:status] if opts[:status]
|
20
|
-
body[:
|
20
|
+
body[:parser_worker_count] = opts[:parsers] if opts[:parsers]
|
21
|
+
body[:fetcher_worker_count] = opts[:fetchers] if opts[:fetchers]
|
21
22
|
body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
|
22
23
|
body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
|
23
24
|
body[:profile] = opts[:profile] if opts[:profile]
|
data/lib/datahen/client/scraper.rb
CHANGED
@@ -18,7 +18,8 @@ module Datahen
|
|
18
18
|
body[:git_branch] = opts[:branch] || opts[:git_branch] || "master" if opts[:branch] || opts[:git_branch]
|
19
19
|
body[:freshness_type] = opts[:freshness_type] if opts[:freshness_type]
|
20
20
|
body[:force_fetch] = opts[:force_fetch] if opts[:force_fetch]
|
21
|
-
body[:
|
21
|
+
body[:parser_worker_count] = opts[:parsers] || opts[:parser_worker_count] if opts[:parsers] || opts[:parser_worker_count]
|
22
|
+
body[:fetcher_worker_count] = opts[:fetchers] || opts[:fetcher_worker_count] if opts[:fetchers] || opts[:fetcher_worker_count]
|
22
23
|
body[:browser_worker_count] = opts[:browsers] || opts[:browser_worker_count] if opts[:browsers] || opts[:browser_worker_count]
|
23
24
|
body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
|
24
25
|
body[:disable_scheduler] = opts[:disable_scheduler] if opts[:disable_scheduler]
|
@@ -42,7 +43,8 @@ module Datahen
|
|
42
43
|
body[:git_branch] = opts[:branch] || opts[:git_branch] if opts[:branch] || opts[:git_branch]
|
43
44
|
body[:freshness_type] = opts[:freshness_type] if opts[:freshness_type]
|
44
45
|
body[:force_fetch] = opts[:force_fetch] if opts.has_key?("force_fetch") || opts.has_key?(:force_fetch)
|
45
|
-
body[:
|
46
|
+
body[:parser_worker_count] = opts[:parsers] || opts[:parser_worker_count] if opts[:parsers] || opts[:parser_worker_count]
|
47
|
+
body[:fetcher_worker_count] = opts[:fetchers] || opts[:fetcher_worker_count] if opts[:fetchers] || opts[:fetcher_worker_count]
|
46
48
|
body[:browser_worker_count] = opts[:browsers] || opts[:browser_worker_count] if opts[:browsers] || opts[:browser_worker_count]
|
47
49
|
body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
|
48
50
|
body[:disable_scheduler] = opts[:disable_scheduler] if opts.has_key?("disable_scheduler") || opts.has_key?(:disable_scheduler)
|
data/lib/datahen/client/scraper_job.rb
CHANGED
@@ -8,7 +8,8 @@ module Datahen
|
|
8
8
|
|
9
9
|
def create(scraper_name, opts={})
|
10
10
|
body = {}
|
11
|
-
body[:
|
11
|
+
body[:parser_worker_count] = opts[:parsers] if opts[:parsers]
|
12
|
+
body[:fetcher_worker_count] = opts[:fetchers] if opts[:fetchers]
|
12
13
|
body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
|
13
14
|
body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
|
14
15
|
body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
|
@@ -36,7 +37,8 @@ module Datahen
|
|
36
37
|
def update(scraper_name, opts={})
|
37
38
|
body = {}
|
38
39
|
body[:status] = opts[:status] if opts[:status]
|
39
|
-
body[:
|
40
|
+
body[:parser_worker_count] = opts[:parsers] if opts[:parsers]
|
41
|
+
body[:fetcher_worker_count] = opts[:fetchers] if opts[:fetchers]
|
40
42
|
body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
|
41
43
|
body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
|
42
44
|
body[:profile] = opts[:profile] if opts[:profile]
|
data/lib/datahen/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datahen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.18.0
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Parama Danoesubroto
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-06-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -212,6 +212,7 @@ files:
|
|
212
212
|
- lib/datahen.rb
|
213
213
|
- lib/datahen/cli.rb
|
214
214
|
- lib/datahen/cli/account.rb
|
215
|
+
- lib/datahen/cli/account_deploy_key.rb
|
215
216
|
- lib/datahen/cli/env_var.rb
|
216
217
|
- lib/datahen/cli/finisher.rb
|
217
218
|
- lib/datahen/cli/global_page.rb
|