datahen 0.17.0 → 0.18.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/datahen/cli/scraper.rb +3 -0
- data/lib/datahen/cli/scraper_job.rb +1 -0
- data/lib/datahen/cli/scraper_page.rb +2 -0
- data/lib/datahen/client/job.rb +1 -0
- data/lib/datahen/client/job_page.rb +2 -0
- data/lib/datahen/client/scraper.rb +2 -0
- data/lib/datahen/client/scraper_job.rb +2 -0
- data/lib/datahen/client/scraper_job_page.rb +2 -0
- data/lib/datahen/version.rb +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0e1fcf7422236924fd818a1527337a6089cd444b1f35510b72fe140facbed7b0
|
4
|
+
data.tar.gz: 05be57d3e058ee9969d210ded0b1d043b388390d5f2ac834ece490691683f39d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4e076509fa8a0fa7fa78406916530bfe2c1b6075ac1007baab43a447911d1c7d8e90bddd8a1438c339a4fadef4e05e629e4907f1f1bae3f4c1f283dba63c25c9
|
7
|
+
data.tar.gz: e7ceb1208c87cd75fa7202f55549c6b2f2ce24980f7642827aab6f721107ca8ddb59829b93742126e465d6930c6c5574de2d045d3a968a7f3a826bf099ee3c4b
|
data/lib/datahen/cli/scraper.rb
CHANGED
@@ -34,6 +34,7 @@ module Datahen
|
|
34
34
|
option :max_job_count, type: :numeric, desc: 'Set a value to set max number of jobs available. Set -1 for unlimited. Default: 3'
|
35
35
|
option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
|
36
36
|
option :enable_global_cache, type: :boolean, desc: 'Set true to enable page cache. Default: false'
|
37
|
+
option :retry_interval, type: :numeric, desc: 'Set a value to set retry time interval on seconds when refetching a page. Set a value grather than 0 to set it as new time to refetch, 0 means default time. Default: 0'
|
37
38
|
def create(scraper_name, git_repository)
|
38
39
|
# puts "options #{options}"
|
39
40
|
client = Client::Scraper.new(options)
|
@@ -61,6 +62,7 @@ module Datahen
|
|
61
62
|
option :max_job_count, type: :numeric, desc: 'Set a value to set max number of jobs available. Set -1 for unlimited. Default: 3'
|
62
63
|
option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
|
63
64
|
option :enable_global_cache, type: :boolean, desc: 'Set true to enable page cache. Default: false'
|
65
|
+
option :retry_interval, type: :numeric, desc: 'Set a value to set retry time interval on seconds when refetching a page. Set a value grather than 0 to set it as new time to refetch, 0 means default time. Default: 0'
|
64
66
|
def update(scraper_name)
|
65
67
|
client = Client::Scraper.new(options)
|
66
68
|
puts "#{client.update(scraper_name, options)}"
|
@@ -99,6 +101,7 @@ module Datahen
|
|
99
101
|
option :proxy_type, desc: 'Set the Proxy type. Default: standard'
|
100
102
|
option :vars, type: :string, banner: :JSON, desc: 'Set input vars. Must be in json format. i.e: [{"name":"foo", "value":"bar", "secret":false}] '
|
101
103
|
option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
|
104
|
+
option :retry_interval, type: :numeric, desc: 'Set a value to set retry time interval on seconds when refetching a page. Set a value grather than 0 to set it as new time to refetch, 0 means default time. Default: 0'
|
102
105
|
def start(scraper_name)
|
103
106
|
client = Client::ScraperJob.new(options)
|
104
107
|
puts "Starting a scrape job..."
|
data/lib/datahen/cli/scraper_job.rb
CHANGED
@@ -106,6 +106,7 @@ module Datahen
|
|
106
106
|
option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
|
107
107
|
option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
|
108
108
|
option :enable_global_cache, type: :boolean, desc: 'Set true to enable page cache. Default: false'
|
109
|
+
option :retry_interval, type: :numeric, desc: 'Set a value to set retry time interval on seconds when refetching a page. Set a value grather than 0 to set it as new time to refetch, 0 means default time. Default: 0'
|
109
110
|
def update(scraper_name)
|
110
111
|
if options[:job]
|
111
112
|
client = Client::Job.new(options)
|
data/lib/datahen/cli/scraper_page.rb
CHANGED
@@ -46,6 +46,7 @@ module Datahen
|
|
46
46
|
option :ua_type, :aliases => :u, desc: 'Set user agent type. Default: desktop'
|
47
47
|
option :no_redirect, :aliases => :n, type: :boolean, desc: 'Set true to not follow redirect. Default: false'
|
48
48
|
option :max_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
|
49
|
+
option :retry_interval, type: :numeric, desc: 'Set a value to set retry time interval on seconds when refetching a page. Set a value grather than 0 to set it as new time to refetch, 0 means default time. Default: 0'
|
49
50
|
def add(scraper_name, url)
|
50
51
|
begin
|
51
52
|
options[:headers] = JSON.parse(options[:headers]) if options[:headers]
|
@@ -80,6 +81,7 @@ module Datahen
|
|
80
81
|
option :priority, type: :numeric, desc: 'Set fetch priority. The higher the value, the sooner the page gets fetched. Default: 0'
|
81
82
|
option :vars, :aliases => :v, type: :string, desc: 'Set user-defined page variables. Must be in json format. i.e: {"Foo":"bar"}'
|
82
83
|
option :max_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
|
84
|
+
option :retry_interval, type: :numeric, desc: 'Set a value to set retry time interval on seconds when refetching a page. Set a value grather than 0 to set it as new time to refetch, 0 means default time. Default: 0'
|
83
85
|
def update(scraper_name, gid)
|
84
86
|
begin
|
85
87
|
options[:vars] = JSON.parse(options[:vars]) if options[:vars]
|
data/lib/datahen/client/job.rb
CHANGED
@@ -23,6 +23,7 @@ module Datahen
|
|
23
23
|
body[:profile] = opts[:profile] if opts[:profile]
|
24
24
|
body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
|
25
25
|
body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
|
26
|
+
body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
|
26
27
|
params = @options.merge({body: body.to_json})
|
27
28
|
|
28
29
|
self.class.put("/jobs/#{job_id}", params)
|
data/lib/datahen/client/job_page.rb
CHANGED
@@ -17,6 +17,7 @@ module Datahen
|
|
17
17
|
body[:vars] = opts[:vars] if opts[:vars]
|
18
18
|
body[:max_size] = opts[:max_size] if opts[:max_size]
|
19
19
|
body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
|
20
|
+
body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
|
20
21
|
|
21
22
|
params = @options.merge({body: body.to_json})
|
22
23
|
|
@@ -40,6 +41,7 @@ module Datahen
|
|
40
41
|
body[:cookie] = opts[:cookie] if opts[:cookie]
|
41
42
|
body[:max_size] = opts[:max_size] if opts[:max_size]
|
42
43
|
body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
|
44
|
+
body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
|
43
45
|
|
44
46
|
params = @options.merge({body: body.to_json})
|
45
47
|
|
data/lib/datahen/client/scraper.rb
CHANGED
@@ -30,6 +30,7 @@ module Datahen
|
|
30
30
|
body[:max_job_count] = opts[:max_job_count] if opts[:max_job_count]
|
31
31
|
body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
|
32
32
|
body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
|
33
|
+
body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
|
33
34
|
params = @options.merge({body: body.to_json})
|
34
35
|
self.class.post("/scrapers", params)
|
35
36
|
end
|
@@ -53,6 +54,7 @@ module Datahen
|
|
53
54
|
body[:max_job_count] = opts[:max_job_count] if opts.has_key?("max_job_count") || opts.has_key?(:max_job_count)
|
54
55
|
body[:max_page_size] = opts[:max_page_size] if opts.has_key?("max_page_size") || opts.has_key?(:max_page_size)
|
55
56
|
body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
|
57
|
+
body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
|
56
58
|
params = @options.merge({body: body.to_json})
|
57
59
|
|
58
60
|
self.class.put("/scrapers/#{scraper_name}", params)
|
data/lib/datahen/client/scraper_job.rb
CHANGED
@@ -13,6 +13,7 @@ module Datahen
|
|
13
13
|
body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
|
14
14
|
body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
|
15
15
|
body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
|
16
|
+
body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
|
16
17
|
if opts[:vars]
|
17
18
|
if opts[:vars].is_a?(Array)
|
18
19
|
body[:vars] = opts[:vars]
|
@@ -41,6 +42,7 @@ module Datahen
|
|
41
42
|
body[:profile] = opts[:profile] if opts[:profile]
|
42
43
|
body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
|
43
44
|
body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
|
45
|
+
body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
|
44
46
|
params = @options.merge({body: body.to_json})
|
45
47
|
|
46
48
|
self.class.put("/scrapers/#{scraper_name}/current_job", params)
|
data/lib/datahen/client/scraper_job_page.rb
CHANGED
@@ -17,6 +17,7 @@ module Datahen
|
|
17
17
|
body[:vars] = opts[:vars] if opts[:vars]
|
18
18
|
body[:max_size] = opts[:max_size] if opts[:max_size]
|
19
19
|
body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
|
20
|
+
body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
|
20
21
|
|
21
22
|
params = @options.merge({body: body.to_json})
|
22
23
|
|
@@ -63,6 +64,7 @@ module Datahen
|
|
63
64
|
body[:cookie] = opts[:cookie] if opts[:cookie]
|
64
65
|
body[:max_size] = opts[:max_size] if opts[:max_size]
|
65
66
|
body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
|
67
|
+
body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
|
66
68
|
|
67
69
|
params = @options.merge({body: body.to_json})
|
68
70
|
|
data/lib/datahen/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datahen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.17.0
|
4
|
+
version: 0.18.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Parama Danoesubroto
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-09-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -276,7 +276,7 @@ metadata:
|
|
276
276
|
allowed_push_host: https://rubygems.org
|
277
277
|
homepage_uri: https://datahen.com
|
278
278
|
source_code_uri: https://github.com/DataHenOfficial/datahen-ruby
|
279
|
-
post_install_message:
|
279
|
+
post_install_message:
|
280
280
|
rdoc_options: []
|
281
281
|
require_paths:
|
282
282
|
- lib
|
@@ -292,7 +292,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
292
292
|
version: '0'
|
293
293
|
requirements: []
|
294
294
|
rubygems_version: 3.0.3
|
295
|
-
signing_key:
|
295
|
+
signing_key:
|
296
296
|
specification_version: 4
|
297
297
|
summary: DataHen toolbelt for developers
|
298
298
|
test_files: []
|