datahen 0.17.0 → 0.18.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 82f77763f1977307312821247750463f03c0a7dcb52d78d1ae8ca344265b6e56
4
- data.tar.gz: 5f04de4cc0df0b7edcddc2514ef90bb1627e141d8a7b3fcc3ccbc2272b5a629a
3
+ metadata.gz: 0e1fcf7422236924fd818a1527337a6089cd444b1f35510b72fe140facbed7b0
4
+ data.tar.gz: 05be57d3e058ee9969d210ded0b1d043b388390d5f2ac834ece490691683f39d
5
5
  SHA512:
6
- metadata.gz: cabad1dc36b89878f2a361ab4d4f0c62af932ab3d69b0ca6f929c24809be74d3e82b00fa577215f9019cd243fac286f684cacdcedf8f4cb221400b7c7014c5e3
7
- data.tar.gz: b86c0f71a4c555b2e8dce19509d93b888c8dacd72ea0e57db623b3ad1ddb9f3a0ac5bafaac4fdcaf28673c3a77e2c2da4a4aaf0d00fa04465dda641ada86eb52
6
+ metadata.gz: 4e076509fa8a0fa7fa78406916530bfe2c1b6075ac1007baab43a447911d1c7d8e90bddd8a1438c339a4fadef4e05e629e4907f1f1bae3f4c1f283dba63c25c9
7
+ data.tar.gz: e7ceb1208c87cd75fa7202f55549c6b2f2ce24980f7642827aab6f721107ca8ddb59829b93742126e465d6930c6c5574de2d045d3a968a7f3a826bf099ee3c4b
@@ -34,6 +34,7 @@ module Datahen
34
34
  option :max_job_count, type: :numeric, desc: 'Set a value to set max number of jobs available. Set -1 for unlimited. Default: 3'
35
35
  option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
36
36
  option :enable_global_cache, type: :boolean, desc: 'Set true to enable page cache. Default: false'
37
+ option :retry_interval, type: :numeric, desc: 'Set a value to set retry time interval on seconds when refetching a page. Set a value grather than 0 to set it as new time to refetch, 0 means default time. Default: 0'
37
38
  def create(scraper_name, git_repository)
38
39
  # puts "options #{options}"
39
40
  client = Client::Scraper.new(options)
@@ -61,6 +62,7 @@ module Datahen
61
62
  option :max_job_count, type: :numeric, desc: 'Set a value to set max number of jobs available. Set -1 for unlimited. Default: 3'
62
63
  option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
63
64
  option :enable_global_cache, type: :boolean, desc: 'Set true to enable page cache. Default: false'
65
+ option :retry_interval, type: :numeric, desc: 'Set a value to set retry time interval on seconds when refetching a page. Set a value grather than 0 to set it as new time to refetch, 0 means default time. Default: 0'
64
66
  def update(scraper_name)
65
67
  client = Client::Scraper.new(options)
66
68
  puts "#{client.update(scraper_name, options)}"
@@ -99,6 +101,7 @@ module Datahen
99
101
  option :proxy_type, desc: 'Set the Proxy type. Default: standard'
100
102
  option :vars, type: :string, banner: :JSON, desc: 'Set input vars. Must be in json format. i.e: [{"name":"foo", "value":"bar", "secret":false}] '
101
103
  option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
104
+ option :retry_interval, type: :numeric, desc: 'Set a value to set retry time interval on seconds when refetching a page. Set a value grather than 0 to set it as new time to refetch, 0 means default time. Default: 0'
102
105
  def start(scraper_name)
103
106
  client = Client::ScraperJob.new(options)
104
107
  puts "Starting a scrape job..."
@@ -106,6 +106,7 @@ module Datahen
106
106
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
107
107
  option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
108
108
  option :enable_global_cache, type: :boolean, desc: 'Set true to enable page cache. Default: false'
109
+ option :retry_interval, type: :numeric, desc: 'Set a value to set retry time interval on seconds when refetching a page. Set a value grather than 0 to set it as new time to refetch, 0 means default time. Default: 0'
109
110
  def update(scraper_name)
110
111
  if options[:job]
111
112
  client = Client::Job.new(options)
@@ -46,6 +46,7 @@ module Datahen
46
46
  option :ua_type, :aliases => :u, desc: 'Set user agent type. Default: desktop'
47
47
  option :no_redirect, :aliases => :n, type: :boolean, desc: 'Set true to not follow redirect. Default: false'
48
48
  option :max_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
49
+ option :retry_interval, type: :numeric, desc: 'Set a value to set retry time interval on seconds when refetching a page. Set a value grather than 0 to set it as new time to refetch, 0 means default time. Default: 0'
49
50
  def add(scraper_name, url)
50
51
  begin
51
52
  options[:headers] = JSON.parse(options[:headers]) if options[:headers]
@@ -80,6 +81,7 @@ module Datahen
80
81
  option :priority, type: :numeric, desc: 'Set fetch priority. The higher the value, the sooner the page gets fetched. Default: 0'
81
82
  option :vars, :aliases => :v, type: :string, desc: 'Set user-defined page variables. Must be in json format. i.e: {"Foo":"bar"}'
82
83
  option :max_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
84
+ option :retry_interval, type: :numeric, desc: 'Set a value to set retry time interval on seconds when refetching a page. Set a value grather than 0 to set it as new time to refetch, 0 means default time. Default: 0'
83
85
  def update(scraper_name, gid)
84
86
  begin
85
87
  options[:vars] = JSON.parse(options[:vars]) if options[:vars]
@@ -23,6 +23,7 @@ module Datahen
23
23
  body[:profile] = opts[:profile] if opts[:profile]
24
24
  body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
25
25
  body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
26
+ body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
26
27
  params = @options.merge({body: body.to_json})
27
28
 
28
29
  self.class.put("/jobs/#{job_id}", params)
@@ -17,6 +17,7 @@ module Datahen
17
17
  body[:vars] = opts[:vars] if opts[:vars]
18
18
  body[:max_size] = opts[:max_size] if opts[:max_size]
19
19
  body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
20
+ body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
20
21
 
21
22
  params = @options.merge({body: body.to_json})
22
23
 
@@ -40,6 +41,7 @@ module Datahen
40
41
  body[:cookie] = opts[:cookie] if opts[:cookie]
41
42
  body[:max_size] = opts[:max_size] if opts[:max_size]
42
43
  body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
44
+ body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
43
45
 
44
46
  params = @options.merge({body: body.to_json})
45
47
 
@@ -30,6 +30,7 @@ module Datahen
30
30
  body[:max_job_count] = opts[:max_job_count] if opts[:max_job_count]
31
31
  body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
32
32
  body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
33
+ body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
33
34
  params = @options.merge({body: body.to_json})
34
35
  self.class.post("/scrapers", params)
35
36
  end
@@ -53,6 +54,7 @@ module Datahen
53
54
  body[:max_job_count] = opts[:max_job_count] if opts.has_key?("max_job_count") || opts.has_key?(:max_job_count)
54
55
  body[:max_page_size] = opts[:max_page_size] if opts.has_key?("max_page_size") || opts.has_key?(:max_page_size)
55
56
  body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
57
+ body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
56
58
  params = @options.merge({body: body.to_json})
57
59
 
58
60
  self.class.put("/scrapers/#{scraper_name}", params)
@@ -13,6 +13,7 @@ module Datahen
13
13
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
14
14
  body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
15
15
  body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
16
+ body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
16
17
  if opts[:vars]
17
18
  if opts[:vars].is_a?(Array)
18
19
  body[:vars] = opts[:vars]
@@ -41,6 +42,7 @@ module Datahen
41
42
  body[:profile] = opts[:profile] if opts[:profile]
42
43
  body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
43
44
  body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
45
+ body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
44
46
  params = @options.merge({body: body.to_json})
45
47
 
46
48
  self.class.put("/scrapers/#{scraper_name}/current_job", params)
@@ -17,6 +17,7 @@ module Datahen
17
17
  body[:vars] = opts[:vars] if opts[:vars]
18
18
  body[:max_size] = opts[:max_size] if opts[:max_size]
19
19
  body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
20
+ body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
20
21
 
21
22
  params = @options.merge({body: body.to_json})
22
23
 
@@ -63,6 +64,7 @@ module Datahen
63
64
  body[:cookie] = opts[:cookie] if opts[:cookie]
64
65
  body[:max_size] = opts[:max_size] if opts[:max_size]
65
66
  body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
67
+ body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
66
68
 
67
69
  params = @options.merge({body: body.to_json})
68
70
 
@@ -1,3 +1,3 @@
1
1
  module Datahen
2
- VERSION = "0.17.0"
2
+ VERSION = "0.18.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datahen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.0
4
+ version: 0.18.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-08-20 00:00:00.000000000 Z
11
+ date: 2021-09-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -276,7 +276,7 @@ metadata:
276
276
  allowed_push_host: https://rubygems.org
277
277
  homepage_uri: https://datahen.com
278
278
  source_code_uri: https://github.com/DataHenOfficial/datahen-ruby
279
- post_install_message:
279
+ post_install_message:
280
280
  rdoc_options: []
281
281
  require_paths:
282
282
  - lib
@@ -292,7 +292,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
292
292
  version: '0'
293
293
  requirements: []
294
294
  rubygems_version: 3.0.3
295
- signing_key:
295
+ signing_key:
296
296
  specification_version: 4
297
297
  summary: DataHen toolbelt for developers
298
298
  test_files: []