datahen 0.17.0 → 0.18.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 82f77763f1977307312821247750463f03c0a7dcb52d78d1ae8ca344265b6e56
4
- data.tar.gz: 5f04de4cc0df0b7edcddc2514ef90bb1627e141d8a7b3fcc3ccbc2272b5a629a
3
+ metadata.gz: 0e1fcf7422236924fd818a1527337a6089cd444b1f35510b72fe140facbed7b0
4
+ data.tar.gz: 05be57d3e058ee9969d210ded0b1d043b388390d5f2ac834ece490691683f39d
5
5
  SHA512:
6
- metadata.gz: cabad1dc36b89878f2a361ab4d4f0c62af932ab3d69b0ca6f929c24809be74d3e82b00fa577215f9019cd243fac286f684cacdcedf8f4cb221400b7c7014c5e3
7
- data.tar.gz: b86c0f71a4c555b2e8dce19509d93b888c8dacd72ea0e57db623b3ad1ddb9f3a0ac5bafaac4fdcaf28673c3a77e2c2da4a4aaf0d00fa04465dda641ada86eb52
6
+ metadata.gz: 4e076509fa8a0fa7fa78406916530bfe2c1b6075ac1007baab43a447911d1c7d8e90bddd8a1438c339a4fadef4e05e629e4907f1f1bae3f4c1f283dba63c25c9
7
+ data.tar.gz: e7ceb1208c87cd75fa7202f55549c6b2f2ce24980f7642827aab6f721107ca8ddb59829b93742126e465d6930c6c5574de2d045d3a968a7f3a826bf099ee3c4b
@@ -34,6 +34,7 @@ module Datahen
34
34
  option :max_job_count, type: :numeric, desc: 'Set a value to set max number of jobs available. Set -1 for unlimited. Default: 3'
35
35
  option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
36
36
  option :enable_global_cache, type: :boolean, desc: 'Set true to enable page cache. Default: false'
37
+ option :retry_interval, type: :numeric, desc: 'Set a value to set retry time interval on seconds when refetching a page. Set a value grather than 0 to set it as new time to refetch, 0 means default time. Default: 0'
37
38
  def create(scraper_name, git_repository)
38
39
  # puts "options #{options}"
39
40
  client = Client::Scraper.new(options)
@@ -61,6 +62,7 @@ module Datahen
61
62
  option :max_job_count, type: :numeric, desc: 'Set a value to set max number of jobs available. Set -1 for unlimited. Default: 3'
62
63
  option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
63
64
  option :enable_global_cache, type: :boolean, desc: 'Set true to enable page cache. Default: false'
65
+ option :retry_interval, type: :numeric, desc: 'Set a value to set retry time interval on seconds when refetching a page. Set a value grather than 0 to set it as new time to refetch, 0 means default time. Default: 0'
64
66
  def update(scraper_name)
65
67
  client = Client::Scraper.new(options)
66
68
  puts "#{client.update(scraper_name, options)}"
@@ -99,6 +101,7 @@ module Datahen
99
101
  option :proxy_type, desc: 'Set the Proxy type. Default: standard'
100
102
  option :vars, type: :string, banner: :JSON, desc: 'Set input vars. Must be in json format. i.e: [{"name":"foo", "value":"bar", "secret":false}] '
101
103
  option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
104
+ option :retry_interval, type: :numeric, desc: 'Set a value to set retry time interval on seconds when refetching a page. Set a value grather than 0 to set it as new time to refetch, 0 means default time. Default: 0'
102
105
  def start(scraper_name)
103
106
  client = Client::ScraperJob.new(options)
104
107
  puts "Starting a scrape job..."
@@ -106,6 +106,7 @@ module Datahen
106
106
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
107
107
  option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
108
108
  option :enable_global_cache, type: :boolean, desc: 'Set true to enable page cache. Default: false'
109
+ option :retry_interval, type: :numeric, desc: 'Set a value to set retry time interval on seconds when refetching a page. Set a value grather than 0 to set it as new time to refetch, 0 means default time. Default: 0'
109
110
  def update(scraper_name)
110
111
  if options[:job]
111
112
  client = Client::Job.new(options)
@@ -46,6 +46,7 @@ module Datahen
46
46
  option :ua_type, :aliases => :u, desc: 'Set user agent type. Default: desktop'
47
47
  option :no_redirect, :aliases => :n, type: :boolean, desc: 'Set true to not follow redirect. Default: false'
48
48
  option :max_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
49
+ option :retry_interval, type: :numeric, desc: 'Set a value to set retry time interval on seconds when refetching a page. Set a value grather than 0 to set it as new time to refetch, 0 means default time. Default: 0'
49
50
  def add(scraper_name, url)
50
51
  begin
51
52
  options[:headers] = JSON.parse(options[:headers]) if options[:headers]
@@ -80,6 +81,7 @@ module Datahen
80
81
  option :priority, type: :numeric, desc: 'Set fetch priority. The higher the value, the sooner the page gets fetched. Default: 0'
81
82
  option :vars, :aliases => :v, type: :string, desc: 'Set user-defined page variables. Must be in json format. i.e: {"Foo":"bar"}'
82
83
  option :max_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
84
+ option :retry_interval, type: :numeric, desc: 'Set a value to set retry time interval on seconds when refetching a page. Set a value grather than 0 to set it as new time to refetch, 0 means default time. Default: 0'
83
85
  def update(scraper_name, gid)
84
86
  begin
85
87
  options[:vars] = JSON.parse(options[:vars]) if options[:vars]
@@ -23,6 +23,7 @@ module Datahen
23
23
  body[:profile] = opts[:profile] if opts[:profile]
24
24
  body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
25
25
  body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
26
+ body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
26
27
  params = @options.merge({body: body.to_json})
27
28
 
28
29
  self.class.put("/jobs/#{job_id}", params)
@@ -17,6 +17,7 @@ module Datahen
17
17
  body[:vars] = opts[:vars] if opts[:vars]
18
18
  body[:max_size] = opts[:max_size] if opts[:max_size]
19
19
  body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
20
+ body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
20
21
 
21
22
  params = @options.merge({body: body.to_json})
22
23
 
@@ -40,6 +41,7 @@ module Datahen
40
41
  body[:cookie] = opts[:cookie] if opts[:cookie]
41
42
  body[:max_size] = opts[:max_size] if opts[:max_size]
42
43
  body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
44
+ body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
43
45
 
44
46
  params = @options.merge({body: body.to_json})
45
47
 
@@ -30,6 +30,7 @@ module Datahen
30
30
  body[:max_job_count] = opts[:max_job_count] if opts[:max_job_count]
31
31
  body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
32
32
  body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
33
+ body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
33
34
  params = @options.merge({body: body.to_json})
34
35
  self.class.post("/scrapers", params)
35
36
  end
@@ -53,6 +54,7 @@ module Datahen
53
54
  body[:max_job_count] = opts[:max_job_count] if opts.has_key?("max_job_count") || opts.has_key?(:max_job_count)
54
55
  body[:max_page_size] = opts[:max_page_size] if opts.has_key?("max_page_size") || opts.has_key?(:max_page_size)
55
56
  body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
57
+ body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
56
58
  params = @options.merge({body: body.to_json})
57
59
 
58
60
  self.class.put("/scrapers/#{scraper_name}", params)
@@ -13,6 +13,7 @@ module Datahen
13
13
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
14
14
  body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
15
15
  body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
16
+ body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
16
17
  if opts[:vars]
17
18
  if opts[:vars].is_a?(Array)
18
19
  body[:vars] = opts[:vars]
@@ -41,6 +42,7 @@ module Datahen
41
42
  body[:profile] = opts[:profile] if opts[:profile]
42
43
  body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
43
44
  body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
45
+ body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
44
46
  params = @options.merge({body: body.to_json})
45
47
 
46
48
  self.class.put("/scrapers/#{scraper_name}/current_job", params)
@@ -17,6 +17,7 @@ module Datahen
17
17
  body[:vars] = opts[:vars] if opts[:vars]
18
18
  body[:max_size] = opts[:max_size] if opts[:max_size]
19
19
  body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
20
+ body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
20
21
 
21
22
  params = @options.merge({body: body.to_json})
22
23
 
@@ -63,6 +64,7 @@ module Datahen
63
64
  body[:cookie] = opts[:cookie] if opts[:cookie]
64
65
  body[:max_size] = opts[:max_size] if opts[:max_size]
65
66
  body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
67
+ body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
66
68
 
67
69
  params = @options.merge({body: body.to_json})
68
70
 
@@ -1,3 +1,3 @@
1
1
  module Datahen
2
- VERSION = "0.17.0"
2
+ VERSION = "0.18.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datahen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.0
4
+ version: 0.18.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-08-20 00:00:00.000000000 Z
11
+ date: 2021-09-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -276,7 +276,7 @@ metadata:
276
276
  allowed_push_host: https://rubygems.org
277
277
  homepage_uri: https://datahen.com
278
278
  source_code_uri: https://github.com/DataHenOfficial/datahen-ruby
279
- post_install_message:
279
+ post_install_message:
280
280
  rdoc_options: []
281
281
  require_paths:
282
282
  - lib
@@ -292,7 +292,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
292
292
  version: '0'
293
293
  requirements: []
294
294
  rubygems_version: 3.0.3
295
- signing_key:
295
+ signing_key:
296
296
  specification_version: 4
297
297
  summary: DataHen toolbelt for developers
298
298
  test_files: []