datahen 0.15.11 → 0.16.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f77e31da8e2a7ff08086c4aa9d174608a9c3f186679d456b22310b48384d3572
4
- data.tar.gz: 0bf53ae0886b16bf6fe08b0db07b1a631b69f31d8e3a6868a4d483549049e4ed
3
+ metadata.gz: 39397d5cb4e60a6d24cdec5bd979f543a23019b7c9b9dffe6140a204d330465c
4
+ data.tar.gz: 1db7c2b448179c2bc4b56e99428dfb4303cbb1451df032ec43cb5264f58935ec
5
5
  SHA512:
6
- metadata.gz: a491874347ed6ac97c0a0e4f0d2c5830140b9367c1e01b4c95e7a447b071df643d12793ed7d6e5a0224b8876905cf74bc13b987e6e3e03e937d1f821557b8ec3
7
- data.tar.gz: c553a372790654726f2921b6d9d582ca90b314b3fe8d78625cf2442e01cf2ce96ae556e78812d5bd6a03f350beb32aa3158981554179410c12926d480c911887
6
+ metadata.gz: 7058506211d537c8ea3c9a521625fd339b255f41188a70341cc04683ca1abc1fa7f19ed796026b5e07679bb2fd7e57f096d319fbe8a75ae6fb7fd59a704a9824
7
+ data.tar.gz: b8b60607cd27acbd654afe0816b0b7738871ca61c370fbafa747985d3723fec64f9b07c6078b34c1192db1d7b160682522a4901a7047c3d067860bc2b745b0b0
@@ -32,6 +32,7 @@ module Datahen
32
32
  option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
33
33
  option :multiple_jobs, type: :boolean, desc: 'Set true to enable multiple jobs. Default: false'
34
34
  option :max_job_count, type: :numeric, desc: 'Set a value to set max number of jobs available. Set -1 for unlimited. Default: 3'
35
+ option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value greater than 0 to set it as limit, 0 means any size. Default: 0'
35
36
  def create(scraper_name, git_repository)
36
37
  # puts "options #{options}"
37
38
  client = Client::Scraper.new(options)
@@ -57,6 +58,7 @@ module Datahen
57
58
  option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
58
59
  option :multiple_jobs, type: :boolean, desc: 'Set true to enable multiple jobs. Default: false'
59
60
  option :max_job_count, type: :numeric, desc: 'Set a value to set max number of jobs available. Set -1 for unlimited. Default: 3'
61
+ option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value greater than 0 to set it as limit, 0 means any size. Default: 0'
60
62
  def update(scraper_name)
61
63
  client = Client::Scraper.new(options)
62
64
  puts "#{client.update(scraper_name, options)}"
@@ -94,6 +96,7 @@ module Datahen
94
96
  option :browsers, type: :numeric, desc: 'Set how many browser workers to use. Default: 0'
95
97
  option :proxy_type, desc: 'Set the Proxy type. Default: standard'
96
98
  option :vars, type: :string, banner: :JSON, desc: 'Set input vars. Must be in json format. i.e: [{"name":"foo", "value":"bar", "secret":false}] '
99
+ option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value greater than 0 to set it as limit, 0 means any size. Default: 0'
97
100
  def start(scraper_name)
98
101
  client = Client::ScraperJob.new(options)
99
102
  puts "Starting a scrape job..."
@@ -104,6 +104,7 @@ module Datahen
104
104
  option :proxy_type, desc: 'Set the Proxy type. Default: standard'
105
105
  option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
106
106
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
107
+ option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value greater than 0 to set it as limit, 0 means any size. Default: 0'
107
108
  def update(scraper_name)
108
109
  if options[:job]
109
110
  client = Client::Job.new(options)
@@ -45,6 +45,7 @@ module Datahen
45
45
  option :freshness, :aliases => :s, desc: 'Set how fresh the page cache is. Accepts timestamp format.'
46
46
  option :ua_type, :aliases => :u, desc: 'Set user agent type. Default: desktop'
47
47
  option :no_redirect, :aliases => :n, type: :boolean, desc: 'Set true to not follow redirect. Default: false'
48
+ option :max_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value greater than 0 to set it as limit, 0 means any size. Default: 0'
48
49
  def add(scraper_name, url)
49
50
  begin
50
51
  options[:headers] = JSON.parse(options[:headers]) if options[:headers]
@@ -78,6 +79,7 @@ module Datahen
78
79
  option :page_type, :aliases => :t, desc: 'Set page type'
79
80
  option :priority, type: :numeric, desc: 'Set fetch priority. The higher the value, the sooner the page gets fetched. Default: 0'
80
81
  option :vars, :aliases => :v, type: :string, desc: 'Set user-defined page variables. Must be in json format. i.e: {"Foo":"bar"}'
82
+ option :max_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value greater than 0 to set it as limit, 0 means any size. Default: 0'
81
83
  def update(scraper_name, gid)
82
84
  begin
83
85
  options[:vars] = JSON.parse(options[:vars]) if options[:vars]
@@ -21,6 +21,7 @@ module Datahen
21
21
  body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
22
22
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
23
23
  body[:profile] = opts[:profile] if opts[:profile]
24
+ body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
24
25
  params = @options.merge({body: body.to_json})
25
26
 
26
27
  self.class.put("/jobs/#{job_id}", params)
@@ -15,6 +15,7 @@ module Datahen
15
15
  body[:page_type] = opts[:page_type] if opts[:page_type]
16
16
  body[:priority] = opts[:priority] if opts[:priority]
17
17
  body[:vars] = opts[:vars] if opts[:vars]
18
+ body[:max_size] = opts[:max_size] if opts[:max_size]
18
19
 
19
20
  params = @options.merge({body: body.to_json})
20
21
 
@@ -36,6 +37,7 @@ module Datahen
36
37
  body[:ua_type] = opts[:ua_type] if opts[:ua_type]
37
38
  body[:no_redirect] = opts[:no_redirect] if opts[:no_redirect]
38
39
  body[:cookie] = opts[:cookie] if opts[:cookie]
40
+ body[:max_size] = opts[:max_size] if opts[:max_size]
39
41
 
40
42
  params = @options.merge({body: body.to_json})
41
43
 
@@ -28,6 +28,7 @@ module Datahen
28
28
  body[:profile] = opts[:profile] if opts[:profile]
29
29
  body[:multiple_jobs] = opts[:multiple_jobs] if opts[:multiple_jobs]
30
30
  body[:max_job_count] = opts[:max_job_count] if opts[:max_job_count]
31
+ body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
31
32
  params = @options.merge({body: body.to_json})
32
33
  self.class.post("/scrapers", params)
33
34
  end
@@ -49,6 +50,7 @@ module Datahen
49
50
  body[:profile] = opts[:profile] if opts[:profile]
50
51
  body[:multiple_jobs] = opts[:multiple_jobs] if opts.has_key?("multiple_jobs") || opts.has_key?(:multiple_jobs)
51
52
  body[:max_job_count] = opts[:max_job_count] if opts.has_key?("max_job_count") || opts.has_key?(:max_job_count)
53
+ body[:max_page_size] = opts[:max_page_size] if opts.has_key?("max_page_size") || opts.has_key?(:max_page_size)
52
54
  params = @options.merge({body: body.to_json})
53
55
 
54
56
  self.class.put("/scrapers/#{scraper_name}", params)
@@ -11,6 +11,7 @@ module Datahen
11
11
  body[:standard_worker_count] = opts[:workers] if opts[:workers]
12
12
  body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
13
13
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
14
+ body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
14
15
  if opts[:vars]
15
16
  if opts[:vars].is_a?(Array)
16
17
  body[:vars] = opts[:vars]
@@ -37,6 +38,7 @@ module Datahen
37
38
  body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
38
39
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
39
40
  body[:profile] = opts[:profile] if opts[:profile]
41
+ body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
40
42
  params = @options.merge({body: body.to_json})
41
43
 
42
44
  self.class.put("/scrapers/#{scraper_name}/current_job", params)
@@ -1,3 +1,3 @@
1
1
  module Datahen
2
- VERSION = "0.15.11"
2
+ VERSION = "0.16.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datahen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.15.11
4
+ version: 0.16.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-05-29 00:00:00.000000000 Z
11
+ date: 2021-07-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -276,7 +276,7 @@ metadata:
276
276
  allowed_push_host: https://rubygems.org
277
277
  homepage_uri: https://datahen.com
278
278
  source_code_uri: https://github.com/DataHenOfficial/datahen-ruby
279
- post_install_message:
279
+ post_install_message:
280
280
  rdoc_options: []
281
281
  require_paths:
282
282
  - lib
@@ -292,7 +292,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
292
292
  version: '0'
293
293
  requirements: []
294
294
  rubygems_version: 3.0.3
295
- signing_key:
295
+ signing_key:
296
296
  specification_version: 4
297
297
  summary: DataHen toolbelt for developers
298
298
  test_files: []