datahen 0.15.11 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f77e31da8e2a7ff08086c4aa9d174608a9c3f186679d456b22310b48384d3572
4
- data.tar.gz: 0bf53ae0886b16bf6fe08b0db07b1a631b69f31d8e3a6868a4d483549049e4ed
3
+ metadata.gz: 39397d5cb4e60a6d24cdec5bd979f543a23019b7c9b9dffe6140a204d330465c
4
+ data.tar.gz: 1db7c2b448179c2bc4b56e99428dfb4303cbb1451df032ec43cb5264f58935ec
5
5
  SHA512:
6
- metadata.gz: a491874347ed6ac97c0a0e4f0d2c5830140b9367c1e01b4c95e7a447b071df643d12793ed7d6e5a0224b8876905cf74bc13b987e6e3e03e937d1f821557b8ec3
7
- data.tar.gz: c553a372790654726f2921b6d9d582ca90b314b3fe8d78625cf2442e01cf2ce96ae556e78812d5bd6a03f350beb32aa3158981554179410c12926d480c911887
6
+ metadata.gz: 7058506211d537c8ea3c9a521625fd339b255f41188a70341cc04683ca1abc1fa7f19ed796026b5e07679bb2fd7e57f096d319fbe8a75ae6fb7fd59a704a9824
7
+ data.tar.gz: b8b60607cd27acbd654afe0816b0b7738871ca61c370fbafa747985d3723fec64f9b07c6078b34c1192db1d7b160682522a4901a7047c3d067860bc2b745b0b0
@@ -32,6 +32,7 @@ module Datahen
32
32
  option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
33
33
  option :multiple_jobs, type: :boolean, desc: 'Set true to enable multiple jobs. Default: false'
34
34
  option :max_job_count, type: :numeric, desc: 'Set a value to set max number of jobs available. Set -1 for unlimited. Default: 3'
35
+ option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
35
36
  def create(scraper_name, git_repository)
36
37
  # puts "options #{options}"
37
38
  client = Client::Scraper.new(options)
@@ -57,6 +58,7 @@ module Datahen
57
58
  option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
58
59
  option :multiple_jobs, type: :boolean, desc: 'Set true to enable multiple jobs. Default: false'
59
60
  option :max_job_count, type: :numeric, desc: 'Set a value to set max number of jobs available. Set -1 for unlimited. Default: 3'
61
+ option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
60
62
  def update(scraper_name)
61
63
  client = Client::Scraper.new(options)
62
64
  puts "#{client.update(scraper_name, options)}"
@@ -94,6 +96,7 @@ module Datahen
94
96
  option :browsers, type: :numeric, desc: 'Set how many browser workers to use. Default: 0'
95
97
  option :proxy_type, desc: 'Set the Proxy type. Default: standard'
96
98
  option :vars, type: :string, banner: :JSON, desc: 'Set input vars. Must be in json format. i.e: [{"name":"foo", "value":"bar", "secret":false}] '
99
+ option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
97
100
  def start(scraper_name)
98
101
  client = Client::ScraperJob.new(options)
99
102
  puts "Starting a scrape job..."
@@ -104,6 +104,7 @@ module Datahen
104
104
  option :proxy_type, desc: 'Set the Proxy type. Default: standard'
105
105
  option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
106
106
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
107
+ option :max_page_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
107
108
  def update(scraper_name)
108
109
  if options[:job]
109
110
  client = Client::Job.new(options)
@@ -45,6 +45,7 @@ module Datahen
45
45
  option :freshness, :aliases => :s, desc: 'Set how fresh the page cache is. Accepts timestap format.'
46
46
  option :ua_type, :aliases => :u, desc: 'Set user agent type. Default: desktop'
47
47
  option :no_redirect, :aliases => :n, type: :boolean, desc: 'Set true to not follow redirect. Default: false'
48
+ option :max_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
48
49
  def add(scraper_name, url)
49
50
  begin
50
51
  options[:headers] = JSON.parse(options[:headers]) if options[:headers]
@@ -78,6 +79,7 @@ module Datahen
78
79
  option :page_type, :aliases => :t, desc: 'Set page type'
79
80
  option :priority, type: :numeric, desc: 'Set fetch priority. The higher the value, the sooner the page gets fetched. Default: 0'
80
81
  option :vars, :aliases => :v, type: :string, desc: 'Set user-defined page variables. Must be in json format. i.e: {"Foo":"bar"}'
82
+ option :max_size, type: :numeric, desc: 'Set a value to set max page size when fetching a page. Set a value grather than 0 to set it as limit, 0 means any size. Default: 0'
81
83
  def update(scraper_name, gid)
82
84
  begin
83
85
  options[:vars] = JSON.parse(options[:vars]) if options[:vars]
@@ -21,6 +21,7 @@ module Datahen
21
21
  body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
22
22
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
23
23
  body[:profile] = opts[:profile] if opts[:profile]
24
+ body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
24
25
  params = @options.merge({body: body.to_json})
25
26
 
26
27
  self.class.put("/jobs/#{job_id}", params)
@@ -15,6 +15,7 @@ module Datahen
15
15
  body[:page_type] = opts[:page_type] if opts[:page_type]
16
16
  body[:priority] = opts[:priority] if opts[:priority]
17
17
  body[:vars] = opts[:vars] if opts[:vars]
18
+ body[:max_size] = opts[:max_size] if opts[:max_size]
18
19
 
19
20
  params = @options.merge({body: body.to_json})
20
21
 
@@ -36,6 +37,7 @@ module Datahen
36
37
  body[:ua_type] = opts[:ua_type] if opts[:ua_type]
37
38
  body[:no_redirect] = opts[:no_redirect] if opts[:no_redirect]
38
39
  body[:cookie] = opts[:cookie] if opts[:cookie]
40
+ body[:max_size] = opts[:max_size] if opts[:max_size]
39
41
 
40
42
  params = @options.merge({body: body.to_json})
41
43
 
@@ -28,6 +28,7 @@ module Datahen
28
28
  body[:profile] = opts[:profile] if opts[:profile]
29
29
  body[:multiple_jobs] = opts[:multiple_jobs] if opts[:multiple_jobs]
30
30
  body[:max_job_count] = opts[:max_job_count] if opts[:max_job_count]
31
+ body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
31
32
  params = @options.merge({body: body.to_json})
32
33
  self.class.post("/scrapers", params)
33
34
  end
@@ -49,6 +50,7 @@ module Datahen
49
50
  body[:profile] = opts[:profile] if opts[:profile]
50
51
  body[:multiple_jobs] = opts[:multiple_jobs] if opts.has_key?("multiple_jobs") || opts.has_key?(:multiple_jobs)
51
52
  body[:max_job_count] = opts[:max_job_count] if opts.has_key?("max_job_count") || opts.has_key?(:max_job_count)
53
+ body[:max_page_size] = opts[:max_page_size] if opts.has_key?("max_page_size") || opts.has_key?(:max_page_size)
52
54
  params = @options.merge({body: body.to_json})
53
55
 
54
56
  self.class.put("/scrapers/#{scraper_name}", params)
@@ -11,6 +11,7 @@ module Datahen
11
11
  body[:standard_worker_count] = opts[:workers] if opts[:workers]
12
12
  body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
13
13
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
14
+ body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
14
15
  if opts[:vars]
15
16
  if opts[:vars].is_a?(Array)
16
17
  body[:vars] = opts[:vars]
@@ -37,6 +38,7 @@ module Datahen
37
38
  body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
38
39
  body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
39
40
  body[:profile] = opts[:profile] if opts[:profile]
41
+ body[:max_page_size] = opts[:max_page_size] if opts[:max_page_size]
40
42
  params = @options.merge({body: body.to_json})
41
43
 
42
44
  self.class.put("/scrapers/#{scraper_name}/current_job", params)
@@ -1,3 +1,3 @@
1
1
  module Datahen
2
- VERSION = "0.15.11"
2
+ VERSION = "0.16.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datahen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.15.11
4
+ version: 0.16.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-05-29 00:00:00.000000000 Z
11
+ date: 2021-07-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -276,7 +276,7 @@ metadata:
276
276
  allowed_push_host: https://rubygems.org
277
277
  homepage_uri: https://datahen.com
278
278
  source_code_uri: https://github.com/DataHenOfficial/datahen-ruby
279
- post_install_message:
279
+ post_install_message:
280
280
  rdoc_options: []
281
281
  require_paths:
282
282
  - lib
@@ -292,7 +292,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
292
292
  version: '0'
293
293
  requirements: []
294
294
  rubygems_version: 3.0.3
295
- signing_key:
295
+ signing_key:
296
296
  specification_version: 4
297
297
  summary: DataHen toolbelt for developers
298
298
  test_files: []