datahen 1.6.3 → 1.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/datahen/cli/scraper.rb +3 -0
- data/lib/datahen/cli/scraper_job.rb +1 -0
- data/lib/datahen/cli/scraper_page.rb +5 -1
- data/lib/datahen/client/job.rb +1 -0
- data/lib/datahen/client/job_page.rb +2 -0
- data/lib/datahen/client/scraper.rb +2 -0
- data/lib/datahen/client/scraper_job.rb +3 -0
- data/lib/datahen/client/scraper_job_page.rb +2 -0
- data/lib/datahen/version.rb +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f39d4f5c3ef7be33ebfc7b71fb2b746aad84f01fa68ce2b7e215d10fbef89d62
|
|
4
|
+
data.tar.gz: 6c8ab93908ab10c58ff3d04d30874832b663d184bf5cc5d87b152c6d146adeef
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 65437e816eab4062e9cf93c489bb3630a54ea906ab36cc7e038079722514b3b613b0efee265ed0bdcf42cea14a51469af7ce456622dd80a024b600493db8266b
|
|
7
|
+
data.tar.gz: 26f8e2f0d0d18212fe0443a993ea36751c0e50c84083746af2dfb69ca3ea1f028a09c81be5f0358cb5b81d59a4ec4ea2e55a6ec05683030a669cddd382982d22
|
data/lib/datahen/cli/scraper.rb
CHANGED
|
@@ -41,6 +41,7 @@ module Datahen
|
|
|
41
41
|
option :soft_refetch_limit, type: :numeric, desc: 'Set the soft refetch limit value.'
|
|
42
42
|
option :parsing_try_limit, type: :numeric, desc: 'Set the parsing try limit value.'
|
|
43
43
|
option :prevent_kb_autoscaler, type: :boolean, desc: 'Set true to prevent the autoscaler from restarting the job. Default: false'
|
|
44
|
+
option :enable_browserforge, type: :boolean, desc: 'Set true to enable browserforge. Default: false'
|
|
44
45
|
def create(scraper_name, git_repository)
|
|
45
46
|
# puts "options #{options}"
|
|
46
47
|
client = Client::Scraper.new(options)
|
|
@@ -74,6 +75,7 @@ module Datahen
|
|
|
74
75
|
option :soft_refetch_limit, type: :numeric, desc: 'Set the soft refetch limit value.'
|
|
75
76
|
option :parsing_try_limit, type: :numeric, desc: 'Set the parsing try limit value.'
|
|
76
77
|
option :prevent_kb_autoscaler, type: :boolean, desc: 'Set true to prevent the autoscaler from restarting the job. Default: false'
|
|
78
|
+
option :enable_browserforge, type: :boolean, desc: 'Set true to enable browserforge. Default: false'
|
|
77
79
|
def update(scraper_name)
|
|
78
80
|
client = Client::Scraper.new(options)
|
|
79
81
|
puts "#{client.update(scraper_name, options)}"
|
|
@@ -118,6 +120,7 @@ module Datahen
|
|
|
118
120
|
option :soft_refetch_limit, type: :numeric, desc: 'Set the soft refetch limit value.'
|
|
119
121
|
option :parsing_try_limit, type: :numeric, desc: 'Set the parsing try limit value.'
|
|
120
122
|
option :prevent_kb_autoscaler, type: :boolean, desc: 'Set true to prevent the autoscaler from restarting the job. Default: false'
|
|
123
|
+
option :enable_browserforge, type: :boolean, desc: 'Set true to enable browserforge. Default: false'
|
|
121
124
|
def start(scraper_name)
|
|
122
125
|
client = Client::ScraperJob.new(options)
|
|
123
126
|
puts "Starting a scrape job..."
|
|
data/lib/datahen/cli/scraper_job.rb
CHANGED
|
@@ -113,6 +113,7 @@ module Datahen
|
|
|
113
113
|
option :parsing_try_limit, type: :numeric, desc: 'Set the parsing try limit value.'
|
|
114
114
|
option :prevent_kb_autoscaler, type: :boolean, desc: 'Set true to prevent the autoscaler from restarting the job. Default: false'
|
|
115
115
|
option :deletion_protected, type: :boolean, desc: 'Set true to prevent the job to be deleted from max job count or job older than X days rules. Default: false'
|
|
116
|
+
option :enable_browserforge, type: :boolean, desc: 'Set true to enable browserforge. Default: false'
|
|
116
117
|
def update(scraper_name)
|
|
117
118
|
if options[:job]
|
|
118
119
|
client = Client::Job.new(options)
|
|
data/lib/datahen/cli/scraper_page.rb
CHANGED
|
@@ -101,10 +101,14 @@ module Datahen
|
|
|
101
101
|
option :soft_fetching_try_limit, type: :numeric, desc: 'Set the soft fetching try limit value.'
|
|
102
102
|
option :soft_refetch_limit, type: :numeric, desc: 'Set the soft refetch limit value.'
|
|
103
103
|
option :parsing_try_limit, type: :numeric, desc: 'Set the parsing try limit value.'
|
|
104
|
+
option :use_browserforge, type: :boolean, desc: 'Set true to use browserforge options. Default: false'
|
|
105
|
+
option :browserforge_config, type: :string, desc: 'Set browserforge configuration. Must be in json format. i.e: {"Foo":"bar"}'
|
|
106
|
+
|
|
104
107
|
def update(scraper_name, gid)
|
|
105
108
|
begin
|
|
106
109
|
options[:vars] = JSON.parse(options[:vars]) if options[:vars]
|
|
107
|
-
|
|
110
|
+
options[:browserforge_config] = JSON.parse(options[:browserforge_config]) if options[:browserforge_config]
|
|
111
|
+
|
|
108
112
|
if options[:job]
|
|
109
113
|
client = Client::JobPage.new(options)
|
|
110
114
|
puts "#{client.update(options[:job], gid, options)}"
|
data/lib/datahen/client/job.rb
CHANGED
|
@@ -30,6 +30,7 @@ module Datahen
|
|
|
30
30
|
body[:parsing_try_limit] = opts[:parsing_try_limit] if opts[:parsing_try_limit]
|
|
31
31
|
body[:prevent_kb_autoscaler] = opts[:prevent_kb_autoscaler] if opts.has_key?("prevent_kb_autoscaler") || opts.has_key?(:prevent_kb_autoscaler)
|
|
32
32
|
body[:deletion_protected] = opts[:deletion_protected] if opts.has_key?("deletion_protected") || opts.has_key?(:deletion_protected)
|
|
33
|
+
body[:enable_browserforge] = opts[:enable_browserforge] if opts.has_key?("enable_browserforge") || opts.has_key?(:enable_browserforge)
|
|
33
34
|
params = @options.merge({body: body.to_json})
|
|
34
35
|
|
|
35
36
|
self.class.put("/jobs/#{job_id}", params)
|
|
data/lib/datahen/client/job_page.rb
CHANGED
|
@@ -21,6 +21,8 @@ module Datahen
|
|
|
21
21
|
body[:soft_fetching_try_limit] = opts[:soft_fetching_try_limit] if opts[:soft_fetching_try_limit]
|
|
22
22
|
body[:soft_refetch_limit] = opts[:soft_refetch_limit] if opts[:soft_refetch_limit]
|
|
23
23
|
body[:parsing_try_limit] = opts[:parsing_try_limit] if opts[:parsing_try_limit]
|
|
24
|
+
body[:use_browserforge] = opts[:use_browserforge] if opts.has_key?("use_browserforge") || opts.has_key?(:use_browserforge)
|
|
25
|
+
body[:browserforge_config] = opts[:browserforge_config] if opts.has_key?("browserforge_config") || opts.has_key?(:browserforge_config)
|
|
24
26
|
|
|
25
27
|
params = @options.merge({body: body.to_json})
|
|
26
28
|
|
|
data/lib/datahen/client/scraper.rb
CHANGED
|
@@ -36,6 +36,7 @@ module Datahen
|
|
|
36
36
|
body[:soft_refetch_limit] = opts[:soft_refetch_limit] if opts[:soft_refetch_limit]
|
|
37
37
|
body[:parsing_try_limit] = opts[:parsing_try_limit] if opts[:parsing_try_limit]
|
|
38
38
|
body[:prevent_kb_autoscaler] = opts[:prevent_kb_autoscaler] if opts.has_key?("prevent_kb_autoscaler") || opts.has_key?(:prevent_kb_autoscaler)
|
|
39
|
+
body[:enable_browserforge] = opts[:enable_browserforge] if opts.has_key?("enable_browserforge") || opts.has_key?(:enable_browserforge)
|
|
39
40
|
params = @options.merge({body: body.to_json})
|
|
40
41
|
self.class.post("/scrapers", params)
|
|
41
42
|
end
|
|
@@ -65,6 +66,7 @@ module Datahen
|
|
|
65
66
|
body[:soft_refetch_limit] = opts[:soft_refetch_limit] if opts[:soft_refetch_limit]
|
|
66
67
|
body[:parsing_try_limit] = opts[:parsing_try_limit] if opts[:parsing_try_limit]
|
|
67
68
|
body[:prevent_kb_autoscaler] = opts[:prevent_kb_autoscaler] if opts.has_key?("prevent_kb_autoscaler") || opts.has_key?(:prevent_kb_autoscaler)
|
|
69
|
+
body[:enable_browserforge] = opts[:enable_browserforge] if opts.has_key?("enable_browserforge") || opts.has_key?(:enable_browserforge)
|
|
68
70
|
params = @options.merge({body: body.to_json})
|
|
69
71
|
|
|
70
72
|
self.class.put("/scrapers/#{scraper_name}", params)
|
|
data/lib/datahen/client/scraper_job.rb
CHANGED
|
@@ -20,6 +20,7 @@ module Datahen
|
|
|
20
20
|
body[:parsing_try_limit] = opts[:parsing_try_limit] if opts[:parsing_try_limit]
|
|
21
21
|
body[:prevent_kb_autoscaler] = opts[:prevent_kb_autoscaler] if opts.has_key?("prevent_kb_autoscaler") || opts.has_key?(:prevent_kb_autoscaler)
|
|
22
22
|
body[:deletion_protected] = opts[:deletion_protected] if opts.has_key?("deletion_protected") || opts.has_key?(:deletion_protected)
|
|
23
|
+
body[:enable_browserforge] = opts[:enable_browserforge] if opts.has_key?("enable_browserforge") || opts.has_key?(:enable_browserforge)
|
|
23
24
|
if opts[:vars]
|
|
24
25
|
if opts[:vars].is_a?(Array)
|
|
25
26
|
body[:vars] = opts[:vars]
|
|
@@ -55,6 +56,8 @@ module Datahen
|
|
|
55
56
|
body[:parsing_try_limit] = opts[:parsing_try_limit] if opts[:parsing_try_limit]
|
|
56
57
|
body[:prevent_kb_autoscaler] = opts[:prevent_kb_autoscaler] if opts.has_key?("prevent_kb_autoscaler") || opts.has_key?(:prevent_kb_autoscaler)
|
|
57
58
|
body[:deletion_protected] = opts[:deletion_protected] if opts.has_key?("deletion_protected") || opts.has_key?(:deletion_protected)
|
|
59
|
+
body[:enable_browserforge] = opts[:enable_browserforge] if opts.has_key?("enable_browserforge") || opts.has_key?(:enable_browserforge)
|
|
60
|
+
|
|
58
61
|
params = @options.merge({body: body.to_json})
|
|
59
62
|
|
|
60
63
|
self.class.put("/scrapers/#{scraper_name}/current_job", params)
|
|
data/lib/datahen/client/scraper_job_page.rb
CHANGED
|
@@ -18,6 +18,8 @@ module Datahen
|
|
|
18
18
|
body[:max_size] = opts[:max_size] if opts[:max_size]
|
|
19
19
|
body[:enable_global_cache] = opts[:enable_global_cache] if opts.has_key?("enable_global_cache") || opts.has_key?(:enable_global_cache)
|
|
20
20
|
body[:retry_interval] = opts[:retry_interval] if opts[:retry_interval]
|
|
21
|
+
body[:use_browserforge] = opts[:use_browserforge] if opts.has_key?("use_browserforge") || opts.has_key?(:use_browserforge)
|
|
22
|
+
body[:browserforge_config] = opts[:browserforge_config] if opts.has_key?("browserforge_config") || opts.has_key?(:browserforge_config)
|
|
21
23
|
|
|
22
24
|
params = @options.merge({body: body.to_json})
|
|
23
25
|
|
data/lib/datahen/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: datahen
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.6.3
|
|
4
|
+
version: 1.6.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Parama Danoesubroto
|
|
8
|
-
autorequire:
|
|
8
|
+
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2026-05-12 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: thor
|
|
@@ -284,7 +284,7 @@ metadata:
|
|
|
284
284
|
allowed_push_host: https://rubygems.org
|
|
285
285
|
homepage_uri: https://datahen.com
|
|
286
286
|
source_code_uri: https://github.com/DataHenOfficial/datahen-ruby
|
|
287
|
-
post_install_message:
|
|
287
|
+
post_install_message:
|
|
288
288
|
rdoc_options: []
|
|
289
289
|
require_paths:
|
|
290
290
|
- lib
|
|
@@ -299,8 +299,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
299
299
|
- !ruby/object:Gem::Version
|
|
300
300
|
version: '0'
|
|
301
301
|
requirements: []
|
|
302
|
-
rubygems_version: 3.
|
|
303
|
-
signing_key:
|
|
302
|
+
rubygems_version: 3.5.11
|
|
303
|
+
signing_key:
|
|
304
304
|
specification_version: 4
|
|
305
305
|
summary: DataHen toolbelt for developers
|
|
306
306
|
test_files: []
|