datahen 0.13.0 → 0.13.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c3d1584f235873f6d22ae107c60e7b50a6d8ab5918f45a4052bef80e58b7cbf7
4
- data.tar.gz: b6b6ce4871017eddb70fd279c92246e498e57c3d7ca2c0bd72c225bdcdb5e119
3
+ metadata.gz: c05f6ab973fe74a21e7f68411a66b97444575cc75c5812e99c5e0da4ffe05d56
4
+ data.tar.gz: dac1d2be8f6281d3da328abd5f844036765991764bb1e3ffad4b72ee4c18eac6
5
5
  SHA512:
6
- metadata.gz: 713f5907efc90be21ba04b83c060f0f656d1f72176191399e1ff575952cad2b54c834ddfd6722441e933857d12539f55553adf2b4d71adead952465f0ccf1005
7
- data.tar.gz: bd7809ed5fbd6d12dc8680f31f194ecfb51ba0138765a88c9dde4745be2bd46533da97885f63524edca09cee1ab2d4cc3d7fd96bf04a2b6a6930a046646a8093
6
+ metadata.gz: 48c15f9830308488d434dce7b7cf4888795724d88d4ca9de63cc2deca8397798b815145430af2143eb646366bfc4e97f37e563315214258b434276919e724ac7
7
+ data.tar.gz: faee0369f81ab45dbb6c62258a6214d530d51d5dcb0f6b6ebd7028bda2c48ccbe630b41006ffcc96dba773a28e84e724b6b1f2266ef7866605e5423f1ced61d2
@@ -10,12 +10,13 @@ module Datahen
10
10
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
11
11
  option :global, :aliases => :g, type: :boolean, default: false, desc: 'Use globalpage instead of a job page'
12
12
  option :vars, :aliases => :v, type: :string, desc: 'Set user-defined page variables. Must be in json format. i.e: {"Foo":"bar"}'
13
+ option :"keep-outputs", :aliases => :ko, type: :boolean, default: false, desc: "Don't delete existing outputs"
13
14
  def try_parse(scraper_name, parser_file, gid)
14
- begin
15
-
15
+ begin
16
+
16
17
  if options[:job]
17
18
  job_id = options[:job]
18
- elsif options[:global]
19
+ elsif options[:global]
19
20
  job_id = nil
20
21
  else
21
22
  job = Client::ScraperJob.new(options).find(scraper_name)
@@ -24,7 +25,7 @@ module Datahen
24
25
 
25
26
 
26
27
  vars = JSON.parse(options[:vars]) if options[:vars]
27
- puts Datahen::Scraper::Parser.exec_parser_page(parser_file, gid, job_id, false, vars)
28
+ puts Datahen::Scraper::Parser.exec_parser_page(parser_file, gid, job_id, false, vars, options[:"keep-outputs"])
28
29
 
29
30
  rescue JSON::ParserError
30
31
  if options[:vars]
@@ -40,6 +41,8 @@ module Datahen
40
41
  <GID>: Global ID of the page.\x5
41
42
  LONGDESC
42
43
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
44
+ option :vars, :aliases => :v, type: :string, desc: 'Set user-defined page variables. Must be in json format. i.e: {"Foo":"bar"}'
45
+ option :"keep-outputs", :aliases => :ko, type: :boolean, default: false, desc: "Don't delete existing outputs"
43
46
  def exec_parse(scraper_name, parser_file, *gids)
44
47
  gids.each do |gid|
45
48
  begin
@@ -52,7 +55,8 @@ module Datahen
52
55
  job_id = job['id']
53
56
  end
54
57
 
55
- puts Datahen::Scraper::Parser.exec_parser_page(parser_file, gid, job_id, true)
58
+ vars = JSON.parse(options[:vars]) if options[:vars]
59
+ puts Datahen::Scraper::Parser.exec_parser_page(parser_file, gid, job_id, true, vars, options[:"keep-outputs"])
56
60
  rescue => e
57
61
  puts e
58
62
  end
@@ -12,7 +12,6 @@ module Datahen
12
12
  puts "#{client.find(export_id)}"
13
13
  end
14
14
 
15
-
16
15
  desc "list", "Gets a list of exports"
17
16
  long_desc <<-LONGDESC
18
17
  List exports.
@@ -34,13 +33,13 @@ module Datahen
34
33
  def download(export_id)
35
34
  client = Client::ScraperExport.new(options)
36
35
  result = JSON.parse(client.download(export_id).to_s)
37
-
36
+
38
37
  if result['signed_url']
39
38
  puts "Download url: \"#{result['signed_url']}\""
40
39
  `open "#{result['signed_url']}"`
41
40
  else
42
41
  puts "Exported file does not exist"
43
- end
42
+ end
44
43
  end
45
44
 
46
45
 
@@ -11,9 +11,15 @@ module Datahen
11
11
  long_desc <<-LONGDESC
12
12
  Reset finisher on a scraper's current job.\x5
13
13
  LONGDESC
14
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
14
15
  def reset(scraper_name)
15
- client = Client::ScraperFinisher.new(options)
16
- puts "#{client.reset(scraper_name)}"
16
+ if options[:job]
17
+ client = Client::JobFinisher.new(options)
18
+ puts "#{client.reset(options[:job])}"
19
+ else
20
+ client = Client::ScraperFinisher.new(options)
21
+ puts "#{client.reset(scraper_name)}"
22
+ end
17
23
  end
18
24
  end
19
25
  end
@@ -29,27 +29,45 @@ module Datahen
29
29
  long_desc <<-LONGDESC
30
30
  Cancels a scraper's current job
31
31
  LONGDESC
32
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
32
33
  def cancel(scraper_name)
33
- client = Client::ScraperJob.new(options)
34
- puts "#{client.cancel(scraper_name)}"
34
+ if options[:job]
35
+ client = Client::Job.new(options)
36
+ puts "#{client.cancel(options[:job])}"
37
+ else
38
+ client = Client::ScraperJob.new(options)
39
+ puts "#{client.cancel(scraper_name)}"
40
+ end
35
41
  end
36
42
 
37
43
  desc "resume <scraper_name>", "resumes a scraper's current job"
38
44
  long_desc <<-LONGDESC
39
45
  Resumes a scraper's current job
40
46
  LONGDESC
47
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
41
48
  def resume(scraper_name)
42
- client = Client::ScraperJob.new(options)
43
- puts "#{client.resume(scraper_name)}"
49
+ if options[:job]
50
+ client = Client::Job.new(options)
51
+ puts "#{client.resume(options[:job])}"
52
+ else
53
+ client = Client::ScraperJob.new(options)
54
+ puts "#{client.resume(scraper_name)}"
55
+ end
44
56
  end
45
57
 
46
58
  desc "pause <scraper_name>", "pauses a scraper's current job"
47
59
  long_desc <<-LONGDESC
48
60
  pauses a scraper's current job
49
61
  LONGDESC
62
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
50
63
  def pause(scraper_name)
51
- client = Client::ScraperJob.new(options)
52
- puts "#{client.pause(scraper_name)}"
64
+ if options[:job]
65
+ client = Client::Job.new(options)
66
+ puts "#{client.pause(options[:job])}"
67
+ else
68
+ client = Client::ScraperJob.new(options)
69
+ puts "#{client.pause(scraper_name)}"
70
+ end
53
71
  end
54
72
 
55
73
 
@@ -60,9 +78,15 @@ module Datahen
60
78
  option :workers, :aliases => :w, type: :numeric, desc: 'Set how many standard workers to use. Scraper job must be restarted(paused then resumed, or cancelled then resumed) for it to take effect. Default: 1. '
61
79
  option :browsers, type: :numeric, desc: 'Set how many browser workers to use. Scraper job must be restarted(paused then resumed, or cancelled then resumed) for it to take effect. Default: 0. '
62
80
  option :proxy_type, desc: 'Set the Proxy type. Default: standard'
81
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
63
82
  def update(scraper_name)
64
- client = Client::ScraperJob.new(options)
65
- puts "#{client.update(scraper_name, options)}"
83
+ if options[:job]
84
+ client = Client::Job.new(options)
85
+ puts "#{client.update(options[:job], options)}"
86
+ else
87
+ client = Client::ScraperJob.new(options)
88
+ puts "#{client.update(scraper_name, options)}"
89
+ end
66
90
  end
67
91
 
68
92
  desc "var SUBCOMMAND ...ARGS", "for managing scraper's job variables"
@@ -13,9 +13,15 @@ module Datahen
13
13
  LONGDESC
14
14
  option :page, :aliases => :p, type: :numeric, desc: 'Get the next set of records by page.'
15
15
  option :per_page, :aliases => :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
16
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
16
17
  def list(scraper_name)
17
- client = Client::ScraperJobVar.new(options)
18
- puts "#{client.all(scraper_name)}"
18
+ if options[:job]
19
+ client = Client::JobVar.new(options)
20
+ puts "#{client.all(options[:job])}"
21
+ else
22
+ client = Client::ScraperJobVar.new(options)
23
+ puts "#{client.all(scraper_name)}"
24
+ end
19
25
  end
20
26
 
21
27
  desc "set <scraper_name> <var_name> <value>", "Set an environment var on the scrape job"
@@ -24,23 +30,40 @@ module Datahen
24
30
  <var_name>: Var name can only consist of alphabets, numbers, underscores. Name must be unique to your scrape job, otherwise it will be overwritten.\x5
25
31
  <value>: Value of variable.\x5
26
32
  LONGDESC
27
- option :secret, type: :boolean, desc: 'Set true to make it decrypt the value. Default: false'
33
+ option :secret, type: :boolean, desc: 'Set true to make it decrypt the value. Default: false'
34
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
28
35
  def set(scraper_name, var_name, value)
29
- # puts "options #{options}"
30
- client = Client::ScraperJobVar.new(options)
31
- puts "#{client.set(scraper_name, var_name, value, options)}"
36
+ if options[:job]
37
+ client = Client::JobVar.new(options)
38
+ puts "#{client.set(options[:job], var_name, value, options)}"
39
+ else
40
+ client = Client::ScraperJobVar.new(options)
41
+ puts "#{client.set(scraper_name, var_name, value, options)}"
42
+ end
32
43
  end
33
44
 
34
45
  desc "show <scraper_name> <var_name>", "Show an environment variable on the scrape job"
46
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
35
47
  def show(scraper_name, var_name)
36
- client = Client::ScraperJobVar.new(options)
37
- puts "#{client.find(scraper_name, var_name)}"
48
+ if options[:job]
49
+ client = Client::JobVar.new(options)
50
+ puts "#{client.find(options[:job], var_name)}"
51
+ else
52
+ client = Client::ScraperJobVar.new(options)
53
+ puts "#{client.find(scraper_name, var_name)}"
54
+ end
38
55
  end
39
56
 
40
57
  desc "unset <scraper_name> <var_name>", "Deletes an environment variable on the scrape job"
58
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
41
59
  def unset(scraper_name, var_name)
42
- client = Client::ScraperJobVar.new(options)
43
- puts "#{client.unset(scraper_name, var_name)}"
60
+ if options[:job]
61
+ client = Client::JobVar.new(options)
62
+ puts "#{client.unset(options[:job], var_name)}"
63
+ else
64
+ client = Client::ScraperJobVar.new(options)
65
+ puts "#{client.unset(scraper_name, var_name)}"
66
+ end
44
67
  end
45
68
  end
46
69
  end
@@ -105,13 +105,19 @@ module Datahen
105
105
  option :fetch_fail, type: :boolean, desc: 'Refetches only pages that fails fetching.'
106
106
  option :parse_fail, type: :boolean, desc: 'Refetches only pages that fails parsing.'
107
107
  option :status, type: :string, desc: 'Refetches only pages with a specific status.'
108
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
108
109
  def refetch(scraper_name)
109
110
  if !options.key?(:gid) && !options.key?(:fetch_fail) && !options.key?(:parse_fail) && !options.key?(:status)
110
111
  puts "Must specify either a --gid, --fetch-fail, --parse-fail or --status"
111
112
  return
112
113
  end
113
- client = Client::ScraperJobPage.new(options)
114
- puts "#{client.refetch(scraper_name)}"
114
+ if options[:job]
115
+ client = Client::JobPage.new(options)
116
+ puts "#{client.refetch(options[:job])}"
117
+ else
118
+ client = Client::ScraperJobPage.new(options)
119
+ puts "#{client.refetch(scraper_name)}"
120
+ end
115
121
  end
116
122
 
117
123
  desc "reparse <scraper_name>", "Reparse Pages on a scraper's current job"
@@ -121,6 +127,7 @@ module Datahen
121
127
  option :gid, :aliases => :g, type: :string, desc: 'Reparse a specific GID'
122
128
  option :parse_fail, type: :boolean, desc: 'Reparse only pages that fails parsing.'
123
129
  option :status, type: :string, desc: 'Reparse only pages with a specific status.'
130
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
124
131
  def reparse(scraper_name)
125
132
  begin
126
133
  options[:vars] = JSON.parse(options[:vars]) if options[:vars]
@@ -130,8 +137,13 @@ module Datahen
130
137
  return
131
138
  end
132
139
 
133
- client = Client::ScraperJobPage.new(options)
134
- puts "#{client.reparse(scraper_name)}"
140
+ if options[:job]
141
+ client = Client::JobPage.new(options)
142
+ puts "#{client.reparse(options[:job])}"
143
+ else
144
+ client = Client::ScraperJobPage.new(options)
145
+ puts "#{client.reparse(scraper_name)}"
146
+ end
135
147
 
136
148
  rescue JSON::ParserError
137
149
  if options[:vars]
@@ -7,6 +7,7 @@ module Datahen
7
7
  <seeder_file>: Seeder script file will be executed.\x5
8
8
  LONGDESC
9
9
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
10
+ option :"keep-outputs", :aliases => :ko, type: :boolean, default: false, desc: "Don't delete existing outputs"
10
11
  def try_seed(scraper_name, seeder_file)
11
12
  if options[:job]
12
13
  job_id = options[:job]
@@ -14,8 +15,8 @@ module Datahen
14
15
  job = Client::ScraperJob.new(options).find(scraper_name)
15
16
  job_id = job['id']
16
17
  end
17
-
18
- puts Datahen::Scraper::Seeder.exec_seeder(seeder_file, job_id, false)
18
+
19
+ puts Datahen::Scraper::Seeder.exec_seeder(seeder_file, job_id, false, options[:"keep-outputs"])
19
20
  end
20
21
 
21
22
  desc "exec <scraper_name> <seeder_file>", "Executes a seeder script onto a scraper's current job."
@@ -24,6 +25,7 @@ module Datahen
24
25
  <seeder_file>: Seeder script file that will be executed on the scraper's current job.\x5
25
26
  LONGDESC
26
27
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
28
+ option :"keep-outputs", :aliases => :ko, type: :boolean, default: false, desc: "Don't delete existing outputs"
27
29
  def exec_parse(scraper_name, seeder_file)
28
30
  if options[:job]
29
31
  job_id = options[:job]
@@ -20,7 +20,9 @@ require "datahen/client/job_stat"
20
20
  require "datahen/client/backblaze_content"
21
21
  require "datahen/client/env_var"
22
22
  require "datahen/client/scraper_var"
23
+ require "datahen/client/job_var"
23
24
  require "datahen/client/scraper_job_var"
25
+ require "datahen/client/job_finisher"
24
26
 
25
27
 
26
28
  module Datahen
@@ -15,6 +15,7 @@ module Datahen
15
15
  body[:status] = opts[:status] if opts[:status]
16
16
  body[:standard_worker_count] = opts[:workers] if opts[:workers]
17
17
  body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
18
+ body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
18
19
  params = @options.merge({body: body.to_json})
19
20
 
20
21
  self.class.put("/jobs/#{job_id}", params)
@@ -41,6 +42,7 @@ module Datahen
41
42
  body[:pages] = opts.fetch(:pages) {[]}
42
43
  body[:seeding_status] = opts.fetch(:seeding_status){ nil }
43
44
  body[:log_error] = opts[:log_error] if opts[:log_error]
45
+ body[:keep_outputs] = !!opts[:keep_outputs] if opts.has_key?(:keep_outputs)
44
46
 
45
47
  params = @options.merge({body: body.to_json})
46
48
 
@@ -0,0 +1,16 @@
1
+ module Datahen
2
+ module Client
3
+ class JobFinisher < Datahen::Client::Base
4
+ # Reset finisher on the job identified by the given job ID.
5
+ #
6
+ # @param [Integer] job_id Job ID
7
+ # @param [Hash] opts ({}) API custom parameters.
8
+ #
9
+ # @return [HTTParty::Response]
10
+ def reset(job_id, opts={})
11
+ params = @options.merge(opts)
12
+ self.class.put("/jobs/#{job_id}/finisher/reset", params)
13
+ end
14
+ end
15
+ end
16
+ end
@@ -48,6 +48,7 @@ module Datahen
48
48
  body[:pages] = opts.fetch(:pages) {[]}
49
49
  body[:parsing_status] = opts.fetch(:parsing_status){ nil }
50
50
  body[:log_error] = opts[:log_error] if opts[:log_error]
51
+ body[:keep_outputs] = !!opts[:keep_outputs] if opts.has_key?(:keep_outputs)
51
52
 
52
53
  params = @options.merge({body: body.to_json})
53
54
 
@@ -61,6 +62,16 @@ module Datahen
61
62
  def find_failed_content(job_id, gid)
62
63
  self.class.get("/jobs/#{job_id}/pages/#{gid}/failed_content", @options)
63
64
  end
65
+
66
+ def reparse(job_id, opts={})
67
+ params = @options.merge(opts)
68
+ self.class.put("/jobs/#{job_id}/pages/reparse", params)
69
+ end
70
+
71
+ def refetch(job_id, opts={})
72
+ params = @options.merge(opts)
73
+ self.class.put("/jobs/#{job_id}/pages/refetch", params)
74
+ end
64
75
  end
65
76
  end
66
77
  end
@@ -0,0 +1,28 @@
1
+ module Datahen
2
+ module Client
3
+ class JobVar < Datahen::Client::Base
4
+
5
+ def find(job_id, var_name)
6
+ self.class.get("/jobs/#{job_id}/vars/#{var_name}", @options)
7
+ end
8
+
9
+ def all(job_id, opts={})
10
+ params = @options.merge opts
11
+ self.class.get("/jobs/#{job_id}/vars", params)
12
+ end
13
+
14
+ def set(job_id, var_name, value, opts={})
15
+ body = {}
16
+ body[:value] = value
17
+ body[:secret] = opts[:secret] if opts[:secret]
18
+ params = @options.merge({body: body.to_json})
19
+ self.class.put("/jobs/#{job_id}/vars/#{var_name}", params)
20
+ end
21
+
22
+ def unset(job_id, var_name, opts={})
23
+ params = @options.merge(opts)
24
+ self.class.delete("/jobs/#{job_id}/vars/#{var_name}", params)
25
+ end
26
+ end
27
+ end
28
+ end
@@ -26,6 +26,9 @@ module Datahen
26
26
  self.class.put("/scrapers/#{scraper_name}/current_job/pages/refetch", params)
27
27
  end
28
28
 
29
+ # Deprecated, please use Datahen::Client::JobPage#refetch instead.
30
+ #
31
+ # @note This method will be removed at some point in the future.
29
32
  def refetch_by_job(job_id, opts={})
30
33
  params = @options.merge(opts)
31
34
  self.class.put("/jobs/#{job_id}/pages/refetch", params)
@@ -36,11 +39,6 @@ module Datahen
36
39
  self.class.put("/scrapers/#{scraper_name}/current_job/pages/reparse", params)
37
40
  end
38
41
 
39
- def reparse_by_job(job_id, opts={})
40
- params = @options.merge(opts)
41
- self.class.put("/jobs/#{job_id}/pages/reparse", params)
42
- end
43
-
44
42
  def enqueue(scraper_name, method, url, opts={})
45
43
  body = {}
46
44
  body[:method] = method != "" ? method : "GET"
@@ -63,9 +63,9 @@ module Datahen
63
63
  client.find(gid)
64
64
  end
65
65
 
66
- def get_content(gid)
67
- client = Client::GlobalPage.new()
68
- content_json = client.find_content(gid)
66
+ def get_content(job_id, gid)
67
+ client = Client::JobPage.new()
68
+ content_json = client.find_content(job_id, gid)
69
69
 
70
70
  if content_json['available']
71
71
  signed_url = content_json['signed_url']
@@ -75,7 +75,7 @@ module Datahen
75
75
  end
76
76
  end
77
77
 
78
- def get_failed_content(gid)
78
+ def get_failed_content(job_id, gid)
79
79
  client = Client::JobPage.new()
80
80
  content_json = client.find_failed_content(job_id, gid)
81
81
 
@@ -1,18 +1,24 @@
1
1
  module Datahen
2
2
  module Scraper
3
3
  class Parser
4
- def self.exec_parser_page(filename, gid, job_id=nil, save=false, vars = {})
4
+ def self.exec_parser_page(filename, gid, job_id=nil, save=false, vars = {}, keep_outputs=false)
5
5
  extname = File.extname(filename)
6
6
  case extname
7
7
  when '.rb'
8
- executor = RubyParserExecutor.new(filename: filename, gid: gid, job_id: job_id, vars: vars)
8
+ executor = RubyParserExecutor.new(
9
+ filename: filename,
10
+ gid: gid,
11
+ job_id: job_id,
12
+ vars: vars,
13
+ keep_outputs: keep_outputs
14
+ )
9
15
  executor.exec_parser(save)
10
16
  else
11
17
  puts "Unable to find a parser executor for file type \"#{extname}\""
12
18
  end
13
19
  end
14
20
 
15
-
21
+
16
22
  end
17
23
  end
18
- end
24
+ end
@@ -15,6 +15,7 @@ module Datahen
15
15
  @gid = options.fetch(:gid) { raise "GID is required"}
16
16
  @job_id = options.fetch(:job_id)
17
17
  @page_vars = options.fetch(:vars) { {} }
18
+ @keep_outputs = !!(options.fetch(:keep_outputs) { false })
18
19
  end
19
20
 
20
21
  def self.exposed_methods
@@ -66,7 +67,9 @@ module Datahen
66
67
  response = parsing_update(
67
68
  job_id: job_id,
68
69
  gid: gid,
69
- parsing_status: :starting)
70
+ parsing_status: :starting,
71
+ keep_outputs: @keep_outputs
72
+ )
70
73
 
71
74
  if response.code == 200
72
75
  puts "Page Parsing Status Updated."
@@ -165,7 +168,7 @@ module Datahen
165
168
  handle_error(e) if save
166
169
  raise e
167
170
  end
168
-
171
+
169
172
  if refetch_self
170
173
  refetch_page gid
171
174
  elsif reparse_self
@@ -178,11 +181,11 @@ module Datahen
178
181
  end
179
182
 
180
183
  def content
181
- @content ||= get_content(gid)
184
+ @content ||= get_content(job_id, gid)
182
185
  end
183
186
 
184
187
  def failed_content
185
- @failed_content ||= get_failed_content(gid)
188
+ @failed_content ||= get_failed_content(job_id, gid)
186
189
  end
187
190
 
188
191
  def handle_error(e)
@@ -6,6 +6,7 @@ module Datahen
6
6
  def initialize(options={})
7
7
  @filename = options.fetch(:filename) { raise "Filename is required"}
8
8
  @job_id = options[:job_id]
9
+ @keep_outputs = !!(options.fetch(:keep_outputs) { false })
9
10
  end
10
11
 
11
12
  def self.exposed_methods
@@ -81,7 +82,9 @@ module Datahen
81
82
 
82
83
  response = seeding_update(
83
84
  job_id: job_id,
84
- seeding_status: :starting)
85
+ seeding_status: :starting,
86
+ keep_outputs: @keep_outputs
87
+ )
85
88
 
86
89
  if response.code == 200
87
90
  puts "Seeding Status Updated."
@@ -2,11 +2,15 @@ module Datahen
2
2
  module Scraper
3
3
  class Seeder
4
4
 
5
- def self.exec_seeder(filename, job_id=nil, save=false)
5
+ def self.exec_seeder(filename, job_id=nil, save=false, keep_outputs=false)
6
6
  extname = File.extname(filename)
7
7
  case extname
8
8
  when '.rb'
9
- executor = RubySeederExecutor.new(filename: filename, job_id: job_id)
9
+ executor = RubySeederExecutor.new(
10
+ filename: filename,
11
+ job_id: job_id,
12
+ keep_outputs: keep_outputs
13
+ )
10
14
  executor.exec_seeder(save)
11
15
  else
12
16
  puts "Unable to find a seeder executor for file type \"#{extname}\""
@@ -15,4 +19,4 @@ module Datahen
15
19
 
16
20
  end
17
21
  end
18
- end
22
+ end
@@ -1,3 +1,3 @@
1
1
  module Datahen
2
- VERSION = "0.13.0"
2
+ VERSION = "0.13.7"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datahen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.0
4
+ version: 0.13.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-03-02 00:00:00.000000000 Z
11
+ date: 2020-05-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -215,10 +215,12 @@ files:
215
215
  - lib/datahen/client/global_page.rb
216
216
  - lib/datahen/client/job.rb
217
217
  - lib/datahen/client/job_export.rb
218
+ - lib/datahen/client/job_finisher.rb
218
219
  - lib/datahen/client/job_log.rb
219
220
  - lib/datahen/client/job_output.rb
220
221
  - lib/datahen/client/job_page.rb
221
222
  - lib/datahen/client/job_stat.rb
223
+ - lib/datahen/client/job_var.rb
222
224
  - lib/datahen/client/scraper.rb
223
225
  - lib/datahen/client/scraper_deployment.rb
224
226
  - lib/datahen/client/scraper_export.rb