datahen 0.11.1 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/lib/datahen/cli/global_page.rb +2 -15
 - data/lib/datahen/cli/job.rb +14 -2
 - data/lib/datahen/cli/parser.rb +9 -5
 - data/lib/datahen/cli/scraper.rb +7 -4
 - data/lib/datahen/cli/scraper_export.rb +2 -3
 - data/lib/datahen/cli/scraper_finisher.rb +8 -2
 - data/lib/datahen/cli/scraper_job.rb +35 -10
 - data/lib/datahen/cli/scraper_job_var.rb +33 -10
 - data/lib/datahen/cli/scraper_page.rb +57 -4
 - data/lib/datahen/cli/seeder.rb +4 -2
 - data/lib/datahen/client.rb +2 -0
 - data/lib/datahen/client/base.rb +4 -4
 - data/lib/datahen/client/global_page.rb +0 -5
 - data/lib/datahen/client/job.rb +8 -2
 - data/lib/datahen/client/job_finisher.rb +16 -0
 - data/lib/datahen/client/job_page.rb +19 -0
 - data/lib/datahen/client/job_stat.rb +12 -4
 - data/lib/datahen/client/job_var.rb +28 -0
 - data/lib/datahen/client/scraper_job.rb +6 -2
 - data/lib/datahen/client/scraper_job_page.rb +11 -5
 - data/lib/datahen/scraper/executor.rb +6 -6
 - data/lib/datahen/scraper/parser.rb +10 -4
 - data/lib/datahen/scraper/ruby_parser_executor.rb +7 -4
 - data/lib/datahen/scraper/ruby_seeder_executor.rb +4 -1
 - data/lib/datahen/scraper/seeder.rb +7 -3
 - data/lib/datahen/version.rb +1 -1
 - metadata +5 -3
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA256:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 3ff2ed2cd4772450c01e3e88248ae89441de709198fdd177d3e572bbc5f0e474
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 5701717fcba8a05b6f3e027d9bce33a3830fa20dabe3413255779899478cb4ab
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 949ad06a090a4ac8c2ef5b4e053ed4b7668c051be15b6959a2948614e771c25e18774d9ee97fe1f5c03c130986b671a8b26ac253f592a993fa4ad393bcad7673
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: b73cfc6c070314f97cbc7917d571de67031247aac42f3474b2e71d04e8b3d650fc380a0ce3ca65c1d8339bf8743d94b666ecccca4431f7b89df4e7485a03a382
         
     | 
    
        data/lib/datahen/cli/global_page.rb
    CHANGED
    
    | 
         @@ -12,28 +12,15 @@ module Datahen 
     | 
|
| 
       12 
12 
     | 
    
         
             
                  def content(gid)
         
     | 
| 
       13 
13 
     | 
    
         
             
                    client = Client::GlobalPage.new(options)
         
     | 
| 
       14 
14 
     | 
    
         
             
                    result = JSON.parse(client.find_content(gid).to_s)
         
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
       16 
16 
     | 
    
         
             
                    if result['available'] == true
         
     | 
| 
       17 
17 
     | 
    
         
             
                      puts "Preview content url: \"#{result['preview_url']}\""
         
     | 
| 
       18 
18 
     | 
    
         
             
                      `open "#{result['preview_url']}"`
         
     | 
| 
       19 
19 
     | 
    
         
             
                    else
         
     | 
| 
       20 
20 
     | 
    
         
             
                      puts "Content does not exist"
         
     | 
| 
       21 
     | 
    
         
            -
                    end 
     | 
| 
      
 21 
     | 
    
         
            +
                    end
         
     | 
| 
       22 
22 
     | 
    
         
             
                  end
         
     | 
| 
       23 
23 
     | 
    
         | 
| 
       24 
     | 
    
         
            -
                  desc "failedcontent <gid>", "Show failed content of a globalpage"
         
     | 
| 
       25 
     | 
    
         
            -
                  def failedcontent(gid)
         
     | 
| 
       26 
     | 
    
         
            -
                    client = Client::GlobalPage.new(options)
         
     | 
| 
       27 
     | 
    
         
            -
                    result = JSON.parse(client.find_failed_content(gid).to_s)
         
     | 
| 
       28 
     | 
    
         
            -
                    
         
     | 
| 
       29 
     | 
    
         
            -
                    if result['available'] == true
         
     | 
| 
       30 
     | 
    
         
            -
                      puts "Preview failed content url: \"#{result['preview_url']}\""
         
     | 
| 
       31 
     | 
    
         
            -
                      `open "#{result['preview_url']}"`
         
     | 
| 
       32 
     | 
    
         
            -
                    else
         
     | 
| 
       33 
     | 
    
         
            -
                      puts "Failed Content does not exist"
         
     | 
| 
       34 
     | 
    
         
            -
                    end        
         
     | 
| 
       35 
     | 
    
         
            -
                  end
         
     | 
| 
       36 
     | 
    
         
            -
                  
         
     | 
| 
       37 
24 
     | 
    
         
             
                end
         
     | 
| 
       38 
25 
     | 
    
         
             
              end
         
     | 
| 
       39 
26 
     | 
    
         
             
            end
         
     | 
    
        data/lib/datahen/cli/job.rb
    CHANGED
    
    | 
         @@ -18,12 +18,24 @@ module Datahen 
     | 
|
| 
       18 
18 
     | 
    
         
             
                    puts "#{client.all()}"
         
     | 
| 
       19 
19 
     | 
    
         
             
                  end
         
     | 
| 
       20 
20 
     | 
    
         | 
| 
       21 
     | 
    
         
            -
                  desc "show <job_id>", "Show a job"
         
     | 
| 
      
 21 
     | 
    
         
            +
                  desc "show <job_id>", "Show a job (Defaults to showing data from cached job)"
         
     | 
| 
      
 22 
     | 
    
         
            +
                  option :live, type: :boolean, desc: 'Get data from the live job, not cached job.'
         
     | 
| 
       22 
23 
     | 
    
         
             
                  def show(job_id)
         
     | 
| 
       23 
24 
     | 
    
         
             
                    client = Client::Job.new(options)
         
     | 
| 
       24 
     | 
    
         
            -
                    puts "#{client.find(job_id)}"
         
     | 
| 
      
 25 
     | 
    
         
            +
                    puts "#{client.find(job_id, options)}"
         
     | 
| 
       25 
26 
     | 
    
         
             
                  end
         
     | 
| 
       26 
27 
     | 
    
         | 
| 
      
 28 
     | 
    
         
            +
                  desc "stats <job_id>", "Get the stat for a job (Defaults to showing data from cached stats)"
         
     | 
| 
      
 29 
     | 
    
         
            +
                  long_desc <<-LONGDESC
         
     | 
| 
      
 30 
     | 
    
         
            +
                    Get stats for a scraper's current job\n
         
     | 
| 
      
 31 
     | 
    
         
            +
                  LONGDESC
         
     | 
| 
      
 32 
     | 
    
         
            +
                  option :live, type: :boolean, desc: 'Get data from the live stats, not cached stats.'
         
     | 
| 
      
 33 
     | 
    
         
            +
                  def stats(job_id)
         
     | 
| 
      
 34 
     | 
    
         
            +
                    client = Client::JobStat.new(options)
         
     | 
| 
      
 35 
     | 
    
         
            +
                    puts "#{client.job_current_stats(job_id, options)}"
         
     | 
| 
      
 36 
     | 
    
         
            +
                  end
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
       27 
39 
     | 
    
         
             
                end
         
     | 
| 
       28 
40 
     | 
    
         
             
              end
         
     | 
| 
       29 
41 
     | 
    
         | 
    
        data/lib/datahen/cli/parser.rb
    CHANGED
    
    | 
         @@ -10,12 +10,13 @@ module Datahen 
     | 
|
| 
       10 
10 
     | 
    
         
             
                  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
         
     | 
| 
       11 
11 
     | 
    
         
             
                  option :global, :aliases => :g, type: :boolean, default: false, desc: 'Use globalpage instead of a job page'
         
     | 
| 
       12 
12 
     | 
    
         
             
                  option :vars, :aliases => :v, type: :string, desc: 'Set user-defined page variables. Must be in json format. i.e: {"Foo":"bar"}'
         
     | 
| 
      
 13 
     | 
    
         
            +
                  option :"keep-outputs", :aliases => :ko, type: :boolean, default: false, desc: "Don't delete existing outputs"
         
     | 
| 
       13 
14 
     | 
    
         
             
                  def try_parse(scraper_name, parser_file, gid)
         
     | 
| 
       14 
     | 
    
         
            -
                    begin 
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
      
 15 
     | 
    
         
            +
                    begin
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
       16 
17 
     | 
    
         
             
                        if options[:job]
         
     | 
| 
       17 
18 
     | 
    
         
             
                          job_id = options[:job]
         
     | 
| 
       18 
     | 
    
         
            -
                        elsif options[:global] 
     | 
| 
      
 19 
     | 
    
         
            +
                        elsif options[:global]
         
     | 
| 
       19 
20 
     | 
    
         
             
                          job_id = nil
         
     | 
| 
       20 
21 
     | 
    
         
             
                        else
         
     | 
| 
       21 
22 
     | 
    
         
             
                          job = Client::ScraperJob.new(options).find(scraper_name)
         
     | 
| 
         @@ -24,7 +25,7 @@ module Datahen 
     | 
|
| 
       24 
25 
     | 
    
         | 
| 
       25 
26 
     | 
    
         | 
| 
       26 
27 
     | 
    
         
             
                      vars = JSON.parse(options[:vars]) if options[:vars]
         
     | 
| 
       27 
     | 
    
         
            -
                      puts Datahen::Scraper::Parser.exec_parser_page(parser_file, gid, job_id, false, vars)
         
     | 
| 
      
 28 
     | 
    
         
            +
                      puts Datahen::Scraper::Parser.exec_parser_page(parser_file, gid, job_id, false, vars, options[:"keep-outputs"])
         
     | 
| 
       28 
29 
     | 
    
         | 
| 
       29 
30 
     | 
    
         
             
                      rescue JSON::ParserError
         
     | 
| 
       30 
31 
     | 
    
         
             
                      if options[:vars]
         
     | 
| 
         @@ -40,6 +41,8 @@ module Datahen 
     | 
|
| 
       40 
41 
     | 
    
         
             
                        <GID>: Global ID of the page.\x5
         
     | 
| 
       41 
42 
     | 
    
         
             
                      LONGDESC
         
     | 
| 
       42 
43 
     | 
    
         
             
                  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
         
     | 
| 
      
 44 
     | 
    
         
            +
                  option :vars, :aliases => :v, type: :string, desc: 'Set user-defined page variables. Must be in json format. i.e: {"Foo":"bar"}'
         
     | 
| 
      
 45 
     | 
    
         
            +
                  option :"keep-outputs", :aliases => :ko, type: :boolean, default: false, desc: "Don't delete existing outputs"
         
     | 
| 
       43 
46 
     | 
    
         
             
                  def exec_parse(scraper_name, parser_file, *gids)
         
     | 
| 
       44 
47 
     | 
    
         
             
                    gids.each do |gid|
         
     | 
| 
       45 
48 
     | 
    
         
             
                      begin
         
     | 
| 
         @@ -52,7 +55,8 @@ module Datahen 
     | 
|
| 
       52 
55 
     | 
    
         
             
                          job_id = job['id']
         
     | 
| 
       53 
56 
     | 
    
         
             
                        end
         
     | 
| 
       54 
57 
     | 
    
         | 
| 
       55 
     | 
    
         
            -
                         
     | 
| 
      
 58 
     | 
    
         
            +
                        vars = JSON.parse(options[:vars]) if options[:vars]
         
     | 
| 
      
 59 
     | 
    
         
            +
                        puts Datahen::Scraper::Parser.exec_parser_page(parser_file, gid, job_id, true, vars, options[:"keep-outputs"])
         
     | 
| 
       56 
60 
     | 
    
         
             
                      rescue => e
         
     | 
| 
       57 
61 
     | 
    
         
             
                        puts e
         
     | 
| 
       58 
62 
     | 
    
         
             
                      end
         
     | 
    
        data/lib/datahen/cli/scraper.rb
    CHANGED
    
    | 
         @@ -60,7 +60,7 @@ module Datahen 
     | 
|
| 
       60 
60 
     | 
    
         
             
                  desc "show <scraper_name>", "Show a scraper"
         
     | 
| 
       61 
61 
     | 
    
         
             
                  def show(scraper_name)
         
     | 
| 
       62 
62 
     | 
    
         
             
                    client = Client::Scraper.new(options)
         
     | 
| 
       63 
     | 
    
         
            -
                    puts "#{client.find(scraper_name)}"
         
     | 
| 
      
 63 
     | 
    
         
            +
                    puts "#{client.find(scraper_name, options)}"
         
     | 
| 
       64 
64 
     | 
    
         
             
                  end
         
     | 
| 
       65 
65 
     | 
    
         | 
| 
       66 
66 
     | 
    
         
             
                  desc "delete <scraper_name>", "Delete a scraper and related records"
         
     | 
| 
         @@ -102,6 +102,7 @@ module Datahen 
     | 
|
| 
       102 
102 
     | 
    
         
             
                  option :head, :aliases => :H, desc: 'Show the oldest log entries. If not set, newest entries is shown'
         
     | 
| 
       103 
103 
     | 
    
         
             
                  option :parsing, :aliases => :p, type: :boolean, desc: 'Show only log entries related to parsing errors'
         
     | 
| 
       104 
104 
     | 
    
         
             
                  option :seeding, :aliases => :s, type: :boolean, desc: 'Show only log entries related to seeding errors'
         
     | 
| 
      
 105 
     | 
    
         
            +
                  option :finisher, :aliases => :f, type: :boolean, desc: 'Show only log entries related to finisher errors'
         
     | 
| 
       105 
106 
     | 
    
         
             
                  option :more, :aliases => :m, desc: 'Show next set of log entries. Enter the `More token`'
         
     | 
| 
       106 
107 
     | 
    
         
             
                  option :per_page, :aliases => :P, type: :numeric, desc: 'Number of records per page. Max 5000 per page.'
         
     | 
| 
       107 
108 
     | 
    
         
             
                  def log(scraper_name)
         
     | 
| 
         @@ -111,6 +112,7 @@ module Datahen 
     | 
|
| 
       111 
112 
     | 
    
         
             
                    query["order"] = options.delete(:head) if options[:head]
         
     | 
| 
       112 
113 
     | 
    
         
             
                    query["job_type"] = "parsing" if options[:parsing]
         
     | 
| 
       113 
114 
     | 
    
         
             
                    query["job_type"] = "seeding" if options[:seeding]
         
     | 
| 
      
 115 
     | 
    
         
            +
                    query["job_type"] = "finisher executing" if options[:finisher]
         
     | 
| 
       114 
116 
     | 
    
         
             
                    query["page_token"] = options.delete(:more) if options[:more]
         
     | 
| 
       115 
117 
     | 
    
         
             
                    query["per_page"] = options.delete(:per_page) if options[:per_page]
         
     | 
| 
       116 
118 
     | 
    
         | 
| 
         @@ -138,17 +140,18 @@ module Datahen 
     | 
|
| 
       138 
140 
     | 
    
         
             
                    end
         
     | 
| 
       139 
141 
     | 
    
         
             
                  end
         
     | 
| 
       140 
142 
     | 
    
         | 
| 
       141 
     | 
    
         
            -
                  desc "stats <scraper_name>", "Get the  
     | 
| 
      
 143 
     | 
    
         
            +
                  desc "stats <scraper_name>", "Get the stat for a current job (Defaults to showing data from cached stats)"
         
     | 
| 
       142 
144 
     | 
    
         
             
                  long_desc <<-LONGDESC
         
     | 
| 
       143 
145 
     | 
    
         
             
                    Get stats for a scraper's current job\n
         
     | 
| 
       144 
146 
     | 
    
         
             
                  LONGDESC
         
     | 
| 
       145 
147 
     | 
    
         
             
                  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
         
     | 
| 
      
 148 
     | 
    
         
            +
                  option :live, type: :boolean, desc: 'Get data from the live stats, not cached stats.'
         
     | 
| 
       146 
149 
     | 
    
         
             
                  def stats(scraper_name)
         
     | 
| 
       147 
150 
     | 
    
         
             
                    client = Client::JobStat.new(options)
         
     | 
| 
       148 
151 
     | 
    
         
             
                    if options[:job]
         
     | 
| 
       149 
     | 
    
         
            -
                      puts "#{client.job_current_stats(options[:job])}"
         
     | 
| 
      
 152 
     | 
    
         
            +
                      puts "#{client.job_current_stats(options[:job], options)}"
         
     | 
| 
       150 
153 
     | 
    
         
             
                    else
         
     | 
| 
       151 
     | 
    
         
            -
                      puts "#{client.scraper_job_current_stats(scraper_name)}"
         
     | 
| 
      
 154 
     | 
    
         
            +
                      puts "#{client.scraper_job_current_stats(scraper_name, options)}"
         
     | 
| 
       152 
155 
     | 
    
         
             
                    end
         
     | 
| 
       153 
156 
     | 
    
         
             
                  end
         
     | 
| 
       154 
157 
     | 
    
         | 
    
        data/lib/datahen/cli/scraper_export.rb
    CHANGED
    
    | 
         @@ -12,7 +12,6 @@ module Datahen 
     | 
|
| 
       12 
12 
     | 
    
         
             
                    puts "#{client.find(export_id)}"
         
     | 
| 
       13 
13 
     | 
    
         
             
                  end
         
     | 
| 
       14 
14 
     | 
    
         | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
15 
     | 
    
         
             
                  desc "list", "Gets a list of exports"
         
     | 
| 
       17 
16 
     | 
    
         
             
                  long_desc <<-LONGDESC
         
     | 
| 
       18 
17 
     | 
    
         
             
                    List exports.
         
     | 
| 
         @@ -34,13 +33,13 @@ module Datahen 
     | 
|
| 
       34 
33 
     | 
    
         
             
                  def download(export_id)
         
     | 
| 
       35 
34 
     | 
    
         
             
                    client = Client::ScraperExport.new(options)
         
     | 
| 
       36 
35 
     | 
    
         
             
                    result = JSON.parse(client.download(export_id).to_s)
         
     | 
| 
       37 
     | 
    
         
            -
             
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
| 
       38 
37 
     | 
    
         
             
                    if result['signed_url']
         
     | 
| 
       39 
38 
     | 
    
         
             
                      puts "Download url: \"#{result['signed_url']}\""
         
     | 
| 
       40 
39 
     | 
    
         
             
                      `open "#{result['signed_url']}"`
         
     | 
| 
       41 
40 
     | 
    
         
             
                    else
         
     | 
| 
       42 
41 
     | 
    
         
             
                      puts "Exported file does not exist"
         
     | 
| 
       43 
     | 
    
         
            -
                    end 
     | 
| 
      
 42 
     | 
    
         
            +
                    end
         
     | 
| 
       44 
43 
     | 
    
         
             
                  end
         
     | 
| 
       45 
44 
     | 
    
         | 
| 
       46 
45 
     | 
    
         | 
    
        data/lib/datahen/cli/scraper_finisher.rb
    CHANGED
    
    | 
         @@ -11,9 +11,15 @@ module Datahen 
     | 
|
| 
       11 
11 
     | 
    
         
             
                  long_desc <<-LONGDESC
         
     | 
| 
       12 
12 
     | 
    
         
             
                    Reset finisher on a scraper's current job.\x5
         
     | 
| 
       13 
13 
     | 
    
         
             
                  LONGDESC
         
     | 
| 
      
 14 
     | 
    
         
            +
                  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
         
     | 
| 
       14 
15 
     | 
    
         
             
                  def reset(scraper_name)
         
     | 
| 
       15 
     | 
    
         
            -
                     
     | 
| 
       16 
     | 
    
         
            -
             
     | 
| 
      
 16 
     | 
    
         
            +
                    if options[:job]
         
     | 
| 
      
 17 
     | 
    
         
            +
                      client = Client::JobFinisher.new(options)
         
     | 
| 
      
 18 
     | 
    
         
            +
                      puts "#{client.reset(options[:job])}"
         
     | 
| 
      
 19 
     | 
    
         
            +
                    else
         
     | 
| 
      
 20 
     | 
    
         
            +
                      client = Client::ScraperFinisher.new(options)
         
     | 
| 
      
 21 
     | 
    
         
            +
                      puts "#{client.reset(scraper_name)}"
         
     | 
| 
      
 22 
     | 
    
         
            +
                    end
         
     | 
| 
       17 
23 
     | 
    
         
             
                  end
         
     | 
| 
       18 
24 
     | 
    
         
             
                end
         
     | 
| 
       19 
25 
     | 
    
         
             
              end
         
     | 
    
        data/lib/datahen/cli/scraper_job.rb
    CHANGED
    
    | 
         @@ -6,10 +6,11 @@ module Datahen 
     | 
|
| 
       6 
6 
     | 
    
         
             
                    "#{basename} #{@package_name} #{command.usage}"
         
     | 
| 
       7 
7 
     | 
    
         
             
                  end
         
     | 
| 
       8 
8 
     | 
    
         | 
| 
       9 
     | 
    
         
            -
                  desc "show <scraper_name>", "Show a scraper's current job"
         
     | 
| 
      
 9 
     | 
    
         
            +
                  desc "show <scraper_name>", "Show a scraper's current job (Defaults to showing data from cached job)"
         
     | 
| 
      
 10 
     | 
    
         
            +
                  option :live, type: :boolean, desc: 'Get data from the live job, not cached job.'
         
     | 
| 
       10 
11 
     | 
    
         
             
                  def show(scraper_name)
         
     | 
| 
       11 
12 
     | 
    
         
             
                    client = Client::ScraperJob.new(options)
         
     | 
| 
       12 
     | 
    
         
            -
                    puts "#{client.find(scraper_name)}"
         
     | 
| 
      
 13 
     | 
    
         
            +
                    puts "#{client.find(scraper_name, options)}"
         
     | 
| 
       13 
14 
     | 
    
         
             
                  end
         
     | 
| 
       14 
15 
     | 
    
         | 
| 
       15 
16 
     | 
    
         | 
| 
         @@ -29,27 +30,45 @@ module Datahen 
     | 
|
| 
       29 
30 
     | 
    
         
             
                  long_desc <<-LONGDESC
         
     | 
| 
       30 
31 
     | 
    
         
             
                    Cancels a scraper's current job
         
     | 
| 
       31 
32 
     | 
    
         
             
                  LONGDESC
         
     | 
| 
      
 33 
     | 
    
         
            +
                  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
         
     | 
| 
       32 
34 
     | 
    
         
             
                  def cancel(scraper_name)
         
     | 
| 
       33 
     | 
    
         
            -
                     
     | 
| 
       34 
     | 
    
         
            -
             
     | 
| 
      
 35 
     | 
    
         
            +
                    if options[:job]
         
     | 
| 
      
 36 
     | 
    
         
            +
                      client = Client::Job.new(options)
         
     | 
| 
      
 37 
     | 
    
         
            +
                      puts "#{client.cancel(options[:job])}"
         
     | 
| 
      
 38 
     | 
    
         
            +
                    else
         
     | 
| 
      
 39 
     | 
    
         
            +
                      client = Client::ScraperJob.new(options)
         
     | 
| 
      
 40 
     | 
    
         
            +
                      puts "#{client.cancel(scraper_name)}"
         
     | 
| 
      
 41 
     | 
    
         
            +
                    end
         
     | 
| 
       35 
42 
     | 
    
         
             
                  end
         
     | 
| 
       36 
43 
     | 
    
         | 
| 
       37 
44 
     | 
    
         
             
                  desc "resume <scraper_name>", "resumes a scraper's current job"
         
     | 
| 
       38 
45 
     | 
    
         
             
                  long_desc <<-LONGDESC
         
     | 
| 
       39 
46 
     | 
    
         
             
                    Resumes a scraper's current job
         
     | 
| 
       40 
47 
     | 
    
         
             
                  LONGDESC
         
     | 
| 
      
 48 
     | 
    
         
            +
                  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
         
     | 
| 
       41 
49 
     | 
    
         
             
                  def resume(scraper_name)
         
     | 
| 
       42 
     | 
    
         
            -
                     
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
      
 50 
     | 
    
         
            +
                    if options[:job]
         
     | 
| 
      
 51 
     | 
    
         
            +
                      client = Client::Job.new(options)
         
     | 
| 
      
 52 
     | 
    
         
            +
                      puts "#{client.resume(options[:job])}"
         
     | 
| 
      
 53 
     | 
    
         
            +
                    else
         
     | 
| 
      
 54 
     | 
    
         
            +
                      client = Client::ScraperJob.new(options)
         
     | 
| 
      
 55 
     | 
    
         
            +
                      puts "#{client.resume(scraper_name)}"
         
     | 
| 
      
 56 
     | 
    
         
            +
                    end
         
     | 
| 
       44 
57 
     | 
    
         
             
                  end
         
     | 
| 
       45 
58 
     | 
    
         | 
| 
       46 
59 
     | 
    
         
             
                  desc "pause <scraper_name>", "pauses a scraper's current job"
         
     | 
| 
       47 
60 
     | 
    
         
             
                  long_desc <<-LONGDESC
         
     | 
| 
       48 
61 
     | 
    
         
             
                    pauses a scraper's current job
         
     | 
| 
       49 
62 
     | 
    
         
             
                  LONGDESC
         
     | 
| 
      
 63 
     | 
    
         
            +
                  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
         
     | 
| 
       50 
64 
     | 
    
         
             
                  def pause(scraper_name)
         
     | 
| 
       51 
     | 
    
         
            -
                     
     | 
| 
       52 
     | 
    
         
            -
             
     | 
| 
      
 65 
     | 
    
         
            +
                    if options[:job]
         
     | 
| 
      
 66 
     | 
    
         
            +
                      client = Client::Job.new(options)
         
     | 
| 
      
 67 
     | 
    
         
            +
                      puts "#{client.pause(options[:job])}"
         
     | 
| 
      
 68 
     | 
    
         
            +
                    else
         
     | 
| 
      
 69 
     | 
    
         
            +
                      client = Client::ScraperJob.new(options)
         
     | 
| 
      
 70 
     | 
    
         
            +
                      puts "#{client.pause(scraper_name)}"
         
     | 
| 
      
 71 
     | 
    
         
            +
                    end
         
     | 
| 
       53 
72 
     | 
    
         
             
                  end
         
     | 
| 
       54 
73 
     | 
    
         | 
| 
       55 
74 
     | 
    
         | 
| 
         @@ -60,9 +79,15 @@ module Datahen 
     | 
|
| 
       60 
79 
     | 
    
         
             
                  option :workers, :aliases => :w, type: :numeric, desc: 'Set how many standard workers to use. Scraper job must be restarted(paused then resumed, or cancelled then resumed) for it to take effect. Default: 1. '
         
     | 
| 
       61 
80 
     | 
    
         
             
                  option :browsers, type: :numeric, desc: 'Set how many browser workers to use. Scraper job must be restarted(paused then resumed, or cancelled then resumed) for it to take effect. Default: 0. '
         
     | 
| 
       62 
81 
     | 
    
         
             
                  option :proxy_type, desc: 'Set the Proxy type. Default: standard'
         
     | 
| 
      
 82 
     | 
    
         
            +
                  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
         
     | 
| 
       63 
83 
     | 
    
         
             
                  def update(scraper_name)
         
     | 
| 
       64 
     | 
    
         
            -
                     
     | 
| 
       65 
     | 
    
         
            -
             
     | 
| 
      
 84 
     | 
    
         
            +
                    if options[:job]
         
     | 
| 
      
 85 
     | 
    
         
            +
                      client = Client::Job.new(options)
         
     | 
| 
      
 86 
     | 
    
         
            +
                      puts "#{client.update(options[:job], options)}"
         
     | 
| 
      
 87 
     | 
    
         
            +
                    else
         
     | 
| 
      
 88 
     | 
    
         
            +
                      client = Client::ScraperJob.new(options)
         
     | 
| 
      
 89 
     | 
    
         
            +
                      puts "#{client.update(scraper_name, options)}"
         
     | 
| 
      
 90 
     | 
    
         
            +
                    end
         
     | 
| 
       66 
91 
     | 
    
         
             
                  end
         
     | 
| 
       67 
92 
     | 
    
         | 
| 
       68 
93 
     | 
    
         
             
                  desc "var SUBCOMMAND ...ARGS", "for managing scraper's job variables"
         
     | 
| 
         @@ -13,9 +13,15 @@ module Datahen 
     | 
|
| 
       13 
13 
     | 
    
         
             
                  LONGDESC
         
     | 
| 
       14 
14 
     | 
    
         
             
                  option :page, :aliases => :p, type: :numeric, desc: 'Get the next set of records by page.'
         
     | 
| 
       15 
15 
     | 
    
         
             
                  option :per_page, :aliases => :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
         
     | 
| 
      
 16 
     | 
    
         
            +
                  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
         
     | 
| 
       16 
17 
     | 
    
         
             
                  def list(scraper_name)
         
     | 
| 
       17 
     | 
    
         
            -
                     
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
      
 18 
     | 
    
         
            +
                    if options[:job]
         
     | 
| 
      
 19 
     | 
    
         
            +
                      client = Client::JobVar.new(options)
         
     | 
| 
      
 20 
     | 
    
         
            +
                      puts "#{client.all(options[:job])}"
         
     | 
| 
      
 21 
     | 
    
         
            +
                    else
         
     | 
| 
      
 22 
     | 
    
         
            +
                      client = Client::ScraperJobVar.new(options)
         
     | 
| 
      
 23 
     | 
    
         
            +
                      puts "#{client.all(scraper_name)}"
         
     | 
| 
      
 24 
     | 
    
         
            +
                    end
         
     | 
| 
       19 
25 
     | 
    
         
             
                  end
         
     | 
| 
       20 
26 
     | 
    
         | 
| 
       21 
27 
     | 
    
         
             
                  desc "set <scraper_name> <var_name> <value>", "Set an environment var on the scrape job"
         
     | 
| 
         @@ -24,23 +30,40 @@ module Datahen 
     | 
|
| 
       24 
30 
     | 
    
         
             
                      <var_name>: Var name can only consist of alphabets, numbers, underscores. Name must be unique to your scrape job, otherwise it will be overwritten.\x5
         
     | 
| 
       25 
31 
     | 
    
         
             
                      <value>: Value of variable.\x5
         
     | 
| 
       26 
32 
     | 
    
         
             
                      LONGDESC
         
     | 
| 
       27 
     | 
    
         
            -
                  option :secret, type: :boolean, desc: 'Set true to make it decrypt the value. Default: false' 
     | 
| 
      
 33 
     | 
    
         
            +
                  option :secret, type: :boolean, desc: 'Set true to make it decrypt the value. Default: false'
         
     | 
| 
      
 34 
     | 
    
         
            +
                  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
         
     | 
| 
       28 
35 
     | 
    
         
             
                  def set(scraper_name, var_name, value)
         
     | 
| 
       29 
     | 
    
         
            -
                     
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
       31 
     | 
    
         
            -
             
     | 
| 
      
 36 
     | 
    
         
            +
                    if options[:job]
         
     | 
| 
      
 37 
     | 
    
         
            +
                      client = Client::JobVar.new(options)
         
     | 
| 
      
 38 
     | 
    
         
            +
                      puts "#{client.set(options[:job], var_name, value, options)}"
         
     | 
| 
      
 39 
     | 
    
         
            +
                    else
         
     | 
| 
      
 40 
     | 
    
         
            +
                      client = Client::ScraperJobVar.new(options)
         
     | 
| 
      
 41 
     | 
    
         
            +
                      puts "#{client.set(scraper_name, var_name, value, options)}"
         
     | 
| 
      
 42 
     | 
    
         
            +
                    end
         
     | 
| 
       32 
43 
     | 
    
         
             
                  end
         
     | 
| 
       33 
44 
     | 
    
         | 
| 
       34 
45 
     | 
    
         
             
                  desc "show <scraper_name> <var_name>", "Show an environment variable on the scrape job"
         
     | 
| 
      
 46 
     | 
    
         
            +
                  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
         
     | 
| 
       35 
47 
     | 
    
         
             
                  def show(scraper_name, var_name)
         
     | 
| 
       36 
     | 
    
         
            -
                     
     | 
| 
       37 
     | 
    
         
            -
             
     | 
| 
      
 48 
     | 
    
         
            +
                    if options[:job]
         
     | 
| 
      
 49 
     | 
    
         
            +
                      client = Client::JobVar.new(options)
         
     | 
| 
      
 50 
     | 
    
         
            +
                      puts "#{client.find(options[:job], var_name)}"
         
     | 
| 
      
 51 
     | 
    
         
            +
                    else
         
     | 
| 
      
 52 
     | 
    
         
            +
                      client = Client::ScraperJobVar.new(options)
         
     | 
| 
      
 53 
     | 
    
         
            +
                      puts "#{client.find(scraper_name, var_name)}"
         
     | 
| 
      
 54 
     | 
    
         
            +
                    end
         
     | 
| 
       38 
55 
     | 
    
         
             
                  end
         
     | 
| 
       39 
56 
     | 
    
         | 
| 
       40 
57 
     | 
    
         
             
                  desc "unset <scraper_name> <var_name>", "Deletes an environment variable on the scrape job"
         
     | 
| 
      
 58 
     | 
    
         
            +
                  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
         
     | 
| 
       41 
59 
     | 
    
         
             
                  def unset(scraper_name, var_name)
         
     | 
| 
       42 
     | 
    
         
            -
                     
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
      
 60 
     | 
    
         
            +
                    if options[:job]
         
     | 
| 
      
 61 
     | 
    
         
            +
                      client = Client::JobVar.new(options)
         
     | 
| 
      
 62 
     | 
    
         
            +
                      puts "#{client.unset(options[:job], var_name)}"
         
     | 
| 
      
 63 
     | 
    
         
            +
                    else
         
     | 
| 
      
 64 
     | 
    
         
            +
                      client = Client::ScraperJobVar.new(options)
         
     | 
| 
      
 65 
     | 
    
         
            +
                      puts "#{client.unset(scraper_name, var_name)}"
         
     | 
| 
      
 66 
     | 
    
         
            +
                    end
         
     | 
| 
       44 
67 
     | 
    
         
             
                  end
         
     | 
| 
       45 
68 
     | 
    
         
             
                end
         
     | 
| 
       46 
69 
     | 
    
         
             
              end
         
     | 
| 
         @@ -17,6 +17,7 @@ module Datahen 
     | 
|
| 
       17 
17 
     | 
    
         
             
                  option :per_page, :aliases => :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
         
     | 
| 
       18 
18 
     | 
    
         
             
                  option :fetch_fail, type: :boolean, desc: 'Returns only pages that fails fetching.'
         
     | 
| 
       19 
19 
     | 
    
         
             
                  option :parse_fail, type: :boolean, desc: 'Returns only pages that fails parsing.'
         
     | 
| 
      
 20 
     | 
    
         
            +
                  option :status, type: :string, desc: 'Returns only pages with specific status.'
         
     | 
| 
       20 
21 
     | 
    
         
             
                  def list(scraper_name)
         
     | 
| 
       21 
22 
     | 
    
         
             
                    if options[:job]
         
     | 
| 
       22 
23 
     | 
    
         
             
                      client = Client::JobPage.new(options)
         
     | 
| 
         @@ -104,13 +105,19 @@ module Datahen 
     | 
|
| 
       104 
105 
     | 
    
         
             
                  option :fetch_fail, type: :boolean, desc: 'Refetches only pages that fails fetching.'
         
     | 
| 
       105 
106 
     | 
    
         
             
                  option :parse_fail, type: :boolean, desc: 'Refetches only pages that fails parsing.'
         
     | 
| 
       106 
107 
     | 
    
         
             
                  option :status, type: :string, desc: 'Refetches only pages with a specific status.'
         
     | 
| 
      
 108 
     | 
    
         
            +
                  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
         
     | 
| 
       107 
109 
     | 
    
         
             
                  def refetch(scraper_name)
         
     | 
| 
       108 
110 
     | 
    
         
             
                    if !options.key?(:gid) && !options.key?(:fetch_fail) && !options.key?(:parse_fail) && !options.key?(:status)
         
     | 
| 
       109 
111 
     | 
    
         
             
                      puts "Must specify either a --gid, --fetch-fail, --parse-fail or --status"
         
     | 
| 
       110 
112 
     | 
    
         
             
                      return
         
     | 
| 
       111 
113 
     | 
    
         
             
                    end
         
     | 
| 
       112 
     | 
    
         
            -
                     
     | 
| 
       113 
     | 
    
         
            -
             
     | 
| 
      
 114 
     | 
    
         
            +
                    if options[:job]
         
     | 
| 
      
 115 
     | 
    
         
            +
                      client = Client::JobPage.new(options)
         
     | 
| 
      
 116 
     | 
    
         
            +
                      puts "#{client.refetch(options[:job])}"
         
     | 
| 
      
 117 
     | 
    
         
            +
                    else
         
     | 
| 
      
 118 
     | 
    
         
            +
                      client = Client::ScraperJobPage.new(options)
         
     | 
| 
      
 119 
     | 
    
         
            +
                      puts "#{client.refetch(scraper_name)}"
         
     | 
| 
      
 120 
     | 
    
         
            +
                    end
         
     | 
| 
       114 
121 
     | 
    
         
             
                  end
         
     | 
| 
       115 
122 
     | 
    
         | 
| 
       116 
123 
     | 
    
         
             
                  desc "reparse <scraper_name>", "Reparse Pages on a scraper's current job"
         
     | 
| 
         @@ -120,6 +127,7 @@ module Datahen 
     | 
|
| 
       120 
127 
     | 
    
         
             
                  option :gid, :aliases => :g, type: :string, desc: 'Reparse a specific GID'
         
     | 
| 
       121 
128 
     | 
    
         
             
                  option :parse_fail, type: :boolean, desc: 'Reparse only pages that fails parsing.'
         
     | 
| 
       122 
129 
     | 
    
         
             
                  option :status, type: :string, desc: 'Reparse only pages with a specific status.'
         
     | 
| 
      
 130 
     | 
    
         
            +
                  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
         
     | 
| 
       123 
131 
     | 
    
         
             
                  def reparse(scraper_name)
         
     | 
| 
       124 
132 
     | 
    
         
             
                    begin
         
     | 
| 
       125 
133 
     | 
    
         
             
                      options[:vars] = JSON.parse(options[:vars]) if options[:vars]
         
     | 
| 
         @@ -129,8 +137,13 @@ module Datahen 
     | 
|
| 
       129 
137 
     | 
    
         
             
                        return
         
     | 
| 
       130 
138 
     | 
    
         
             
                      end
         
     | 
| 
       131 
139 
     | 
    
         | 
| 
       132 
     | 
    
         
            -
                       
     | 
| 
       133 
     | 
    
         
            -
             
     | 
| 
      
 140 
     | 
    
         
            +
                      if options[:job]
         
     | 
| 
      
 141 
     | 
    
         
            +
                        client = Client::JobPage.new(options)
         
     | 
| 
      
 142 
     | 
    
         
            +
                        puts "#{client.reparse(options[:job])}"
         
     | 
| 
      
 143 
     | 
    
         
            +
                      else
         
     | 
| 
      
 144 
     | 
    
         
            +
                        client = Client::ScraperJobPage.new(options)
         
     | 
| 
      
 145 
     | 
    
         
            +
                        puts "#{client.reparse(scraper_name)}"
         
     | 
| 
      
 146 
     | 
    
         
            +
                      end
         
     | 
| 
       134 
147 
     | 
    
         | 
| 
       135 
148 
     | 
    
         
             
                    rescue JSON::ParserError
         
     | 
| 
       136 
149 
     | 
    
         
             
                      if options[:vars]
         
     | 
| 
         @@ -197,6 +210,46 @@ module Datahen 
     | 
|
| 
       197 
210 
     | 
    
         
             
                    end
         
     | 
| 
       198 
211 
     | 
    
         
             
                  end
         
     | 
| 
       199 
212 
     | 
    
         | 
| 
      
 213 
     | 
    
         
            +
                  desc "content <scraper_name> <gid>", "Show a page's content in scraper's current job"
         
     | 
| 
      
 214 
     | 
    
         
            +
                  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
         
     | 
| 
      
 215 
     | 
    
         
            +
                  def content(scraper_name, gid)
         
     | 
| 
      
 216 
     | 
    
         
            +
                    result = nil
         
     | 
| 
      
 217 
     | 
    
         
            +
                    if options[:job]
         
     | 
| 
      
 218 
     | 
    
         
            +
                      client = Client::JobPage.new(options)
         
     | 
| 
      
 219 
     | 
    
         
            +
                      result = JSON.parse(client.find_content(options[:job], gid).to_s)
         
     | 
| 
      
 220 
     | 
    
         
            +
                    else
         
     | 
| 
      
 221 
     | 
    
         
            +
                      client = Client::ScraperJobPage.new(options)
         
     | 
| 
      
 222 
     | 
    
         
            +
                      result = JSON.parse(client.find_content(scraper_name, gid).to_s)
         
     | 
| 
      
 223 
     | 
    
         
            +
                    end
         
     | 
| 
      
 224 
     | 
    
         
            +
             
     | 
| 
      
 225 
     | 
    
         
            +
                    if result['available'] == true
         
     | 
| 
      
 226 
     | 
    
         
            +
                      puts "Preview content url: \"#{result['preview_url']}\""
         
     | 
| 
      
 227 
     | 
    
         
            +
                      `open "#{result['preview_url']}"`
         
     | 
| 
      
 228 
     | 
    
         
            +
                    else
         
     | 
| 
      
 229 
     | 
    
         
            +
                      puts "Content does not exist"
         
     | 
| 
      
 230 
     | 
    
         
            +
                    end
         
     | 
| 
      
 231 
     | 
    
         
            +
                  end
         
     | 
| 
      
 232 
     | 
    
         
            +
             
     | 
| 
      
 233 
     | 
    
         
            +
                  desc "failedcontent <gid>", "Show a page's failed content in scraper's current job"
         
     | 
| 
      
 234 
     | 
    
         
            +
                  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
         
     | 
| 
      
 235 
     | 
    
         
            +
                  def failedcontent(scraper_name, gid)
         
     | 
| 
      
 236 
     | 
    
         
            +
                    result = nil
         
     | 
| 
      
 237 
     | 
    
         
            +
                    if options[:job]
         
     | 
| 
      
 238 
     | 
    
         
            +
                      client = Client::JobPage.new(options)
         
     | 
| 
      
 239 
     | 
    
         
            +
                      result = JSON.parse(client.find_failed_content(options[:job], gid).to_s)
         
     | 
| 
      
 240 
     | 
    
         
            +
                    else
         
     | 
| 
      
 241 
     | 
    
         
            +
                      client = Client::ScraperJobPage.new(options)
         
     | 
| 
      
 242 
     | 
    
         
            +
                      result = JSON.parse(client.find_failed_content(scraper_name, gid).to_s)
         
     | 
| 
      
 243 
     | 
    
         
            +
                    end
         
     | 
| 
      
 244 
     | 
    
         
            +
             
     | 
| 
      
 245 
     | 
    
         
            +
                    if result['available'] == true
         
     | 
| 
      
 246 
     | 
    
         
            +
                      puts "Preview failed content url: \"#{result['preview_url']}\""
         
     | 
| 
      
 247 
     | 
    
         
            +
                      `open "#{result['preview_url']}"`
         
     | 
| 
      
 248 
     | 
    
         
            +
                    else
         
     | 
| 
      
 249 
     | 
    
         
            +
                      puts "Failed Content does not exist"
         
     | 
| 
      
 250 
     | 
    
         
            +
                    end
         
     | 
| 
      
 251 
     | 
    
         
            +
                  end
         
     | 
| 
      
 252 
     | 
    
         
            +
             
     | 
| 
       200 
253 
     | 
    
         
             
                end
         
     | 
| 
       201 
254 
     | 
    
         
             
              end
         
     | 
| 
       202 
255 
     | 
    
         | 
    
        data/lib/datahen/cli/seeder.rb
    CHANGED
    
    | 
         @@ -7,6 +7,7 @@ module Datahen 
     | 
|
| 
       7 
7 
     | 
    
         
             
                        <seeder_file>: Seeder script file will be executed.\x5
         
     | 
| 
       8 
8 
     | 
    
         
             
                      LONGDESC
         
     | 
| 
       9 
9 
     | 
    
         
             
                  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
         
     | 
| 
      
 10 
     | 
    
         
            +
                  option :"keep-outputs", :aliases => :ko, type: :boolean, default: false, desc: "Don't delete existing outputs"
         
     | 
| 
       10 
11 
     | 
    
         
             
                  def try_seed(scraper_name, seeder_file)
         
     | 
| 
       11 
12 
     | 
    
         
             
                    if options[:job]
         
     | 
| 
       12 
13 
     | 
    
         
             
                      job_id = options[:job]
         
     | 
| 
         @@ -14,8 +15,8 @@ module Datahen 
     | 
|
| 
       14 
15 
     | 
    
         
             
                      job = Client::ScraperJob.new(options).find(scraper_name)
         
     | 
| 
       15 
16 
     | 
    
         
             
                      job_id = job['id']
         
     | 
| 
       16 
17 
     | 
    
         
             
                    end
         
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
                    puts Datahen::Scraper::Seeder.exec_seeder(seeder_file, job_id, false)
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
                    puts Datahen::Scraper::Seeder.exec_seeder(seeder_file, job_id, false, options[:"keep-outputs"])
         
     | 
| 
       19 
20 
     | 
    
         
             
                  end
         
     | 
| 
       20 
21 
     | 
    
         | 
| 
       21 
22 
     | 
    
         
             
                  desc "exec <scraper_name> <seeder_file>", "Executes a seeder script onto a scraper's current job."
         
     | 
| 
         @@ -24,6 +25,7 @@ module Datahen 
     | 
|
| 
       24 
25 
     | 
    
         
             
                        <seeder_file>: Seeder script file that will be executed on the scraper's current job.\x5
         
     | 
| 
       25 
26 
     | 
    
         
             
                      LONGDESC
         
     | 
| 
       26 
27 
     | 
    
         
             
                  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
         
     | 
| 
      
 28 
     | 
    
         
            +
                  option :"keep-outputs", :aliases => :ko, type: :boolean, default: false, desc: "Don't delete existing outputs"
         
     | 
| 
       27 
29 
     | 
    
         
             
                  def exec_parse(scraper_name, seeder_file)
         
     | 
| 
       28 
30 
     | 
    
         
             
                    if options[:job]
         
     | 
| 
       29 
31 
     | 
    
         
             
                      job_id = options[:job]
         
     | 
    
        data/lib/datahen/client.rb
    CHANGED
    
    | 
         @@ -20,7 +20,9 @@ require "datahen/client/job_stat" 
     | 
|
| 
       20 
20 
     | 
    
         
             
            require "datahen/client/backblaze_content"
         
     | 
| 
       21 
21 
     | 
    
         
             
            require "datahen/client/env_var"
         
     | 
| 
       22 
22 
     | 
    
         
             
            require "datahen/client/scraper_var"
         
     | 
| 
      
 23 
     | 
    
         
            +
            require "datahen/client/job_var"
         
     | 
| 
       23 
24 
     | 
    
         
             
            require "datahen/client/scraper_job_var"
         
     | 
| 
      
 25 
     | 
    
         
            +
            require "datahen/client/job_finisher"
         
     | 
| 
       24 
26 
     | 
    
         | 
| 
       25 
27 
     | 
    
         | 
| 
       26 
28 
     | 
    
         
             
            module Datahen
         
     | 
    
        data/lib/datahen/client/base.rb
    CHANGED
    
    | 
         @@ -51,10 +51,10 @@ module Datahen 
     | 
|
| 
       51 
51 
     | 
    
         
             
                    query[:status] = opts[:status] if opts[:status]
         
     | 
| 
       52 
52 
     | 
    
         
             
                    query[:page_type] = opts[:page_type] if opts[:page_type]
         
     | 
| 
       53 
53 
     | 
    
         
             
                    query[:gid] = opts[:gid] if opts[:gid]
         
     | 
| 
       54 
     | 
    
         
            -
                    query[:"min-timestamp"] = opts[:"min-timestamp"]
         
     | 
| 
       55 
     | 
    
         
            -
                    query[:"max-timestamp"] = opts[:"max-timestamp"]
         
     | 
| 
       56 
     | 
    
         
            -
                    query[:limit] = opts[:limit]
         
     | 
| 
       57 
     | 
    
         
            -
                    query[:order] = opts[:order]
         
     | 
| 
      
 54 
     | 
    
         
            +
                    query[:"min-timestamp"] = opts[:"min-timestamp"] if opts[:"min-timestamp"]
         
     | 
| 
      
 55 
     | 
    
         
            +
                    query[:"max-timestamp"] = opts[:"max-timestamp"] if opts[:"max-timestamp"]
         
     | 
| 
      
 56 
     | 
    
         
            +
                    query[:limit] = opts[:limit] if opts[:limit]
         
     | 
| 
      
 57 
     | 
    
         
            +
                    query[:order] = opts[:order] if opts[:order]
         
     | 
| 
       58 
58 
     | 
    
         | 
| 
       59 
59 
     | 
    
         
             
                    if opts[:query]
         
     | 
| 
       60 
60 
     | 
    
         
             
                      if opts[:query].is_a?(Hash)
         
     | 
    
        data/lib/datahen/client/job.rb
    CHANGED
    
    | 
         @@ -6,8 +6,12 @@ module Datahen 
     | 
|
| 
       6 
6 
     | 
    
         
             
                    self.class.get("/jobs", params)
         
     | 
| 
       7 
7 
     | 
    
         
             
                  end
         
     | 
| 
       8 
8 
     | 
    
         | 
| 
       9 
     | 
    
         
            -
                  def find(job_id)
         
     | 
| 
       10 
     | 
    
         
            -
                     
     | 
| 
      
 9 
     | 
    
         
            +
                  def find(job_id, opts={})
         
     | 
| 
      
 10 
     | 
    
         
            +
                    if opts[:live]
         
     | 
| 
      
 11 
     | 
    
         
            +
                      self.class.get("/jobs/#{job_id}", @options)
         
     | 
| 
      
 12 
     | 
    
         
            +
                    else
         
     | 
| 
      
 13 
     | 
    
         
            +
                      self.class.get("/cached/jobs/#{job_id}", @options)
         
     | 
| 
      
 14 
     | 
    
         
            +
                    end
         
     | 
| 
       11 
15 
     | 
    
         
             
                  end
         
     | 
| 
       12 
16 
     | 
    
         | 
| 
       13 
17 
     | 
    
         
             
                  def update(job_id, opts={})
         
     | 
| 
         @@ -15,6 +19,7 @@ module Datahen 
     | 
|
| 
       15 
19 
     | 
    
         
             
                    body[:status] = opts[:status] if opts[:status]
         
     | 
| 
       16 
20 
     | 
    
         
             
                    body[:standard_worker_count] = opts[:workers] if opts[:workers]
         
     | 
| 
       17 
21 
     | 
    
         
             
                    body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
         
     | 
| 
      
 22 
     | 
    
         
            +
                    body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
         
     | 
| 
       18 
23 
     | 
    
         
             
                    params = @options.merge({body: body.to_json})
         
     | 
| 
       19 
24 
     | 
    
         | 
| 
       20 
25 
     | 
    
         
             
                    self.class.put("/jobs/#{job_id}", params)
         
     | 
| 
         @@ -41,6 +46,7 @@ module Datahen 
     | 
|
| 
       41 
46 
     | 
    
         
             
                    body[:pages] = opts.fetch(:pages) {[]}
         
     | 
| 
       42 
47 
     | 
    
         
             
                    body[:seeding_status] = opts.fetch(:seeding_status){ nil }
         
     | 
| 
       43 
48 
     | 
    
         
             
                    body[:log_error] = opts[:log_error] if opts[:log_error]
         
     | 
| 
      
 49 
     | 
    
         
            +
                    body[:keep_outputs] = !!opts[:keep_outputs] if opts.has_key?(:keep_outputs)
         
     | 
| 
       44 
50 
     | 
    
         | 
| 
       45 
51 
     | 
    
         
             
                    params = @options.merge({body: body.to_json})
         
     | 
| 
       46 
52 
     | 
    
         | 
| 
         @@ -0,0 +1,16 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Datahen
         
     | 
| 
      
 2 
     | 
    
         
            +
              module Client
         
     | 
| 
      
 3 
     | 
    
         
            +
                class JobFinisher < Datahen::Client::Base
         
     | 
| 
      
 4 
     | 
    
         
            +
                  # Reset finisher on a scraper's current job.
         
     | 
| 
      
 5 
     | 
    
         
            +
                  #
         
     | 
| 
      
 6 
     | 
    
         
            +
                  # @param [Integer] job_id Job ID
         
     | 
| 
      
 7 
     | 
    
         
            +
                  # @param [Hash] opts ({}) API custom parameters.
         
     | 
| 
      
 8 
     | 
    
         
            +
                  #
         
     | 
| 
      
 9 
     | 
    
         
            +
                  # @return [HTTParty::Response]
         
     | 
| 
      
 10 
     | 
    
         
            +
                  def reset(job_id, opts={})
         
     | 
| 
      
 11 
     | 
    
         
            +
                    params = @options.merge(opts)
         
     | 
| 
      
 12 
     | 
    
         
            +
                    self.class.put("/jobs/#{job_id}/finisher/reset", params)
         
     | 
| 
      
 13 
     | 
    
         
            +
                  end
         
     | 
| 
      
 14 
     | 
    
         
            +
                end
         
     | 
| 
      
 15 
     | 
    
         
            +
              end
         
     | 
| 
      
 16 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -48,11 +48,30 @@ module Datahen 
     | 
|
| 
       48 
48 
     | 
    
         
             
                    body[:pages] = opts.fetch(:pages) {[]}
         
     | 
| 
       49 
49 
     | 
    
         
             
                    body[:parsing_status] = opts.fetch(:parsing_status){ nil }
         
     | 
| 
       50 
50 
     | 
    
         
             
                    body[:log_error] = opts[:log_error] if opts[:log_error]
         
     | 
| 
      
 51 
     | 
    
         
            +
                    body[:keep_outputs] = !!opts[:keep_outputs] if opts.has_key?(:keep_outputs)
         
     | 
| 
       51 
52 
     | 
    
         | 
| 
       52 
53 
     | 
    
         
             
                    params = @options.merge({body: body.to_json})
         
     | 
| 
       53 
54 
     | 
    
         | 
| 
       54 
55 
     | 
    
         
             
                    self.class.put("/jobs/#{job_id}/pages/#{gid}/parsing_update", params)
         
     | 
| 
       55 
56 
     | 
    
         
             
                  end
         
     | 
| 
      
 57 
     | 
    
         
            +
             
     | 
| 
      
 58 
     | 
    
         
            +
                  def find_content(job_id, gid)
         
     | 
| 
      
 59 
     | 
    
         
            +
                    self.class.get("/jobs/#{job_id}/pages/#{gid}/content", @options)
         
     | 
| 
      
 60 
     | 
    
         
            +
                  end
         
     | 
| 
      
 61 
     | 
    
         
            +
             
     | 
| 
      
 62 
     | 
    
         
            +
                  def find_failed_content(job_id, gid)
         
     | 
| 
      
 63 
     | 
    
         
            +
                    self.class.get("/jobs/#{job_id}/pages/#{gid}/failed_content", @options)
         
     | 
| 
      
 64 
     | 
    
         
            +
                  end
         
     | 
| 
      
 65 
     | 
    
         
            +
             
     | 
| 
      
 66 
     | 
    
         
            +
                  def reparse(job_id, opts={})
         
     | 
| 
      
 67 
     | 
    
         
            +
                    params = @options.merge(opts)
         
     | 
| 
      
 68 
     | 
    
         
            +
                    self.class.put("/jobs/#{job_id}/pages/reparse", params)
         
     | 
| 
      
 69 
     | 
    
         
            +
                  end
         
     | 
| 
      
 70 
     | 
    
         
            +
             
     | 
| 
      
 71 
     | 
    
         
            +
                  def refetch(job_id, opts={})
         
     | 
| 
      
 72 
     | 
    
         
            +
                    params = @options.merge(opts)
         
     | 
| 
      
 73 
     | 
    
         
            +
                    self.class.put("/jobs/#{job_id}/pages/refetch", params)
         
     | 
| 
      
 74 
     | 
    
         
            +
                  end
         
     | 
| 
       56 
75 
     | 
    
         
             
                end
         
     | 
| 
       57 
76 
     | 
    
         
             
              end
         
     | 
| 
       58 
77 
     | 
    
         
             
            end
         
     | 
| 
         @@ -2,12 +2,20 @@ module Datahen 
     | 
|
| 
       2 
2 
     | 
    
         
             
              module Client
         
     | 
| 
       3 
3 
     | 
    
         
             
                class JobStat < Datahen::Client::Base
         
     | 
| 
       4 
4 
     | 
    
         | 
| 
       5 
     | 
    
         
            -
                  def job_current_stats(job_id)
         
     | 
| 
       6 
     | 
    
         
            -
                     
     | 
| 
      
 5 
     | 
    
         
            +
                  def job_current_stats(job_id, opts={})
         
     | 
| 
      
 6 
     | 
    
         
            +
                    if opts[:live]
         
     | 
| 
      
 7 
     | 
    
         
            +
                      self.class.get("/jobs/#{job_id}/stats/current", @options)
         
     | 
| 
      
 8 
     | 
    
         
            +
                    else
         
     | 
| 
      
 9 
     | 
    
         
            +
                      self.class.get("/cached/jobs/#{job_id}/stats/current", @options)
         
     | 
| 
      
 10 
     | 
    
         
            +
                    end
         
     | 
| 
       7 
11 
     | 
    
         
             
                  end
         
     | 
| 
       8 
12 
     | 
    
         | 
| 
       9 
     | 
    
         
            -
                  def scraper_job_current_stats(scraper_name)
         
     | 
| 
       10 
     | 
    
         
            -
                     
     | 
| 
      
 13 
     | 
    
         
            +
                  def scraper_job_current_stats(scraper_name, opts={})
         
     | 
| 
      
 14 
     | 
    
         
            +
                    if opts[:live]
         
     | 
| 
      
 15 
     | 
    
         
            +
                      self.class.get("/scrapers/#{scraper_name}/current_job/stats/current", @options)
         
     | 
| 
      
 16 
     | 
    
         
            +
                    else
         
     | 
| 
      
 17 
     | 
    
         
            +
                      self.class.get("/cached/scrapers/#{scraper_name}/current_job/stats/current", @options)
         
     | 
| 
      
 18 
     | 
    
         
            +
                    end
         
     | 
| 
       11 
19 
     | 
    
         
             
                  end
         
     | 
| 
       12 
20 
     | 
    
         | 
| 
       13 
21 
     | 
    
         
             
                  def job_stats_history(job_id)
         
     | 
| 
         @@ -0,0 +1,28 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Datahen
         
     | 
| 
      
 2 
     | 
    
         
            +
              module Client
         
     | 
| 
      
 3 
     | 
    
         
            +
                class JobVar < Datahen::Client::Base
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
                  def find(job_id, var_name)
         
     | 
| 
      
 6 
     | 
    
         
            +
                    self.class.get("/jobs/#{job_id}/vars/#{var_name}", @options)
         
     | 
| 
      
 7 
     | 
    
         
            +
                  end
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
                  def all(job_id, opts={})
         
     | 
| 
      
 10 
     | 
    
         
            +
                    params = @options.merge opts
         
     | 
| 
      
 11 
     | 
    
         
            +
                    self.class.get("/jobs/#{job_id}/vars", params)
         
     | 
| 
      
 12 
     | 
    
         
            +
                  end
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
                  def set(job_id, var_name, value, opts={})
         
     | 
| 
      
 15 
     | 
    
         
            +
                    body = {}
         
     | 
| 
      
 16 
     | 
    
         
            +
                    body[:value] = value
         
     | 
| 
      
 17 
     | 
    
         
            +
                    body[:secret] = opts[:secret] if opts[:secret]
         
     | 
| 
      
 18 
     | 
    
         
            +
                    params = @options.merge({body: body.to_json})
         
     | 
| 
      
 19 
     | 
    
         
            +
                    self.class.put("/jobs/#{job_id}/vars/#{var_name}", params)
         
     | 
| 
      
 20 
     | 
    
         
            +
                  end
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
                  def unset(job_id, var_name, opts={})
         
     | 
| 
      
 23 
     | 
    
         
            +
                    params = @options.merge(opts)
         
     | 
| 
      
 24 
     | 
    
         
            +
                    self.class.delete("/jobs/#{job_id}/vars/#{var_name}", params)
         
     | 
| 
      
 25 
     | 
    
         
            +
                  end
         
     | 
| 
      
 26 
     | 
    
         
            +
                end
         
     | 
| 
      
 27 
     | 
    
         
            +
              end
         
     | 
| 
      
 28 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -15,8 +15,12 @@ module Datahen 
     | 
|
| 
       15 
15 
     | 
    
         
             
                    self.class.post("/scrapers/#{scraper_name}/jobs", params)
         
     | 
| 
       16 
16 
     | 
    
         
             
                  end
         
     | 
| 
       17 
17 
     | 
    
         | 
| 
       18 
     | 
    
         
            -
                  def find(scraper_name)
         
     | 
| 
       19 
     | 
    
         
            -
                     
     | 
| 
      
 18 
     | 
    
         
            +
                  def find(scraper_name, opts={})
         
     | 
| 
      
 19 
     | 
    
         
            +
                    if opts[:live]
         
     | 
| 
      
 20 
     | 
    
         
            +
                      self.class.get("/scrapers/#{scraper_name}/current_job", @options)
         
     | 
| 
      
 21 
     | 
    
         
            +
                    else
         
     | 
| 
      
 22 
     | 
    
         
            +
                      self.class.get("/cached/scrapers/#{scraper_name}/current_job", @options)
         
     | 
| 
      
 23 
     | 
    
         
            +
                    end
         
     | 
| 
       20 
24 
     | 
    
         
             
                  end
         
     | 
| 
       21 
25 
     | 
    
         | 
| 
       22 
26 
     | 
    
         
             
                  def update(scraper_name, opts={})
         
     | 
| 
         @@ -26,6 +26,9 @@ module Datahen 
     | 
|
| 
       26 
26 
     | 
    
         
             
                    self.class.put("/scrapers/#{scraper_name}/current_job/pages/refetch", params)
         
     | 
| 
       27 
27 
     | 
    
         
             
                  end
         
     | 
| 
       28 
28 
     | 
    
         | 
| 
      
 29 
     | 
    
         
            +
                  # Deprecated, please use Datahen::Client::JobPage#refetch instead.
         
     | 
| 
      
 30 
     | 
    
         
            +
                  #
         
     | 
| 
      
 31 
     | 
    
         
            +
                  # @note This method will be removed at some point in the future.
         
     | 
| 
       29 
32 
     | 
    
         
             
                  def refetch_by_job(job_id, opts={})
         
     | 
| 
       30 
33 
     | 
    
         
             
                    params = @options.merge(opts)
         
     | 
| 
       31 
34 
     | 
    
         
             
                    self.class.put("/jobs/#{job_id}/pages/refetch", params)
         
     | 
| 
         @@ -36,11 +39,6 @@ module Datahen 
     | 
|
| 
       36 
39 
     | 
    
         
             
                    self.class.put("/scrapers/#{scraper_name}/current_job/pages/reparse", params)
         
     | 
| 
       37 
40 
     | 
    
         
             
                  end
         
     | 
| 
       38 
41 
     | 
    
         | 
| 
       39 
     | 
    
         
            -
                  def reparse_by_job(job_id, opts={})
         
     | 
| 
       40 
     | 
    
         
            -
                    params = @options.merge(opts)
         
     | 
| 
       41 
     | 
    
         
            -
                    self.class.put("/jobs/#{job_id}/pages/reparse", params)
         
     | 
| 
       42 
     | 
    
         
            -
                  end
         
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
       44 
42 
     | 
    
         
             
                  def enqueue(scraper_name, method, url, opts={})
         
     | 
| 
       45 
43 
     | 
    
         
             
                    body = {}
         
     | 
| 
       46 
44 
     | 
    
         
             
                    body[:method] =  method != "" ? method : "GET"
         
     | 
| 
         @@ -62,6 +60,14 @@ module Datahen 
     | 
|
| 
       62 
60 
     | 
    
         
             
                    self.class.post("/scrapers/#{scraper_name}/current_job/pages", params)
         
     | 
| 
       63 
61 
     | 
    
         
             
                  end
         
     | 
| 
       64 
62 
     | 
    
         | 
| 
      
 63 
     | 
    
         
            +
                  def find_content(scraper_name, gid)
         
     | 
| 
      
 64 
     | 
    
         
            +
                    self.class.get("/scrapers/#{scraper_name}/current_job/pages/#{gid}/content", @options)
         
     | 
| 
      
 65 
     | 
    
         
            +
                  end
         
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
      
 67 
     | 
    
         
            +
                  def find_failed_content(scraper_name, gid)
         
     | 
| 
      
 68 
     | 
    
         
            +
                    self.class.get("/scrapers/#{scraper_name}/current_job/pages/#{gid}/failed_content", @options)
         
     | 
| 
      
 69 
     | 
    
         
            +
                  end
         
     | 
| 
      
 70 
     | 
    
         
            +
             
     | 
| 
       65 
71 
     | 
    
         
             
                end
         
     | 
| 
       66 
72 
     | 
    
         
             
              end
         
     | 
| 
       67 
73 
     | 
    
         
             
            end
         
     | 
| 
         @@ -63,9 +63,9 @@ module Datahen 
     | 
|
| 
       63 
63 
     | 
    
         
             
                    client.find(gid)
         
     | 
| 
       64 
64 
     | 
    
         
             
                  end
         
     | 
| 
       65 
65 
     | 
    
         | 
| 
       66 
     | 
    
         
            -
                  def get_content(gid)
         
     | 
| 
       67 
     | 
    
         
            -
                    client = Client:: 
     | 
| 
       68 
     | 
    
         
            -
                    content_json = client.find_content(gid)
         
     | 
| 
      
 66 
     | 
    
         
            +
                  def get_content(job_id, gid)
         
     | 
| 
      
 67 
     | 
    
         
            +
                    client = Client::JobPage.new()
         
     | 
| 
      
 68 
     | 
    
         
            +
                    content_json = client.find_content(job_id, gid)
         
     | 
| 
       69 
69 
     | 
    
         | 
| 
       70 
70 
     | 
    
         
             
                    if content_json['available']
         
     | 
| 
       71 
71 
     | 
    
         
             
                      signed_url = content_json['signed_url']
         
     | 
| 
         @@ -75,9 +75,9 @@ module Datahen 
     | 
|
| 
       75 
75 
     | 
    
         
             
                    end
         
     | 
| 
       76 
76 
     | 
    
         
             
                  end
         
     | 
| 
       77 
77 
     | 
    
         | 
| 
       78 
     | 
    
         
            -
                  def get_failed_content(gid)
         
     | 
| 
       79 
     | 
    
         
            -
                    client = Client:: 
     | 
| 
       80 
     | 
    
         
            -
                    content_json = client.find_failed_content(gid)
         
     | 
| 
      
 78 
     | 
    
         
            +
                  def get_failed_content(job_id, gid)
         
     | 
| 
      
 79 
     | 
    
         
            +
                    client = Client::JobPage.new()
         
     | 
| 
      
 80 
     | 
    
         
            +
                    content_json = client.find_failed_content(job_id, gid)
         
     | 
| 
       81 
81 
     | 
    
         | 
| 
       82 
82 
     | 
    
         
             
                    if content_json['available']
         
     | 
| 
       83 
83 
     | 
    
         
             
                      signed_url = content_json['signed_url']
         
     | 
| 
         @@ -1,18 +1,24 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module Datahen
         
     | 
| 
       2 
2 
     | 
    
         
             
              module Scraper
         
     | 
| 
       3 
3 
     | 
    
         
             
                class Parser
         
     | 
| 
       4 
     | 
    
         
            -
                  def self.exec_parser_page(filename, gid, job_id=nil, save=false, vars = {})
         
     | 
| 
      
 4 
     | 
    
         
            +
                  def self.exec_parser_page(filename, gid, job_id=nil, save=false, vars = {}, keep_outputs=false)
         
     | 
| 
       5 
5 
     | 
    
         
             
                    extname = File.extname(filename)
         
     | 
| 
       6 
6 
     | 
    
         
             
                    case extname
         
     | 
| 
       7 
7 
     | 
    
         
             
                    when '.rb'
         
     | 
| 
       8 
     | 
    
         
            -
                      executor = RubyParserExecutor.new( 
     | 
| 
      
 8 
     | 
    
         
            +
                      executor = RubyParserExecutor.new(
         
     | 
| 
      
 9 
     | 
    
         
            +
                        filename: filename,
         
     | 
| 
      
 10 
     | 
    
         
            +
                        gid: gid,
         
     | 
| 
      
 11 
     | 
    
         
            +
                        job_id: job_id,
         
     | 
| 
      
 12 
     | 
    
         
            +
                        vars: vars,
         
     | 
| 
      
 13 
     | 
    
         
            +
                        keep_outputs: keep_outputs
         
     | 
| 
      
 14 
     | 
    
         
            +
                      )
         
     | 
| 
       9 
15 
     | 
    
         
             
                      executor.exec_parser(save)
         
     | 
| 
       10 
16 
     | 
    
         
             
                    else
         
     | 
| 
       11 
17 
     | 
    
         
             
                      puts "Unable to find a parser executor for file type \"#{extname}\""
         
     | 
| 
       12 
18 
     | 
    
         
             
                    end
         
     | 
| 
       13 
19 
     | 
    
         
             
                  end
         
     | 
| 
       14 
20 
     | 
    
         | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
       16 
22 
     | 
    
         
             
                end
         
     | 
| 
       17 
23 
     | 
    
         
             
              end
         
     | 
| 
       18 
     | 
    
         
            -
            end
         
     | 
| 
      
 24 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -15,6 +15,7 @@ module Datahen 
     | 
|
| 
       15 
15 
     | 
    
         
             
                    @gid = options.fetch(:gid) { raise "GID is required"}
         
     | 
| 
       16 
16 
     | 
    
         
             
                    @job_id = options.fetch(:job_id)
         
     | 
| 
       17 
17 
     | 
    
         
             
                    @page_vars = options.fetch(:vars) { {} }
         
     | 
| 
      
 18 
     | 
    
         
            +
                    @keep_outputs = !!(options.fetch(:keep_outputs) { false })
         
     | 
| 
       18 
19 
     | 
    
         
             
                  end
         
     | 
| 
       19 
20 
     | 
    
         | 
| 
       20 
21 
     | 
    
         
             
                  def self.exposed_methods
         
     | 
| 
         @@ -66,7 +67,9 @@ module Datahen 
     | 
|
| 
       66 
67 
     | 
    
         
             
                    response = parsing_update(
         
     | 
| 
       67 
68 
     | 
    
         
             
                      job_id: job_id,
         
     | 
| 
       68 
69 
     | 
    
         
             
                      gid: gid,
         
     | 
| 
       69 
     | 
    
         
            -
                      parsing_status: :starting)
     | 
| 
      
 70 
     | 
    
         
            +
                      parsing_status: :starting,
         
     | 
| 
      
 71 
     | 
    
         
            +
                      keep_outputs: @keep_outputs
         
     | 
| 
      
 72 
     | 
    
         
            +
                    )
         
     | 
| 
       70 
73 
     | 
    
         | 
| 
       71 
74 
     | 
    
         
             
                    if response.code == 200
         
     | 
| 
       72 
75 
     | 
    
         
             
                      puts "Page Parsing Status Updated."
         
     | 
| 
         @@ -165,7 +168,7 @@ module Datahen 
     | 
|
| 
       165 
168 
     | 
    
         
             
                        handle_error(e) if save
         
     | 
| 
       166 
169 
     | 
    
         
             
                        raise e
         
     | 
| 
       167 
170 
     | 
    
         
             
                      end
         
     | 
| 
       168 
     | 
    
         
            -
             
     | 
| 
      
 171 
     | 
    
         
            +
             
     | 
| 
       169 
172 
     | 
    
         
             
                      if refetch_self
         
     | 
| 
       170 
173 
     | 
    
         
             
                        refetch_page gid
         
     | 
| 
       171 
174 
     | 
    
         
             
                      elsif reparse_self
         
     | 
| 
         @@ -178,11 +181,11 @@ module Datahen 
     | 
|
| 
       178 
181 
     | 
    
         
             
                  end
         
     | 
| 
       179 
182 
     | 
    
         | 
| 
       180 
183 
     | 
    
         
             
                  def content
         
     | 
| 
       181 
     | 
    
         
            -
                    @content ||= get_content(gid)
         
     | 
| 
      
 184 
     | 
    
         
            +
                    @content ||= get_content(job_id, gid)
         
     | 
| 
       182 
185 
     | 
    
         
             
                  end
         
     | 
| 
       183 
186 
     | 
    
         | 
| 
       184 
187 
     | 
    
         
             
                  def failed_content
         
     | 
| 
       185 
     | 
    
         
            -
                    @failed_content ||= get_failed_content(gid)
         
     | 
| 
      
 188 
     | 
    
         
            +
                    @failed_content ||= get_failed_content(job_id, gid)
         
     | 
| 
       186 
189 
     | 
    
         
             
                  end
         
     | 
| 
       187 
190 
     | 
    
         | 
| 
       188 
191 
     | 
    
         
             
                  def handle_error(e)
         
     | 
| 
         @@ -6,6 +6,7 @@ module Datahen 
     | 
|
| 
       6 
6 
     | 
    
         
             
                  def initialize(options={})
         
     | 
| 
       7 
7 
     | 
    
         
             
                    @filename = options.fetch(:filename) { raise "Filename is required"}
         
     | 
| 
       8 
8 
     | 
    
         
             
                    @job_id = options[:job_id]
         
     | 
| 
      
 9 
     | 
    
         
            +
                    @keep_outputs = !!(options.fetch(:keep_outputs) { false })
         
     | 
| 
       9 
10 
     | 
    
         
             
                  end
         
     | 
| 
       10 
11 
     | 
    
         | 
| 
       11 
12 
     | 
    
         
             
                  def self.exposed_methods
         
     | 
| 
         @@ -81,7 +82,9 @@ module Datahen 
     | 
|
| 
       81 
82 
     | 
    
         | 
| 
       82 
83 
     | 
    
         
             
                    response = seeding_update(
         
     | 
| 
       83 
84 
     | 
    
         
             
                      job_id: job_id,
         
     | 
| 
       84 
     | 
    
         
            -
                      seeding_status: :starting)
     | 
| 
      
 85 
     | 
    
         
            +
                      seeding_status: :starting,
         
     | 
| 
      
 86 
     | 
    
         
            +
                      keep_outputs: @keep_outputs
         
     | 
| 
      
 87 
     | 
    
         
            +
                    )
         
     | 
| 
       85 
88 
     | 
    
         | 
| 
       86 
89 
     | 
    
         
             
                    if response.code == 200
         
     | 
| 
       87 
90 
     | 
    
         
             
                      puts "Seeding Status Updated."
         
     | 
| 
         @@ -2,11 +2,15 @@ module Datahen 
     | 
|
| 
       2 
2 
     | 
    
         
             
              module Scraper
         
     | 
| 
       3 
3 
     | 
    
         
             
                class Seeder
         
     | 
| 
       4 
4 
     | 
    
         | 
| 
       5 
     | 
    
         
            -
                  def self.exec_seeder(filename, job_id=nil, save=false)
         
     | 
| 
      
 5 
     | 
    
         
            +
                  def self.exec_seeder(filename, job_id=nil, save=false, keep_outputs=false)
         
     | 
| 
       6 
6 
     | 
    
         
             
                    extname = File.extname(filename)
         
     | 
| 
       7 
7 
     | 
    
         
             
                    case extname
         
     | 
| 
       8 
8 
     | 
    
         
             
                    when '.rb'
         
     | 
| 
       9 
     | 
    
         
            -
                      executor = RubySeederExecutor.new( 
     | 
| 
      
 9 
     | 
    
         
            +
                      executor = RubySeederExecutor.new(
         
     | 
| 
      
 10 
     | 
    
         
            +
                        filename: filename,
         
     | 
| 
      
 11 
     | 
    
         
            +
                        job_id: job_id,
         
     | 
| 
      
 12 
     | 
    
         
            +
                        keep_outputs: keep_outputs
         
     | 
| 
      
 13 
     | 
    
         
            +
                      )
         
     | 
| 
       10 
14 
     | 
    
         
             
                      executor.exec_seeder(save)
         
     | 
| 
       11 
15 
     | 
    
         
             
                    else
         
     | 
| 
       12 
16 
     | 
    
         
             
                      puts "Unable to find a seeder executor for file type \"#{extname}\""
         
     | 
| 
         @@ -15,4 +19,4 @@ module Datahen 
     | 
|
| 
       15 
19 
     | 
    
         | 
| 
       16 
20 
     | 
    
         
             
                end
         
     | 
| 
       17 
21 
     | 
    
         
             
              end
         
     | 
| 
       18 
     | 
    
         
            -
            end
         
     | 
| 
      
 22 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/datahen/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | 
         @@ -1,14 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: datahen
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.11.1
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.14.0
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Parama Danoesubroto
         
     | 
| 
       8 
8 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       9 
9 
     | 
    
         
             
            bindir: exe
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date: 2020- 
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2020-07-26 00:00:00.000000000 Z
         
     | 
| 
       12 
12 
     | 
    
         
             
            dependencies:
         
     | 
| 
       13 
13 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       14 
14 
     | 
    
         
             
              name: thor
         
     | 
| 
         @@ -215,10 +215,12 @@ files: 
     | 
|
| 
       215 
215 
     | 
    
         
             
            - lib/datahen/client/global_page.rb
         
     | 
| 
       216 
216 
     | 
    
         
             
            - lib/datahen/client/job.rb
         
     | 
| 
       217 
217 
     | 
    
         
             
            - lib/datahen/client/job_export.rb
         
     | 
| 
      
 218 
     | 
    
         
            +
            - lib/datahen/client/job_finisher.rb
         
     | 
| 
       218 
219 
     | 
    
         
             
            - lib/datahen/client/job_log.rb
         
     | 
| 
       219 
220 
     | 
    
         
             
            - lib/datahen/client/job_output.rb
         
     | 
| 
       220 
221 
     | 
    
         
             
            - lib/datahen/client/job_page.rb
         
     | 
| 
       221 
222 
     | 
    
         
             
            - lib/datahen/client/job_stat.rb
         
     | 
| 
      
 223 
     | 
    
         
            +
            - lib/datahen/client/job_var.rb
         
     | 
| 
       222 
224 
     | 
    
         
             
            - lib/datahen/client/scraper.rb
         
     | 
| 
       223 
225 
     | 
    
         
             
            - lib/datahen/client/scraper_deployment.rb
         
     | 
| 
       224 
226 
     | 
    
         
             
            - lib/datahen/client/scraper_export.rb
         
     | 
| 
         @@ -262,7 +264,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement 
     | 
|
| 
       262 
264 
     | 
    
         
             
                - !ruby/object:Gem::Version
         
     | 
| 
       263 
265 
     | 
    
         
             
                  version: '0'
         
     | 
| 
       264 
266 
     | 
    
         
             
            requirements: []
         
     | 
| 
       265 
     | 
    
         
            -
            rubygems_version: 3. 
     | 
| 
      
 267 
     | 
    
         
            +
            rubygems_version: 3.1.2
         
     | 
| 
       266 
268 
     | 
    
         
             
            signing_key: 
         
     | 
| 
       267 
269 
     | 
    
         
             
            specification_version: 4
         
     | 
| 
       268 
270 
     | 
    
         
             
            summary: DataHen toolbelt for developers
         
     |