answersengine 0.10.1 → 0.10.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/CODE_OF_CONDUCT.md +1 -1
  3. data/LICENSE.txt +1 -1
  4. data/README.md +3 -4
  5. data/answersengine.gemspec +6 -12
  6. data/exe/answersengine +3 -2
  7. data/lib/answersengine.rb +20 -3
  8. metadata +14 -152
  9. data/examples/fetchtest/libraries/hello.rb +0 -9
  10. data/examples/fetchtest/libraries/hello_fail.rb +0 -10
  11. data/examples/fetchtest/parsers/failed.rb +0 -2
  12. data/examples/fetchtest/parsers/find_outputs.rb +0 -18
  13. data/examples/fetchtest/parsers/home.rb +0 -50
  14. data/examples/fetchtest/parsers/nested_fail.rb +0 -3
  15. data/examples/fetchtest/parsers/simple.rb +0 -14
  16. data/examples/fetchtest/seeders/csv_seeder.rb +0 -12
  17. data/examples/fetchtest/seeders/failed.rb +0 -1
  18. data/examples/fetchtest/seeders/list_of_urls.csv +0 -5
  19. data/examples/fetchtest/seeders/seed.rb +0 -28
  20. data/examples/fetchtest/seeders/test_reset_page.rb +0 -4
  21. data/lib/answersengine/cli.rb +0 -45
  22. data/lib/answersengine/cli/env_var.rb +0 -48
  23. data/lib/answersengine/cli/finisher.rb +0 -40
  24. data/lib/answersengine/cli/global_page.rb +0 -39
  25. data/lib/answersengine/cli/job.rb +0 -30
  26. data/lib/answersengine/cli/job_output.rb +0 -69
  27. data/lib/answersengine/cli/parser.rb +0 -64
  28. data/lib/answersengine/cli/scraper.rb +0 -185
  29. data/lib/answersengine/cli/scraper_deployment.rb +0 -24
  30. data/lib/answersengine/cli/scraper_export.rb +0 -51
  31. data/lib/answersengine/cli/scraper_exporter.rb +0 -40
  32. data/lib/answersengine/cli/scraper_finisher.rb +0 -20
  33. data/lib/answersengine/cli/scraper_job.rb +0 -75
  34. data/lib/answersengine/cli/scraper_job_var.rb +0 -48
  35. data/lib/answersengine/cli/scraper_page.rb +0 -203
  36. data/lib/answersengine/cli/scraper_var.rb +0 -48
  37. data/lib/answersengine/cli/seeder.rb +0 -40
  38. data/lib/answersengine/client.rb +0 -29
  39. data/lib/answersengine/client/auth_token.rb +0 -50
  40. data/lib/answersengine/client/backblaze_content.rb +0 -45
  41. data/lib/answersengine/client/base.rb +0 -55
  42. data/lib/answersengine/client/deploy_key.rb +0 -21
  43. data/lib/answersengine/client/env_var.rb +0 -28
  44. data/lib/answersengine/client/export.rb +0 -10
  45. data/lib/answersengine/client/global_page.rb +0 -18
  46. data/lib/answersengine/client/job.rb +0 -64
  47. data/lib/answersengine/client/job_export.rb +0 -10
  48. data/lib/answersengine/client/job_log.rb +0 -26
  49. data/lib/answersengine/client/job_output.rb +0 -19
  50. data/lib/answersengine/client/job_page.rb +0 -58
  51. data/lib/answersengine/client/job_stat.rb +0 -16
  52. data/lib/answersengine/client/scraper.rb +0 -57
  53. data/lib/answersengine/client/scraper_deployment.rb +0 -18
  54. data/lib/answersengine/client/scraper_export.rb +0 -22
  55. data/lib/answersengine/client/scraper_exporter.rb +0 -14
  56. data/lib/answersengine/client/scraper_finisher.rb +0 -16
  57. data/lib/answersengine/client/scraper_job.rb +0 -49
  58. data/lib/answersengine/client/scraper_job_output.rb +0 -19
  59. data/lib/answersengine/client/scraper_job_page.rb +0 -67
  60. data/lib/answersengine/client/scraper_job_var.rb +0 -28
  61. data/lib/answersengine/client/scraper_var.rb +0 -28
  62. data/lib/answersengine/plugin.rb +0 -6
  63. data/lib/answersengine/plugin/context_exposer.rb +0 -55
  64. data/lib/answersengine/scraper.rb +0 -18
  65. data/lib/answersengine/scraper/executor.rb +0 -373
  66. data/lib/answersengine/scraper/finisher.rb +0 -18
  67. data/lib/answersengine/scraper/parser.rb +0 -18
  68. data/lib/answersengine/scraper/ruby_finisher_executor.rb +0 -116
  69. data/lib/answersengine/scraper/ruby_parser_executor.rb +0 -200
  70. data/lib/answersengine/scraper/ruby_seeder_executor.rb +0 -120
  71. data/lib/answersengine/scraper/seeder.rb +0 -18
  72. data/lib/answersengine/version.rb +0 -3
@@ -1,48 +0,0 @@
1
module AnswersEngine
  class CLI < Thor
    # Thor subcommands for listing, setting, showing and unsetting
    # environment variables on a scraper. Every command prints the
    # client's response to stdout.
    class ScraperVar < Thor
      package_name "scraper var"

      # Builds the usage banner from the executable basename, the package
      # name and the command's own usage string.
      def self.banner(command, namespace = nil, subcommand = false)
        format("%s %s %s", basename, @package_name, command.usage)
      end

      desc "list <scraper_name>", "List environment variables on the scraper"
      long_desc <<-LONGDESC
        List all environment variables on the scraper.
      LONGDESC
      option :page, aliases: :p, type: :numeric, desc: 'Get the next set of records by page.'
      option :per_page, aliases: :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
      def list(scraper_name)
        response = Client::ScraperVar.new(options).all(scraper_name)
        puts response.to_s
      end

      desc "set <scraper_name> <var_name> <value>", "Set an environment var on the scraper"
      long_desc <<-LONGDESC
        Creates an environment variable\x5
        <var_name>: Var name can only consist of alphabets, numbers, underscores. Name must be unique to your scraper, otherwise it will be overwritten.\x5
        <value>: Value of variable.\x5
      LONGDESC
      option :secret, type: :boolean, desc: 'Set true to make it decrypt the value. Default: false'
      def set(scraper_name, var_name, value)
        # The CLI options are used both to build the client and as the
        # per-call options (this is where :secret is forwarded).
        response = Client::ScraperVar.new(options).set(scraper_name, var_name, value, options)
        puts response.to_s
      end

      desc "show <scraper_name> <var_name>", "Show an environment variable on the scraper"
      def show(scraper_name, var_name)
        response = Client::ScraperVar.new(options).find(scraper_name, var_name)
        puts response.to_s
      end

      desc "unset <scraper_name> <var_name>", "Deletes an environment variable on the scraper"
      def unset(scraper_name, var_name)
        response = Client::ScraperVar.new(options).unset(scraper_name, var_name)
        puts response.to_s
      end
    end
  end
end
@@ -1,40 +0,0 @@
1
module AnswersEngine
  class CLI < Thor
    # Thor subcommands that run a seeder script against a job, either as a
    # dry run (`try_seed`) or for real (`exec_parse`).
    class Seeder < Thor
      desc "try <scraper_name> <seeder_file>", "Tries a seeder file"
      long_desc <<-LONGDESC
        Takes a seeder script and tries to execute it without saving anything.\x5
        <seeder_file>: Seeder script file will be executed.\x5
      LONGDESC
      option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
      # Runs the seeder with the third `exec_seeder` argument set to false
      # and prints whatever the executor returns.
      def try_seed(scraper_name, seeder_file)
        job_id = resolve_job_id(scraper_name)

        puts AnswersEngine::Scraper::Seeder.exec_seeder(seeder_file, job_id, false)
      end

      desc "exec <scraper_name> <seeder_file>", "Executes a seeder script onto a scraper's current job."
      long_desc <<-LONGDESC
        Takes a seeder script and execute it against a job and enqueues the pages into the scraper's current job\x5
        <seeder_file>: Seeder script file that will be executed on the scraper's current job.\x5
      LONGDESC
      option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
      # Runs the seeder with the third `exec_seeder` argument set to true
      # and prints whatever the executor returns.
      #
      # NOTE(review): the usage string advertises "exec" but the method is
      # named `exec_parse`; the name is kept so existing invocations keep
      # working -- confirm which command name is intended.
      def exec_parse(scraper_name, seeder_file)
        job_id = resolve_job_id(scraper_name)

        puts AnswersEngine::Scraper::Seeder.exec_seeder(seeder_file, job_id, true)
      end

      private

      # Returns the job ID given with --job, or otherwise the 'id' field of
      # the job the API returns for the scraper. Private so Thor does not
      # expose it as a command.
      def resolve_job_id(scraper_name)
        return options[:job] if options[:job]

        job = Client::ScraperJob.new(options).find(scraper_name)
        job['id']
      end
    end
  end
end
@@ -1,29 +0,0 @@
1
- require "answersengine/client/base"
2
- require "answersengine/client/auth_token"
3
- require "answersengine/client/deploy_key"
4
- require 'answersengine/client/export'
5
- require "answersengine/client/scraper"
6
- require "answersengine/client/scraper_deployment"
7
- require "answersengine/client/scraper_job_output"
8
- require "answersengine/client/scraper_job_page"
9
- require "answersengine/client/scraper_exporter"
10
- require "answersengine/client/scraper_export"
11
- require "answersengine/client/scraper_job"
12
- require "answersengine/client/scraper_finisher"
13
- require 'answersengine/client/job_export'
14
- require "answersengine/client/job"
15
- require "answersengine/client/job_log"
16
- require "answersengine/client/global_page"
17
- require "answersengine/client/job_page"
18
- require "answersengine/client/job_output"
19
- require "answersengine/client/job_stat"
20
- require "answersengine/client/backblaze_content"
21
- require "answersengine/client/env_var"
22
- require "answersengine/client/scraper_var"
23
- require "answersengine/client/scraper_job_var"
24
-
25
-
26
# Namespace for the API client classes pulled in by the requires above.
# The module body is intentionally empty; it only guarantees that
# AnswersEngine::Client is defined.
module AnswersEngine
  module Client
  end
end
@@ -1,50 +0,0 @@
1
module AnswersEngine
  module Client
    # Client for the /auth_tokens endpoints. Request bodies are sent as JSON
    # and merged on top of the default request options in @options.
    class AuthToken < AnswersEngine::Client::Base

      # GET /auth_tokens/<token>
      def find(token)
        self.class.get("/auth_tokens/#{token}", @options)
      end

      # GET /auth_tokens, with +opts+ merged over the default options.
      def all(opts={})
        params = @options.merge(opts)
        self.class.get("/auth_tokens", params)
      end

      # POST /auth_tokens with the given role and description.
      # +opts+ is accepted for signature compatibility but is not used.
      def create(role, description, opts={})
        body = {
          role: role,
          description: description}

        params = @options.merge({body: body.to_json})
        self.class.post("/auth_tokens", params)
      end

      # POST /accounts/<account_id>/auth_tokens with the given role and
      # description.
      def create_on_account(account_id, role, description)
        body = {
          role: role,
          description: description}

        params = @options.merge({body: body.to_json})
        self.class.post("/accounts/#{account_id}/auth_tokens", params)
      end

      # PUT /auth_tokens/<token>. The role is always sent; the description
      # is only sent when it is not blank (nil, empty or whitespace-only).
      # +opts+ is accepted for signature compatibility but is not used.
      def update(token, role, description="", opts={})
        body = {}

        body[:role] = role
        # `present?` is an ActiveSupport extension and nothing visible here
        # loads ActiveSupport, so the blank check is done in plain Ruby.
        body[:description] = description unless description.to_s.strip.empty?
        params = @options.merge({body: body.to_json})

        self.class.put("/auth_tokens/#{token}", params)
      end

      # DELETE /auth_tokens/<token> with an empty JSON object as the body.
      # +opts+ is accepted for signature compatibility but is not used.
      def delete(token, opts={})
        body = {}
        params = @options.merge({body: body.to_json})

        self.class.delete("/auth_tokens/#{token}", params)
      end
    end
  end
end
@@ -1,45 +0,0 @@
1
- require 'zlib'
2
- require 'httparty'
3
-
4
module AnswersEngine
  module Client
    # Fetches content by URL over HTTP and, optionally, gunzips it.
    class BackblazeContent
      include HTTParty

      # GETs +url+ with the response format forced to plain.
      def get_content(url)
        self.class.get(url, format: :plain)
      end

      # GETs +url+ and runs the response through #gunzip.
      def get_gunzipped_content(url)
        raw = get_content(url)
        gunzip(raw)
      end

      # Decompresses a gzip payload read as ASCII-8BIT.
      #
      # If the reader fails with the message 'unexpected end of file', the
      # method falls back to reading character by character until EOF or the
      # next error, which is printed and ignored. Any other error is
      # re-raised. The reader is closed in all cases.
      def gunzip(string)
        sio = StringIO.new(string)
        gz = Zlib::GzipReader.new(sio, encoding: Encoding::ASCII_8BIT)
        _content = ""
        begin
          _content = gz.read
        rescue => e
          # Only the unexpected-EOF case is recovered from.
          raise e unless e.to_s == 'unexpected end of file'

          begin
            _content += gz.readchar until gz.eof?
          rescue => salvage_error
            puts "Ignored Zlib error: #{salvage_error.to_s}"
          end
        end

        _content
      ensure
        # gz is nil when GzipReader.new itself raised, hence the guard.
        gz.close if gz.respond_to?(:close)
      end
    end
  end
end
@@ -1,55 +0,0 @@
1
- require 'httparty'
2
-
3
module AnswersEngine
  module Client
    # Common base for the API clients: sets the HTTParty base URI and builds
    # the default request options (@options) that subclasses pass along
    # with every request.
    class Base
      include HTTParty

      # Lets the token be assigned explicitly (used by #initialize).
      attr_writer :auth_token

      # Token read from the ANSWERSENGINE_TOKEN environment variable.
      def self.env_auth_token
        ENV['ANSWERSENGINE_TOKEN']
      end

      # API base URL: ANSWERSENGINE_API_URL when set, otherwise the default.
      def env_api_url
        ENV.fetch('ANSWERSENGINE_API_URL', 'https://fetch.answersengine.com/api/v1')
      end

      # The explicitly assigned token, falling back to the environment one.
      def auth_token
        @auth_token ||= self.class.env_auth_token
      end

      # Builds @options from +opts+:
      # * headers carry the bearer token and a JSON content type;
      # * :page, :per_page, :fetch_fail, :parse_fail, :status, :page_type
      #   and :gid are copied into the query string when truthy;
      # * :query (a Hash, or a String that is parsed as JSON) is sent
      #   JSON-encoded as the `q` query parameter.
      def initialize(opts={})
        self.class.base_uri(env_api_url)
        self.auth_token = opts[:auth_token] unless opts[:auth_token].nil?
        @options = { headers: {
          "Authorization" => "Bearer #{auth_token}",
          "Content-Type" => "application/json",
        }}

        # query-string parameter name => option key it is read from
        param_sources = {
          p: :page,
          pp: :per_page,
          fetchfail: :fetch_fail,
          parsefail: :parse_fail,
          status: :status,
          page_type: :page_type,
          gid: :gid,
        }

        query = {}
        param_sources.each do |param, key|
          query[param] = opts[key] if opts[key]
        end

        case opts[:query]
        when Hash
          query[:q] = opts[:query].to_json
        when String
          # Round-trip through the parser so the value sent is valid JSON.
          query[:q] = JSON.parse(opts[:query]).to_json
        end

        @options.merge!(query: query) unless query.empty?
      end
    end
  end
end
@@ -1,21 +0,0 @@
1
module AnswersEngine
  module Client
    # Client for the /deploy_key endpoint. Each call merges +opts+ over the
    # default request options.
    class DeployKey < AnswersEngine::Client::Base
      # GET /deploy_key
      def find(opts={})
        self.class.get("/deploy_key", @options.merge(opts))
      end

      # POST /deploy_key
      def create(opts={})
        self.class.post("/deploy_key", @options.merge(opts))
      end

      # DELETE /deploy_key
      def delete(opts={})
        self.class.delete("/deploy_key", @options.merge(opts))
      end
    end
  end
end
@@ -1,28 +0,0 @@
1
module AnswersEngine
  module Client
    # Client for the /env_vars endpoints.
    class EnvVar < AnswersEngine::Client::Base
      # GET /env_vars/<name>
      def find(name)
        self.class.get("/env_vars/#{name}", @options)
      end

      # GET /env_vars, with +opts+ merged over the default options.
      def all(opts={})
        self.class.get("/env_vars", @options.merge(opts))
      end

      # PUT /env_vars/<name> with a JSON body holding the value and, when
      # opts[:secret] is truthy, the secret flag.
      def set(name, value, opts={})
        payload = { value: value }
        payload[:secret] = opts[:secret] if opts[:secret]

        request = @options.merge({body: payload.to_json})
        self.class.put("/env_vars/#{name}", request)
      end

      # DELETE /env_vars/<name>, with +opts+ merged over the default options.
      def unset(name, opts={})
        self.class.delete("/env_vars/#{name}", @options.merge(opts))
      end
    end
  end
end
@@ -1,10 +0,0 @@
1
module AnswersEngine
  module Client
    # Client for listing exports.
    class Export < AnswersEngine::Client::Base
      # GET /scrapers/exports, with +opts+ merged over the default options.
      def all(opts={})
        self.class.get("/scrapers/exports", @options.merge(opts))
      end
    end
  end
end
@@ -1,18 +0,0 @@
1
module AnswersEngine
  module Client
    # Client for the /global_pages endpoints, addressed by GID.
    class GlobalPage < AnswersEngine::Client::Base
      # GET /global_pages/<gid>
      def find(gid)
        path = "/global_pages/#{gid}"
        self.class.get(path, @options)
      end

      # GET /global_pages/<gid>/content
      def find_content(gid)
        path = "/global_pages/#{gid}/content"
        self.class.get(path, @options)
      end

      # GET /global_pages/<gid>/failed_content
      def find_failed_content(gid)
        path = "/global_pages/#{gid}/failed_content"
        self.class.get(path, @options)
      end
    end
  end
end
18
-
@@ -1,64 +0,0 @@
1
module AnswersEngine
  module Client
    # Client for the /jobs endpoints: listing, lookup, status and worker
    # updates, and the seeding / finisher update calls.
    class Job < AnswersEngine::Client::Base
      # GET /jobs, with +opts+ merged over the default options.
      def all(opts={})
        params = @options.merge(opts)
        self.class.get("/jobs", params)
      end

      # GET /jobs/<job_id>
      def find(job_id)
        self.class.get("/jobs/#{job_id}", @options)
      end

      # PUT /jobs/<job_id>. Only the keys present (truthy) in +opts+ are
      # sent: :status, :workers (as standard_worker_count) and :browsers
      # (as browser_worker_count).
      def update(job_id, opts={})
        body = {}
        body[:status] = opts[:status] if opts[:status]
        body[:standard_worker_count] = opts[:workers] if opts[:workers]
        body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
        params = @options.merge({body: body.to_json})

        self.class.put("/jobs/#{job_id}", params)
      end

      # Updates the job with status 'cancelled'.
      # Uses a merged copy so the caller's hash is never modified (writing
      # into it also raised when the hash was frozen).
      def cancel(job_id, opts={})
        update(job_id, opts.merge(status: 'cancelled'))
      end

      # Updates the job with status 'active'. The caller's +opts+ is left
      # untouched.
      def resume(job_id, opts={})
        update(job_id, opts.merge(status: 'active'))
      end

      # Updates the job with status 'paused'. The caller's +opts+ is left
      # untouched.
      def pause(job_id, opts={})
        update(job_id, opts.merge(status: 'paused'))
      end

      # PUT /jobs/<job_id>/seeding_update. :outputs and :pages default to
      # empty arrays and :seeding_status to nil when absent; :log_error is
      # only sent when truthy.
      def seeding_update(job_id, opts={})
        body = {}
        body[:outputs] = opts.fetch(:outputs) {[]}
        body[:pages] = opts.fetch(:pages) {[]}
        body[:seeding_status] = opts.fetch(:seeding_status){ nil }
        body[:log_error] = opts[:log_error] if opts[:log_error]

        params = @options.merge({body: body.to_json})

        self.class.put("/jobs/#{job_id}/seeding_update", params)
      end

      # PUT /jobs/<job_id>/finisher_update. :outputs defaults to an empty
      # array and :finisher_status to nil when absent; :log_error is only
      # sent when truthy.
      def finisher_update(job_id, opts={})
        body = {}
        body[:outputs] = opts.fetch(:outputs) {[]}
        body[:finisher_status] = opts.fetch(:finisher_status){ nil }
        body[:log_error] = opts[:log_error] if opts[:log_error]

        params = @options.merge({body: body.to_json})

        self.class.put("/jobs/#{job_id}/finisher_update", params)
      end
    end
  end
end
@@ -1,10 +0,0 @@
1
module AnswersEngine
  module Client
    # Client for creating an export on a job.
    class JobExport < AnswersEngine::Client::Base
      # POST /jobs/<job_id>/exports/<exporter_name>
      def create(job_id, exporter_name)
        path = "/jobs/#{job_id}/exports/#{exporter_name}"
        self.class.post(path, @options)
      end
    end
  end
end
10
-
@@ -1,26 +0,0 @@
1
module AnswersEngine
  module Client
    # Client for the log endpoints, addressed either by job ID or through a
    # scraper's current job. Each call merges +opts+ over the default
    # request options.
    class JobLog < AnswersEngine::Client::Base
      # GET /jobs/<job_id>/pages/<gid>/log
      def all_job_page_log(job_id, gid, opts={})
        self.class.get("/jobs/#{job_id}/pages/#{gid}/log", @options.merge(opts))
      end

      # GET /scrapers/<scraper_name>/current_job/pages/<gid>/log
      def scraper_all_job_page_log(scraper_name, gid, opts={})
        self.class.get("/scrapers/#{scraper_name}/current_job/pages/#{gid}/log", @options.merge(opts))
      end

      # GET /jobs/<job_id>/log
      def all_job_log(job_id, opts={})
        self.class.get("/jobs/#{job_id}/log", @options.merge(opts))
      end

      # GET /scrapers/<scraper_name>/current_job/log
      def scraper_all_job_log(scraper_name, opts={})
        self.class.get("/scrapers/#{scraper_name}/current_job/log", @options.merge(opts))
      end
    end
  end
end