answersengine 0.10.1 → 0.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/CODE_OF_CONDUCT.md +1 -1
  3. data/LICENSE.txt +1 -1
  4. data/README.md +3 -4
  5. data/answersengine.gemspec +6 -12
  6. data/exe/answersengine +3 -2
  7. data/lib/answersengine.rb +20 -3
  8. metadata +14 -152
  9. data/examples/fetchtest/libraries/hello.rb +0 -9
  10. data/examples/fetchtest/libraries/hello_fail.rb +0 -10
  11. data/examples/fetchtest/parsers/failed.rb +0 -2
  12. data/examples/fetchtest/parsers/find_outputs.rb +0 -18
  13. data/examples/fetchtest/parsers/home.rb +0 -50
  14. data/examples/fetchtest/parsers/nested_fail.rb +0 -3
  15. data/examples/fetchtest/parsers/simple.rb +0 -14
  16. data/examples/fetchtest/seeders/csv_seeder.rb +0 -12
  17. data/examples/fetchtest/seeders/failed.rb +0 -1
  18. data/examples/fetchtest/seeders/list_of_urls.csv +0 -5
  19. data/examples/fetchtest/seeders/seed.rb +0 -28
  20. data/examples/fetchtest/seeders/test_reset_page.rb +0 -4
  21. data/lib/answersengine/cli.rb +0 -45
  22. data/lib/answersengine/cli/env_var.rb +0 -48
  23. data/lib/answersengine/cli/finisher.rb +0 -40
  24. data/lib/answersengine/cli/global_page.rb +0 -39
  25. data/lib/answersengine/cli/job.rb +0 -30
  26. data/lib/answersengine/cli/job_output.rb +0 -69
  27. data/lib/answersengine/cli/parser.rb +0 -64
  28. data/lib/answersengine/cli/scraper.rb +0 -185
  29. data/lib/answersengine/cli/scraper_deployment.rb +0 -24
  30. data/lib/answersengine/cli/scraper_export.rb +0 -51
  31. data/lib/answersengine/cli/scraper_exporter.rb +0 -40
  32. data/lib/answersengine/cli/scraper_finisher.rb +0 -20
  33. data/lib/answersengine/cli/scraper_job.rb +0 -75
  34. data/lib/answersengine/cli/scraper_job_var.rb +0 -48
  35. data/lib/answersengine/cli/scraper_page.rb +0 -203
  36. data/lib/answersengine/cli/scraper_var.rb +0 -48
  37. data/lib/answersengine/cli/seeder.rb +0 -40
  38. data/lib/answersengine/client.rb +0 -29
  39. data/lib/answersengine/client/auth_token.rb +0 -50
  40. data/lib/answersengine/client/backblaze_content.rb +0 -45
  41. data/lib/answersengine/client/base.rb +0 -55
  42. data/lib/answersengine/client/deploy_key.rb +0 -21
  43. data/lib/answersengine/client/env_var.rb +0 -28
  44. data/lib/answersengine/client/export.rb +0 -10
  45. data/lib/answersengine/client/global_page.rb +0 -18
  46. data/lib/answersengine/client/job.rb +0 -64
  47. data/lib/answersengine/client/job_export.rb +0 -10
  48. data/lib/answersengine/client/job_log.rb +0 -26
  49. data/lib/answersengine/client/job_output.rb +0 -19
  50. data/lib/answersengine/client/job_page.rb +0 -58
  51. data/lib/answersengine/client/job_stat.rb +0 -16
  52. data/lib/answersengine/client/scraper.rb +0 -57
  53. data/lib/answersengine/client/scraper_deployment.rb +0 -18
  54. data/lib/answersengine/client/scraper_export.rb +0 -22
  55. data/lib/answersengine/client/scraper_exporter.rb +0 -14
  56. data/lib/answersengine/client/scraper_finisher.rb +0 -16
  57. data/lib/answersengine/client/scraper_job.rb +0 -49
  58. data/lib/answersengine/client/scraper_job_output.rb +0 -19
  59. data/lib/answersengine/client/scraper_job_page.rb +0 -67
  60. data/lib/answersengine/client/scraper_job_var.rb +0 -28
  61. data/lib/answersengine/client/scraper_var.rb +0 -28
  62. data/lib/answersengine/plugin.rb +0 -6
  63. data/lib/answersengine/plugin/context_exposer.rb +0 -55
  64. data/lib/answersengine/scraper.rb +0 -18
  65. data/lib/answersengine/scraper/executor.rb +0 -373
  66. data/lib/answersengine/scraper/finisher.rb +0 -18
  67. data/lib/answersengine/scraper/parser.rb +0 -18
  68. data/lib/answersengine/scraper/ruby_finisher_executor.rb +0 -116
  69. data/lib/answersengine/scraper/ruby_parser_executor.rb +0 -200
  70. data/lib/answersengine/scraper/ruby_seeder_executor.rb +0 -120
  71. data/lib/answersengine/scraper/seeder.rb +0 -18
  72. data/lib/answersengine/version.rb +0 -3
@@ -1,48 +0,0 @@
module AnswersEngine
  class CLI < Thor
    # Thor command group for the environment variables stored on a scraper.
    # Each command builds a Client::ScraperVar from the parsed CLI options and
    # prints whatever the client call returns.
    class ScraperVar < Thor
      package_name "scraper var"

      # Usage banner for help output: executable name, package name, then the
      # command's own usage string.
      def self.banner(command, namespace = nil, subcommand = false)
        "#{basename} #{@package_name} #{command.usage}"
      end

      desc "list <scraper_name>", "List environment variables on the scraper"
      long_desc <<-LONGDESC
        List all environment variables on the scraper.
      LONGDESC
      option :page, aliases: :p, type: :numeric, desc: 'Get the next set of records by page.'
      option :per_page, aliases: :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
      # Prints the variables returned by the client for +scraper_name+.
      def list(scraper_name)
        response = Client::ScraperVar.new(options).all(scraper_name)
        puts "#{response}"
      end

      desc "set <scraper_name> <var_name> <value>", "Set an environment var on the scraper"
      long_desc <<-LONGDESC
        Creates an environment variable\x5
        <var_name>: Var name can only consist of alphabets, numbers, underscores. Name must be unique to your scraper, otherwise it will be overwritten.\x5
        <value>: Value of variable.\x5
      LONGDESC
      option :secret, type: :boolean, desc: 'Set true to make it decrypt the value. Default: false'
      # Sets +var_name+ to +value+ on the scraper. The CLI options are handed to
      # both the client constructor and the set call itself.
      def set(scraper_name, var_name, value)
        response = Client::ScraperVar.new(options).set(scraper_name, var_name, value, options)
        puts "#{response}"
      end

      desc "show <scraper_name> <var_name>", "Show an environment variable on the scraper"
      # Prints the single variable +var_name+ of the scraper.
      def show(scraper_name, var_name)
        response = Client::ScraperVar.new(options).find(scraper_name, var_name)
        puts "#{response}"
      end

      desc "unset <scraper_name> <var_name>", "Deletes an environment variable on the scraper"
      # Removes +var_name+ from the scraper and prints the client's response.
      def unset(scraper_name, var_name)
        response = Client::ScraperVar.new(options).unset(scraper_name, var_name)
        puts "#{response}"
      end
    end
  end

end
@@ -1,40 +0,0 @@
module AnswersEngine
  class CLI < Thor
    # Thor command group that runs a seeder script for a scraper job. Both
    # commands use the --job option when given and otherwise look the job up
    # through Client::ScraperJob#find for the named scraper.
    class Seeder < Thor
      desc "try <scraper_name> <seeder_file>", "Tries a seeder file"
      long_desc <<-LONGDESC
        Takes a seeder script and tries to execute it without saving anything.\x5
        <seeder_file>: Seeder script file will be executed.\x5
      LONGDESC
      option :job, aliases: :j, type: :numeric, desc: 'Set a specific job ID'
      # Runs the seeder with `false` as the third exec_seeder argument
      # (presumably the "save" flag, going by the command description).
      def try_seed(scraper_name, seeder_file)
        job_id = options[:job] || Client::ScraperJob.new(options).find(scraper_name)['id']

        puts AnswersEngine::Scraper::Seeder.exec_seeder(seeder_file, job_id, false)
      end

      desc "exec <scraper_name> <seeder_file>", "Executes a seeder script onto a scraper's current job."
      long_desc <<-LONGDESC
        Takes a seeder script and execute it against a job and enqueues the pages into the scraper's current job\x5
        <seeder_file>: Seeder script file that will be executed on the scraper's current job.\x5
      LONGDESC
      option :job, aliases: :j, type: :numeric, desc: 'Set a specific job ID'
      # Same as try_seed, but passes `true` as the third exec_seeder argument.
      def exec_parse(scraper_name, seeder_file)
        job_id = options[:job] || Client::ScraperJob.new(options).find(scraper_name)['id']

        puts AnswersEngine::Scraper::Seeder.exec_seeder(seeder_file, job_id, true)
      end
    end
  end

end
@@ -1,29 +0,0 @@
# Loads every API client class. The base client comes first and the remaining
# files keep their existing load order.
require "answersengine/client/base"
require "answersengine/client/auth_token"
require "answersengine/client/deploy_key"
require "answersengine/client/export"
require "answersengine/client/scraper"
require "answersengine/client/scraper_deployment"
require "answersengine/client/scraper_job_output"
require "answersengine/client/scraper_job_page"
require "answersengine/client/scraper_exporter"
require "answersengine/client/scraper_export"
require "answersengine/client/scraper_job"
require "answersengine/client/scraper_finisher"
require "answersengine/client/job_export"
require "answersengine/client/job"
require "answersengine/client/job_log"
require "answersengine/client/global_page"
require "answersengine/client/job_page"
require "answersengine/client/job_output"
require "answersengine/client/job_stat"
require "answersengine/client/backblaze_content"
require "answersengine/client/env_var"
require "answersengine/client/scraper_var"
require "answersengine/client/scraper_job_var"

module AnswersEngine
  # Namespace for the client classes loaded above; it defines nothing itself.
  module Client
  end
end
@@ -1,50 +0,0 @@
module AnswersEngine
  module Client
    # Client for the auth token endpoints. Every request is sent with the
    # @options hash as its base HTTParty options.
    class AuthToken < AnswersEngine::Client::Base

      # GETs a single auth token.
      def find(token)
        self.class.get("/auth_tokens/#{token}", @options)
      end

      # GETs the auth token list; +opts+ is merged over the default options.
      def all(opts={})
        params = @options.merge(opts)
        self.class.get("/auth_tokens", params)
      end

      # POSTs a new auth token with the given role and description.
      # +opts+ is accepted but currently unused.
      def create(role, description, opts={})
        body = { role: role, description: description }

        params = @options.merge({body: body.to_json})
        self.class.post("/auth_tokens", params)
      end

      # POSTs a new auth token under the account +account_id+.
      def create_on_account(account_id, role, description)
        body = { role: role, description: description }

        params = @options.merge({body: body.to_json})
        self.class.post("/accounts/#{account_id}/auth_tokens", params)
      end

      # PUTs a new role (and optionally a new description) for +token+.
      # The description is only sent when it is neither nil nor blank.
      # +opts+ is accepted but currently unused.
      def update(token, role, description="", opts={})
        body = { role: role }

        # Plain-Ruby blank check: String#present? only exists when
        # ActiveSupport is loaded, which this file does not require.
        description_given = !(description.nil? || description.to_s.strip.empty?)
        body[:description] = description if description_given
        params = @options.merge({body: body.to_json})

        self.class.put("/auth_tokens/#{token}", params)
      end

      # DELETEs +token+, sending an empty JSON object as the request body.
      # +opts+ is accepted but currently unused.
      def delete(token, opts={})
        body = {}
        params = @options.merge({body: body.to_json})

        self.class.delete("/auth_tokens/#{token}", params)
      end
    end
  end
end
@@ -1,45 +0,0 @@
1
- require 'zlib'
2
- require 'httparty'
3
-
module AnswersEngine
  module Client
    # Fetches content by URL through HTTParty and can gunzip the result.
    class BackblazeContent
      include HTTParty

      # GETs +url+, asking HTTParty to treat the body as plain text.
      def get_content(url)
        self.class.get(url, format: :plain)
      end

      # GETs +url+ and returns the gunzipped body.
      def get_gunzipped_content(url)
        gunzip(get_content(url))
      end

      # Gunzips +string+ and returns the decompressed data.
      #
      # When reading fails with the message 'unexpected end of file', the
      # reader is drained character by character and whatever could be read is
      # returned. Any other error is re-raised. The reader is always closed.
      def gunzip(string)
        reader = Zlib::GzipReader.new(StringIO.new(string), encoding: Encoding::ASCII_8BIT)
        begin
          reader.read
        rescue => read_error
          # Only the truncated-stream case is tolerated.
          raise unless read_error.to_s == 'unexpected end of file'

          drain_by_char(reader)
        end
      ensure
        reader.close if reader.respond_to?(:close)
      end

      private

      # Reads characters from +reader+ until EOF or the first error, which is
      # reported on stdout and otherwise ignored. Returns what was collected.
      def drain_by_char(reader)
        collected = "".dup
        begin
          collected << reader.readchar until reader.eof?
        rescue => e
          puts "Ignored Zlib error: #{e.to_s}"
        end
        collected
      end
    end
  end
end
@@ -1,55 +0,0 @@
1
- require 'httparty'
2
-
module AnswersEngine
  module Client
    # Base class of the API clients. It sets the HTTParty base URI, resolves
    # the auth token and builds the default request options (@options) that
    # subclasses pass along with every request.
    class Base
      include HTTParty

      # Auth token taken from the ANSWERSENGINE_TOKEN environment variable.
      def self.env_auth_token
        ENV['ANSWERSENGINE_TOKEN']
      end

      # API base URL: ANSWERSENGINE_API_URL when set, otherwise the default.
      def env_api_url
        ENV['ANSWERSENGINE_API_URL'] || 'https://fetch.answersengine.com/api/v1'
      end

      # The explicitly assigned token, falling back to the environment token.
      def auth_token
        @auth_token ||= self.class.env_auth_token
      end

      attr_writer :auth_token

      # Builds the default request options.
      #
      # Recognised +opts+ keys: :auth_token, :page, :per_page, :fetch_fail,
      # :parse_fail, :status, :page_type, :gid and :query (a Hash, or a String
      # containing JSON). Keys with nil/false values are left out of the query.
      def initialize(opts={})
        self.class.base_uri(env_api_url)
        self.auth_token = opts[:auth_token] unless opts[:auth_token].nil?

        @options = {
          headers: {
            "Authorization" => "Bearer #{auth_token}",
            "Content-Type" => "application/json",
          }
        }

        # [query parameter, option key] pairs, in the order they are added.
        query = {}
        [
          [:p, :page],
          [:pp, :per_page],
          [:fetchfail, :fetch_fail],
          [:parsefail, :parse_fail],
          [:status, :status],
          [:page_type, :page_type],
          [:gid, :gid],
        ].each do |param, key|
          value = opts[key]
          query[param] = value if value
        end

        # :query is always sent as JSON text; a String is parsed and
        # re-serialised, any other type is ignored.
        raw_query = opts[:query]
        case raw_query
        when Hash then query[:q] = raw_query.to_json
        when String then query[:q] = JSON.parse(raw_query).to_json
        end

        @options.merge!(query: query) unless query.empty?
      end
    end
  end
end
@@ -1,21 +0,0 @@
module AnswersEngine
  module Client
    # Client for the /deploy_key endpoint. In every method +opts+ is merged
    # over the default request options before the call is made.
    class DeployKey < AnswersEngine::Client::Base

      # GET /deploy_key
      def find(opts={})
        self.class.get("/deploy_key", @options.merge(opts))
      end

      # POST /deploy_key
      def create(opts={})
        self.class.post("/deploy_key", @options.merge(opts))
      end

      # DELETE /deploy_key
      def delete(opts={})
        self.class.delete("/deploy_key", @options.merge(opts))
      end
    end
  end
end
@@ -1,28 +0,0 @@
module AnswersEngine
  module Client
    # Client for the /env_vars endpoints.
    class EnvVar < AnswersEngine::Client::Base

      # GETs the variable called +name+.
      def find(name)
        self.class.get("/env_vars/#{name}", @options)
      end

      # GETs all variables; +opts+ is merged over the default options.
      def all(opts={})
        self.class.get("/env_vars", @options.merge(opts))
      end

      # PUTs +value+ for the variable +name+. The :secret flag is included in
      # the body only when opts[:secret] is truthy.
      def set(name, value, opts={})
        payload = { value: value }
        secret = opts[:secret]
        payload[:secret] = secret if secret

        self.class.put("/env_vars/#{name}", @options.merge({body: payload.to_json}))
      end

      # DELETEs the variable +name+; +opts+ is merged over the default options.
      def unset(name, opts={})
        self.class.delete("/env_vars/#{name}", @options.merge(opts))
      end
    end
  end
end
@@ -1,10 +0,0 @@
module AnswersEngine
  module Client
    # Client for listing exports.
    class Export < AnswersEngine::Client::Base
      # GET /scrapers/exports, with +opts+ merged over the default options.
      def all(opts={})
        self.class.get("/scrapers/exports", @options.merge(opts))
      end
    end
  end
end
@@ -1,18 +0,0 @@
module AnswersEngine
  module Client
    # Client for the /global_pages endpoints, addressed by page gid.
    class GlobalPage < AnswersEngine::Client::Base
      # GETs the global page record for +gid+.
      def find(gid)
        path = "/global_pages/#{gid}"
        self.class.get(path, @options)
      end

      # GETs the "content" sub-resource of the global page +gid+.
      def find_content(gid)
        path = "/global_pages/#{gid}/content"
        self.class.get(path, @options)
      end

      # GETs the "failed_content" sub-resource of the global page +gid+.
      def find_failed_content(gid)
        path = "/global_pages/#{gid}/failed_content"
        self.class.get(path, @options)
      end
    end
  end
end
18
-
@@ -1,64 +0,0 @@
module AnswersEngine
  module Client
    # Client for the /jobs endpoints: listing, lookup, status changes and the
    # seeding/finisher update calls.
    class Job < AnswersEngine::Client::Base
      # GETs the job list; +opts+ is merged over the default options.
      def all(opts={})
        params = @options.merge(opts)
        self.class.get("/jobs", params)
      end

      # GETs the job +job_id+.
      def find(job_id)
        self.class.get("/jobs/#{job_id}", @options)
      end

      # PUTs changes to the job +job_id+. Only truthy options are sent:
      # :status as status, :workers as standard_worker_count and :browsers as
      # browser_worker_count.
      def update(job_id, opts={})
        body = {}
        body[:status] = opts[:status] if opts[:status]
        body[:standard_worker_count] = opts[:workers] if opts[:workers]
        body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
        params = @options.merge({body: body.to_json})

        self.class.put("/jobs/#{job_id}", params)
      end

      # Updates the job with status 'cancelled'. +opts+ is not modified:
      # the status is applied to a merged copy, so frozen hashes are accepted.
      def cancel(job_id, opts={})
        update(job_id, opts.merge(status: 'cancelled'))
      end

      # Updates the job with status 'active'. +opts+ is not modified.
      def resume(job_id, opts={})
        update(job_id, opts.merge(status: 'active'))
      end

      # Updates the job with status 'paused'. +opts+ is not modified.
      def pause(job_id, opts={})
        update(job_id, opts.merge(status: 'paused'))
      end

      # PUTs seeding results for the job. :outputs and :pages default to an
      # empty array and :seeding_status to nil when the key is absent;
      # :log_error is sent only when truthy.
      def seeding_update(job_id, opts={})
        body = {}
        body[:outputs] = opts.fetch(:outputs) { [] }
        body[:pages] = opts.fetch(:pages) { [] }
        body[:seeding_status] = opts.fetch(:seeding_status) { nil }
        body[:log_error] = opts[:log_error] if opts[:log_error]

        params = @options.merge({body: body.to_json})

        self.class.put("/jobs/#{job_id}/seeding_update", params)
      end

      # PUTs finisher results for the job. :outputs defaults to an empty array
      # and :finisher_status to nil when the key is absent; :log_error is sent
      # only when truthy.
      def finisher_update(job_id, opts={})
        body = {}
        body[:outputs] = opts.fetch(:outputs) { [] }
        body[:finisher_status] = opts.fetch(:finisher_status) { nil }
        body[:log_error] = opts[:log_error] if opts[:log_error]

        params = @options.merge({body: body.to_json})

        self.class.put("/jobs/#{job_id}/finisher_update", params)
      end

    end

  end
end
@@ -1,10 +0,0 @@
module AnswersEngine
  module Client
    # Client for creating an export on a job.
    class JobExport < AnswersEngine::Client::Base
      # POSTs to the exports endpoint of job +job_id+ for +exporter_name+,
      # using the default request options unchanged.
      def create(job_id, exporter_name)
        path = "/jobs/#{job_id}/exports/#{exporter_name}"
        self.class.post(path, @options)
      end
    end
  end
end
10
-
@@ -1,26 +0,0 @@
module AnswersEngine
  module Client
    # Client for the log endpoints of jobs and job pages. A job is addressed
    # either by its id or as the current job of a named scraper. In every
    # method +opts+ is merged over the default request options.
    class JobLog < AnswersEngine::Client::Base
      # GETs the log of page +gid+ within job +job_id+.
      def all_job_page_log(job_id, gid, opts={})
        self.class.get("/jobs/#{job_id}/pages/#{gid}/log", @options.merge(opts))
      end

      # GETs the log of page +gid+ within the scraper's current job.
      def scraper_all_job_page_log(scraper_name, gid, opts={})
        self.class.get("/scrapers/#{scraper_name}/current_job/pages/#{gid}/log", @options.merge(opts))
      end

      # GETs the log of job +job_id+.
      def all_job_log(job_id, opts={})
        self.class.get("/jobs/#{job_id}/log", @options.merge(opts))
      end

      # GETs the log of the scraper's current job.
      def scraper_all_job_log(scraper_name, opts={})
        self.class.get("/scrapers/#{scraper_name}/current_job/log", @options.merge(opts))
      end

    end
  end
end