answersengine 0.10.1 → 0.10.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/CODE_OF_CONDUCT.md +1 -1
  3. data/LICENSE.txt +1 -1
  4. data/README.md +3 -4
  5. data/answersengine.gemspec +6 -12
  6. data/exe/answersengine +3 -2
  7. data/lib/answersengine.rb +20 -3
  8. metadata +14 -152
  9. data/examples/fetchtest/libraries/hello.rb +0 -9
  10. data/examples/fetchtest/libraries/hello_fail.rb +0 -10
  11. data/examples/fetchtest/parsers/failed.rb +0 -2
  12. data/examples/fetchtest/parsers/find_outputs.rb +0 -18
  13. data/examples/fetchtest/parsers/home.rb +0 -50
  14. data/examples/fetchtest/parsers/nested_fail.rb +0 -3
  15. data/examples/fetchtest/parsers/simple.rb +0 -14
  16. data/examples/fetchtest/seeders/csv_seeder.rb +0 -12
  17. data/examples/fetchtest/seeders/failed.rb +0 -1
  18. data/examples/fetchtest/seeders/list_of_urls.csv +0 -5
  19. data/examples/fetchtest/seeders/seed.rb +0 -28
  20. data/examples/fetchtest/seeders/test_reset_page.rb +0 -4
  21. data/lib/answersengine/cli.rb +0 -45
  22. data/lib/answersengine/cli/env_var.rb +0 -48
  23. data/lib/answersengine/cli/finisher.rb +0 -40
  24. data/lib/answersengine/cli/global_page.rb +0 -39
  25. data/lib/answersengine/cli/job.rb +0 -30
  26. data/lib/answersengine/cli/job_output.rb +0 -69
  27. data/lib/answersengine/cli/parser.rb +0 -64
  28. data/lib/answersengine/cli/scraper.rb +0 -185
  29. data/lib/answersengine/cli/scraper_deployment.rb +0 -24
  30. data/lib/answersengine/cli/scraper_export.rb +0 -51
  31. data/lib/answersengine/cli/scraper_exporter.rb +0 -40
  32. data/lib/answersengine/cli/scraper_finisher.rb +0 -20
  33. data/lib/answersengine/cli/scraper_job.rb +0 -75
  34. data/lib/answersengine/cli/scraper_job_var.rb +0 -48
  35. data/lib/answersengine/cli/scraper_page.rb +0 -203
  36. data/lib/answersengine/cli/scraper_var.rb +0 -48
  37. data/lib/answersengine/cli/seeder.rb +0 -40
  38. data/lib/answersengine/client.rb +0 -29
  39. data/lib/answersengine/client/auth_token.rb +0 -50
  40. data/lib/answersengine/client/backblaze_content.rb +0 -45
  41. data/lib/answersengine/client/base.rb +0 -55
  42. data/lib/answersengine/client/deploy_key.rb +0 -21
  43. data/lib/answersengine/client/env_var.rb +0 -28
  44. data/lib/answersengine/client/export.rb +0 -10
  45. data/lib/answersengine/client/global_page.rb +0 -18
  46. data/lib/answersengine/client/job.rb +0 -64
  47. data/lib/answersengine/client/job_export.rb +0 -10
  48. data/lib/answersengine/client/job_log.rb +0 -26
  49. data/lib/answersengine/client/job_output.rb +0 -19
  50. data/lib/answersengine/client/job_page.rb +0 -58
  51. data/lib/answersengine/client/job_stat.rb +0 -16
  52. data/lib/answersengine/client/scraper.rb +0 -57
  53. data/lib/answersengine/client/scraper_deployment.rb +0 -18
  54. data/lib/answersengine/client/scraper_export.rb +0 -22
  55. data/lib/answersengine/client/scraper_exporter.rb +0 -14
  56. data/lib/answersengine/client/scraper_finisher.rb +0 -16
  57. data/lib/answersengine/client/scraper_job.rb +0 -49
  58. data/lib/answersengine/client/scraper_job_output.rb +0 -19
  59. data/lib/answersengine/client/scraper_job_page.rb +0 -67
  60. data/lib/answersengine/client/scraper_job_var.rb +0 -28
  61. data/lib/answersengine/client/scraper_var.rb +0 -28
  62. data/lib/answersengine/plugin.rb +0 -6
  63. data/lib/answersengine/plugin/context_exposer.rb +0 -55
  64. data/lib/answersengine/scraper.rb +0 -18
  65. data/lib/answersengine/scraper/executor.rb +0 -373
  66. data/lib/answersengine/scraper/finisher.rb +0 -18
  67. data/lib/answersengine/scraper/parser.rb +0 -18
  68. data/lib/answersengine/scraper/ruby_finisher_executor.rb +0 -116
  69. data/lib/answersengine/scraper/ruby_parser_executor.rb +0 -200
  70. data/lib/answersengine/scraper/ruby_seeder_executor.rb +0 -120
  71. data/lib/answersengine/scraper/seeder.rb +0 -18
  72. data/lib/answersengine/version.rb +0 -3
@@ -1,19 +0,0 @@
1
module AnswersEngine
  module Client
    # Read-only client for the output collections and records of a job,
    # addressed by job id.
    class JobOutput < AnswersEngine::Client::Base
      # Fetch a single output record from a job's collection.
      #
      # @param job_id Job identifier interpolated into the request path.
      # @param collection Collection name.
      # @param id Record identifier.
      #
      # @return Whatever the class-level `get` returns.
      def find(job_id, collection, id)
        record_path = "/jobs/#{job_id}/output/collections/#{collection}/records/#{id}"
        self.class.get(record_path, @options)
      end

      # List the records of a job's collection.
      #
      # @param job_id Job identifier interpolated into the request path.
      # @param collection Collection name; defaults to 'default'.
      #
      # @return Whatever the class-level `get` returns.
      def all(job_id, collection = 'default')
        records_path = "/jobs/#{job_id}/output/collections/#{collection}/records"
        self.class.get(records_path, @options)
      end

      # List the output collections of a job.
      #
      # @param job_id Job identifier interpolated into the request path.
      #
      # @return Whatever the class-level `get` returns.
      def collections(job_id)
        collections_path = "/jobs/#{job_id}/output/collections"
        self.class.get(collections_path, @options)
      end
    end
  end
end
@@ -1,58 +0,0 @@
1
module AnswersEngine
  module Client
    # Client for the pages of a job, addressed by job id
    # ("/jobs/:job_id/pages").
    class JobPage < AnswersEngine::Client::Base
      # Fetch one page of a job.
      #
      # @param job_id Job identifier interpolated into the request path.
      # @param gid Page identifier interpolated into the request path.
      #
      # @return Whatever the class-level `get` returns.
      def find(job_id, gid)
        page_path = "/jobs/#{job_id}/pages/#{gid}"
        self.class.get(page_path, @options)
      end

      # List the pages of a job.
      #
      # @param job_id Job identifier interpolated into the request path.
      # @param [Hash] opts ({}) Extra request options merged over @options.
      #
      # @return Whatever the class-level `get` returns.
      def all(job_id, opts={})
        self.class.get("/jobs/#{job_id}/pages", @options.merge(opts))
      end

      # Update a page. Only truthy :page_type, :priority and :vars options
      # are copied into the JSON body.
      #
      # @param job_id Job identifier interpolated into the request path.
      # @param gid Page identifier interpolated into the request path.
      # @param [Hash] opts ({}) Fields to update.
      #
      # @return Whatever the class-level `put` returns.
      def update(job_id, gid, opts={})
        payload = [:page_type, :priority, :vars].each_with_object({}) do |field, acc|
          given = opts[field]
          acc[field] = given if given
        end

        self.class.put("/jobs/#{job_id}/pages/#{gid}", @options.merge({body: payload.to_json}))
      end

      # Enqueue a new page on a job.
      #
      # @param job_id Job identifier interpolated into the request path.
      # @param method HTTP method for the page; an empty string becomes "GET".
      # @param url URL of the page.
      # @param [Hash] opts ({}) Optional page attributes; each listed key is
      #   sent only when its value is truthy.
      #
      # @return Whatever the class-level `post` returns.
      def enqueue(job_id, method, url, opts={})
        payload = {method: (method == "" ? "GET" : method), url: url}
        optional_fields = [
          :page_type, :priority, :fetch_type, :body, :headers, :vars,
          :force_fetch, :freshness, :ua_type, :no_redirect, :cookie
        ]
        optional_fields.each do |field|
          given = opts[field]
          payload[field] = given if given
        end

        self.class.post("/jobs/#{job_id}/pages", @options.merge({body: payload.to_json}))
      end

      # Report the result of parsing a page.
      #
      # :outputs and :pages default to empty arrays and :parsing_status to nil
      # when the key is absent; :log_error is sent only when truthy.
      #
      # @param job_id Job identifier interpolated into the request path.
      # @param gid Page identifier interpolated into the request path.
      # @param [Hash] opts ({}) Parsing results.
      #
      # @return Whatever the class-level `put` returns.
      def parsing_update(job_id, gid, opts={})
        payload = {
          outputs: opts.fetch(:outputs) { [] },
          pages: opts.fetch(:pages) { [] },
          parsing_status: opts.fetch(:parsing_status) { nil }
        }
        log_error = opts[:log_error]
        payload[:log_error] = log_error if log_error

        self.class.put("/jobs/#{job_id}/pages/#{gid}/parsing_update", @options.merge({body: payload.to_json}))
      end
    end
  end
end
@@ -1,16 +0,0 @@
1
module AnswersEngine
  module Client
    # Client for the "stats/current" endpoints of a job, addressed either by
    # job id or by scraper name.
    class JobStat < AnswersEngine::Client::Base
      # Fetch current stats for a job.
      #
      # @param job_id Job identifier interpolated into the request path.
      #
      # @return Whatever the class-level `get` returns.
      def job_current_stats(job_id)
        stats_path = "/jobs/#{job_id}/stats/current"
        self.class.get(stats_path, @options)
      end

      # Fetch current stats for a scraper's current job.
      #
      # @param [String] scraper_name Scraper name.
      #
      # @return Whatever the class-level `get` returns.
      def scraper_job_current_stats(scraper_name)
        stats_path = "/scrapers/#{scraper_name}/current_job/stats/current"
        self.class.get(stats_path, @options)
      end
    end
  end
end
@@ -1,57 +0,0 @@
1
module AnswersEngine
  module Client
    # Client for the "/scrapers" endpoints: find, list, create, update and
    # delete scrapers.
    class Scraper < AnswersEngine::Client::Base

      # Fetch one scraper.
      #
      # @param [String] scraper_name Scraper name.
      #
      # @return Whatever the class-level `get` returns.
      def find(scraper_name)
        self.class.get("/scrapers/#{scraper_name}", @options)
      end

      # List scrapers.
      #
      # @param [Hash] opts ({}) Extra request options merged over @options.
      #
      # @return Whatever the class-level `get` returns.
      def all(opts={})
        params = @options.merge opts
        self.class.get("/scrapers", params)
      end

      # Create a scraper.
      #
      # Optional attributes are sent only when truthy. :branch / :workers /
      # :browsers take precedence over their long-form aliases
      # (:git_branch / :standard_worker_count / :browser_worker_count).
      #
      # @param [String] scraper_name Scraper name.
      # @param git_repository Value sent as :git_repository.
      # @param [Hash] opts ({}) Optional scraper attributes.
      #
      # @return Whatever the class-level `post` returns.
      def create(scraper_name, git_repository, opts={})
        body = {}
        body[:name] = scraper_name
        body[:git_repository] = git_repository
        # No branch is sent when none was given; the previous `|| "master"`
        # fallback sat behind this same guard and could never be reached.
        branch = opts[:branch] || opts[:git_branch]
        body[:git_branch] = branch if branch
        body[:freshness_type] = opts[:freshness_type] if opts[:freshness_type]
        body[:force_fetch] = opts[:force_fetch] if opts[:force_fetch]
        workers = opts[:workers] || opts[:standard_worker_count]
        body[:standard_worker_count] = workers if workers
        browsers = opts[:browsers] || opts[:browser_worker_count]
        body[:browser_worker_count] = browsers if browsers
        body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
        body[:disable_scheduler] = opts[:disable_scheduler] if opts[:disable_scheduler]
        body[:cancel_current_job] = opts[:cancel_current_job] if opts[:cancel_current_job]
        body[:schedule] = opts[:schedule] if opts[:schedule]
        body[:timezone] = opts[:timezone] if opts[:timezone]
        params = @options.merge({body: body.to_json})
        self.class.post("/scrapers", params)
      end

      # Update a scraper.
      #
      # Most attributes are sent only when truthy. The boolean flags
      # :force_fetch, :disable_scheduler and :cancel_current_job are sent
      # whenever the key is present (as a symbol or a string), so an explicit
      # `false` reaches the API.
      #
      # @param [String] scraper_name Scraper name.
      # @param [Hash] opts ({}) Attributes to change.
      #
      # @return Whatever the class-level `put` returns.
      def update(scraper_name, opts={})
        # Presence is tested under both key forms, so the value must be read
        # the same way; reading only the symbol key turned a string-keyed
        # flag into nil.
        flag_given = lambda { |key| opts.has_key?(key) || opts.has_key?(key.to_s) }
        flag_value = lambda { |key| opts.has_key?(key) ? opts[key] : opts[key.to_s] }

        body = {}
        body[:name] = opts[:name] if opts[:name]
        repository = opts[:repo] || opts[:git_repository]
        body[:git_repository] = repository if repository
        branch = opts[:branch] || opts[:git_branch]
        body[:git_branch] = branch if branch
        body[:freshness_type] = opts[:freshness_type] if opts[:freshness_type]
        body[:force_fetch] = flag_value.call(:force_fetch) if flag_given.call(:force_fetch)
        workers = opts[:workers] || opts[:standard_worker_count]
        body[:standard_worker_count] = workers if workers
        browsers = opts[:browsers] || opts[:browser_worker_count]
        body[:browser_worker_count] = browsers if browsers
        body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
        body[:disable_scheduler] = flag_value.call(:disable_scheduler) if flag_given.call(:disable_scheduler)
        body[:cancel_current_job] = flag_value.call(:cancel_current_job) if flag_given.call(:cancel_current_job)
        body[:schedule] = opts[:schedule] if opts[:schedule]
        body[:timezone] = opts[:timezone] if opts[:timezone]
        params = @options.merge({body: body.to_json})

        self.class.put("/scrapers/#{scraper_name}", params)
      end

      # Delete a scraper.
      #
      # @param [String] scraper_name Scraper name.
      # @param [Hash] opts ({}) Extra request options merged over @options.
      #
      # @return Whatever the class-level `delete` returns.
      def delete(scraper_name, opts={})
        params = @options.merge(opts)
        self.class.delete("/scrapers/#{scraper_name}", params)
      end
    end
  end
end
@@ -1,18 +0,0 @@
1
module AnswersEngine
  module Client
    # Client for a scraper's deployments
    # ("/scrapers/:scraper_name/deployments").
    class ScraperDeployment < AnswersEngine::Client::Base
      # List a scraper's deployments.
      #
      # @param [String] scraper_name Scraper name.
      # @param [Hash] opts ({}) Extra request options merged over @options.
      #
      # @return Whatever the class-level `get` returns.
      def all(scraper_name, opts={})
        self.class.get("/scrapers/#{scraper_name}/deployments", @options.merge(opts))
      end

      # Request a new deployment of a scraper.
      #
      # @param [String] scraper_name Scraper name.
      # @param [Hash] opts ({}) Extra request options merged over @options.
      #
      # @return Whatever the class-level `post` returns.
      def deploy(scraper_name, opts={})
        self.class.post("/scrapers/#{scraper_name}/deployments", @options.merge(opts))
      end
    end
  end
end
@@ -1,22 +0,0 @@
1
module AnswersEngine
  module Client
    # Client for scraper exports: list, find, create and download.
    class ScraperExport < AnswersEngine::Client::Base
      # List a scraper's exports.
      #
      # @param [String] scraper_name Scraper name.
      # @param [Hash] opts ({}) Extra request options merged over @options.
      #
      # @return Whatever the class-level `get` returns.
      def all(scraper_name, opts={})
        self.class.get("/scrapers/#{scraper_name}/exports", @options.merge(opts))
      end

      # Fetch one export by id.
      #
      # @param export_id Export identifier interpolated into the request path.
      #
      # @return Whatever the class-level `get` returns.
      def find(export_id)
        export_path = "/scrapers/exports/#{export_id}"
        self.class.get(export_path, @options)
      end

      # Create an export of a scraper using the named exporter.
      #
      # @param [String] scraper_name Scraper name.
      # @param exporter_name Exporter name interpolated into the request path.
      #
      # @return Whatever the class-level `post` returns.
      def create(scraper_name, exporter_name)
        create_path = "/scrapers/#{scraper_name}/exports/#{exporter_name}"
        self.class.post(create_path, @options)
      end

      # Request the download endpoint of an export.
      #
      # @param export_id Export identifier interpolated into the request path.
      #
      # @return Whatever the class-level `get` returns.
      def download(export_id)
        download_path = "/scrapers/exports/#{export_id}/download"
        self.class.get(download_path, @options)
      end
    end
  end
end
@@ -1,14 +0,0 @@
1
module AnswersEngine
  module Client
    # Client for a scraper's exporters
    # ("/scrapers/:scraper_name/exporters").
    class ScraperExporter < AnswersEngine::Client::Base
      # List a scraper's exporters.
      #
      # @param [String] scraper_name Scraper name.
      # @param [Hash] opts ({}) Extra request options merged over @options.
      #
      # @return Whatever the class-level `get` returns.
      def all(scraper_name, opts={})
        self.class.get("/scrapers/#{scraper_name}/exporters", @options.merge(opts))
      end

      # Fetch one exporter of a scraper.
      #
      # @param [String] scraper_name Scraper name.
      # @param exporter_name Exporter name interpolated into the request path.
      #
      # @return Whatever the class-level `get` returns.
      def find(scraper_name, exporter_name)
        exporter_path = "/scrapers/#{scraper_name}/exporters/#{exporter_name}"
        self.class.get(exporter_path, @options)
      end
    end
  end
end
@@ -1,16 +0,0 @@
1
module AnswersEngine
  module Client
    # Client for the finisher of a scraper's current job.
    class ScraperFinisher < AnswersEngine::Client::Base
      # Reset the finisher on the current job of a scraper.
      #
      # @param [String] scraper_name Scraper name.
      # @param [Hash] opts ({}) Custom API parameters merged over @options.
      #
      # @return [HTTParty::Response]
      def reset(scraper_name, opts={})
        request_params = @options.merge(opts)
        reset_path = "/scrapers/#{scraper_name}/current_job/finisher/reset"
        self.class.put(reset_path, request_params)
      end
    end
  end
end
@@ -1,49 +0,0 @@
1
module AnswersEngine
  module Client
    # Client for a scraper's jobs: list and create jobs, and read or change
    # the scraper's current job.
    class ScraperJob < AnswersEngine::Client::Base
      # List a scraper's jobs.
      #
      # @param [String] scraper_name Scraper name.
      # @param [Hash] opts ({}) Extra request options merged over @options.
      #
      # @return Whatever the class-level `get` returns.
      def all(scraper_name, opts={})
        params = @options.merge(opts)
        self.class.get("/scrapers/#{scraper_name}/jobs", params)
      end

      # Create a job for a scraper. :workers, :browsers and :proxy_type are
      # sent only when truthy.
      #
      # @param [String] scraper_name Scraper name.
      # @param [Hash] opts ({}) Optional job attributes.
      #
      # @return Whatever the class-level `post` returns.
      def create(scraper_name, opts={})
        body = {}
        body[:standard_worker_count] = opts[:workers] if opts[:workers]
        body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
        body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
        params = @options.merge({body: body.to_json})
        self.class.post("/scrapers/#{scraper_name}/jobs", params)
      end

      # Fetch a scraper's current job.
      #
      # @param [String] scraper_name Scraper name.
      #
      # @return Whatever the class-level `get` returns.
      def find(scraper_name)
        self.class.get("/scrapers/#{scraper_name}/current_job", @options)
      end

      # Update a scraper's current job. :status, :workers, :browsers and
      # :proxy_type are sent only when truthy.
      #
      # @param [String] scraper_name Scraper name.
      # @param [Hash] opts ({}) Attributes to change.
      #
      # @return Whatever the class-level `put` returns.
      def update(scraper_name, opts={})
        body = {}
        body[:status] = opts[:status] if opts[:status]
        body[:standard_worker_count] = opts[:workers] if opts[:workers]
        body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
        body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
        params = @options.merge({body: body.to_json})

        self.class.put("/scrapers/#{scraper_name}/current_job", params)
      end

      # Set the current job's status to 'cancelled'.
      #
      # The caller's hash is merged into a copy rather than modified, so a
      # frozen options hash is accepted and no :status key leaks back.
      #
      # @param [String] scraper_name Scraper name.
      # @param [Hash] opts ({}) Additional attributes forwarded to #update.
      #
      # @return Whatever #update returns.
      def cancel(scraper_name, opts={})
        update(scraper_name, opts.merge(status: 'cancelled'))
      end

      # Set the current job's status to 'active'. Does not modify `opts`.
      #
      # @param [String] scraper_name Scraper name.
      # @param [Hash] opts ({}) Additional attributes forwarded to #update.
      #
      # @return Whatever #update returns.
      def resume(scraper_name, opts={})
        update(scraper_name, opts.merge(status: 'active'))
      end

      # Set the current job's status to 'paused'. Does not modify `opts`.
      #
      # @param [String] scraper_name Scraper name.
      # @param [Hash] opts ({}) Additional attributes forwarded to #update.
      #
      # @return Whatever #update returns.
      def pause(scraper_name, opts={})
        update(scraper_name, opts.merge(status: 'paused'))
      end
    end
  end
end
@@ -1,19 +0,0 @@
1
module AnswersEngine
  module Client
    # Read-only client for the output collections and records of a scraper's
    # current job, addressed by scraper name.
    class ScraperJobOutput < AnswersEngine::Client::Base
      # Fetch a single output record from the current job's collection.
      #
      # @param [String] scraper_name Scraper name.
      # @param collection Collection name.
      # @param id Record identifier.
      #
      # @return Whatever the class-level `get` returns.
      def find(scraper_name, collection, id)
        record_path = "/scrapers/#{scraper_name}/current_job/output/collections/#{collection}/records/#{id}"
        self.class.get(record_path, @options)
      end

      # List the records of a collection of the current job.
      #
      # @param [String] scraper_name Scraper name.
      # @param collection Collection name; defaults to 'default'.
      #
      # @return Whatever the class-level `get` returns.
      def all(scraper_name, collection = 'default')
        records_path = "/scrapers/#{scraper_name}/current_job/output/collections/#{collection}/records"
        self.class.get(records_path, @options)
      end

      # List the output collections of the current job.
      #
      # @param [String] scraper_name Scraper name.
      #
      # @return Whatever the class-level `get` returns.
      def collections(scraper_name)
        collections_path = "/scrapers/#{scraper_name}/current_job/output/collections"
        self.class.get(collections_path, @options)
      end
    end
  end
end
@@ -1,67 +0,0 @@
1
module AnswersEngine
  module Client
    # Client for the pages of a scraper's current job, plus refetch/reparse
    # requests addressed either by scraper name or by job id.
    class ScraperJobPage < AnswersEngine::Client::Base
      # Fetch one page of the current job.
      #
      # @param [String] scraper_name Scraper name.
      # @param gid Page identifier interpolated into the request path.
      #
      # @return Whatever the class-level `get` returns.
      def find(scraper_name, gid)
        page_path = "/scrapers/#{scraper_name}/current_job/pages/#{gid}"
        self.class.get(page_path, @options)
      end

      # List the pages of the current job.
      #
      # @param [String] scraper_name Scraper name.
      # @param [Hash] opts ({}) Extra request options merged over @options.
      #
      # @return Whatever the class-level `get` returns.
      def all(scraper_name, opts={})
        self.class.get("/scrapers/#{scraper_name}/current_job/pages", @options.merge(opts))
      end

      # Update a page. Only truthy :page_type, :priority and :vars options
      # are copied into the JSON body.
      #
      # @param [String] scraper_name Scraper name.
      # @param gid Page identifier interpolated into the request path.
      # @param [Hash] opts ({}) Fields to update.
      #
      # @return Whatever the class-level `put` returns.
      def update(scraper_name, gid, opts={})
        payload = [:page_type, :priority, :vars].each_with_object({}) do |field, acc|
          given = opts[field]
          acc[field] = given if given
        end

        self.class.put("/scrapers/#{scraper_name}/current_job/pages/#{gid}", @options.merge({body: payload.to_json}))
      end

      # Request a refetch of pages on the scraper's current job.
      #
      # @param [String] scraper_name Scraper name.
      # @param [Hash] opts ({}) Extra request options merged over @options.
      #
      # @return Whatever the class-level `put` returns.
      def refetch(scraper_name, opts={})
        self.class.put("/scrapers/#{scraper_name}/current_job/pages/refetch", @options.merge(opts))
      end

      # Request a refetch of pages on a job addressed by id.
      #
      # @param job_id Job identifier interpolated into the request path.
      # @param [Hash] opts ({}) Extra request options merged over @options.
      #
      # @return Whatever the class-level `put` returns.
      def refetch_by_job(job_id, opts={})
        self.class.put("/jobs/#{job_id}/pages/refetch", @options.merge(opts))
      end

      # Request a reparse of pages on the scraper's current job.
      #
      # @param [String] scraper_name Scraper name.
      # @param [Hash] opts ({}) Extra request options merged over @options.
      #
      # @return Whatever the class-level `put` returns.
      def reparse(scraper_name, opts={})
        self.class.put("/scrapers/#{scraper_name}/current_job/pages/reparse", @options.merge(opts))
      end

      # Request a reparse of pages on a job addressed by id.
      #
      # @param job_id Job identifier interpolated into the request path.
      # @param [Hash] opts ({}) Extra request options merged over @options.
      #
      # @return Whatever the class-level `put` returns.
      def reparse_by_job(job_id, opts={})
        self.class.put("/jobs/#{job_id}/pages/reparse", @options.merge(opts))
      end

      # Enqueue a new page on the scraper's current job.
      #
      # @param [String] scraper_name Scraper name.
      # @param method HTTP method for the page; an empty string becomes "GET".
      # @param url URL of the page.
      # @param [Hash] opts ({}) Optional page attributes; each listed key is
      #   sent only when its value is truthy.
      #
      # @return Whatever the class-level `post` returns.
      def enqueue(scraper_name, method, url, opts={})
        payload = {method: (method == "" ? "GET" : method), url: url}
        optional_fields = [
          :page_type, :priority, :fetch_type, :body, :headers, :vars,
          :force_fetch, :freshness, :ua_type, :no_redirect, :cookie
        ]
        optional_fields.each do |field|
          given = opts[field]
          payload[field] = given if given
        end

        self.class.post("/scrapers/#{scraper_name}/current_job/pages", @options.merge({body: payload.to_json}))
      end
    end
  end
end
@@ -1,28 +0,0 @@
1
module AnswersEngine
  module Client
    # Client for the variables of a scraper's current job
    # ("/scrapers/:scraper_name/current_job/vars").
    class ScraperJobVar < AnswersEngine::Client::Base
      # Fetch one variable of the current job.
      #
      # @param [String] scraper_name Scraper name.
      # @param var_name Variable name interpolated into the request path.
      #
      # @return Whatever the class-level `get` returns.
      def find(scraper_name, var_name)
        var_path = "/scrapers/#{scraper_name}/current_job/vars/#{var_name}"
        self.class.get(var_path, @options)
      end

      # List the variables of the current job.
      #
      # @param [String] scraper_name Scraper name.
      # @param [Hash] opts ({}) Extra request options merged over @options.
      #
      # @return Whatever the class-level `get` returns.
      def all(scraper_name, opts={})
        self.class.get("/scrapers/#{scraper_name}/current_job/vars", @options.merge(opts))
      end

      # Set a variable on the current job.
      #
      # @param [String] scraper_name Scraper name.
      # @param var_name Variable name interpolated into the request path.
      # @param value Value sent as :value.
      # @param [Hash] opts ({}) :secret is sent only when truthy.
      #
      # @return Whatever the class-level `put` returns.
      def set(scraper_name, var_name, value, opts={})
        payload = {value: value}
        secret = opts[:secret]
        payload[:secret] = secret if secret
        self.class.put("/scrapers/#{scraper_name}/current_job/vars/#{var_name}", @options.merge({body: payload.to_json}))
      end

      # Remove a variable from the current job.
      #
      # @param [String] scraper_name Scraper name.
      # @param var_name Variable name interpolated into the request path.
      # @param [Hash] opts ({}) Extra request options merged over @options.
      #
      # @return Whatever the class-level `delete` returns.
      def unset(scraper_name, var_name, opts={})
        self.class.delete("/scrapers/#{scraper_name}/current_job/vars/#{var_name}", @options.merge(opts))
      end
    end
  end
end
@@ -1,28 +0,0 @@
1
module AnswersEngine
  module Client
    # Client for a scraper's variables ("/scrapers/:scraper_name/vars").
    class ScraperVar < AnswersEngine::Client::Base
      # Fetch one variable of a scraper.
      #
      # @param [String] scraper_name Scraper name.
      # @param var_name Variable name interpolated into the request path.
      #
      # @return Whatever the class-level `get` returns.
      def find(scraper_name, var_name)
        var_path = "/scrapers/#{scraper_name}/vars/#{var_name}"
        self.class.get(var_path, @options)
      end

      # List the variables of a scraper.
      #
      # @param [String] scraper_name Scraper name.
      # @param [Hash] opts ({}) Extra request options merged over @options.
      #
      # @return Whatever the class-level `get` returns.
      def all(scraper_name, opts={})
        self.class.get("/scrapers/#{scraper_name}/vars", @options.merge(opts))
      end

      # Set a variable on a scraper.
      #
      # @param [String] scraper_name Scraper name.
      # @param var_name Variable name interpolated into the request path.
      # @param value Value sent as :value.
      # @param [Hash] opts ({}) :secret is sent only when truthy.
      #
      # @return Whatever the class-level `put` returns.
      def set(scraper_name, var_name, value, opts={})
        payload = {value: value}
        secret = opts[:secret]
        payload[:secret] = secret if secret
        self.class.put("/scrapers/#{scraper_name}/vars/#{var_name}", @options.merge({body: payload.to_json}))
      end

      # Remove a variable from a scraper.
      #
      # @param [String] scraper_name Scraper name.
      # @param var_name Variable name interpolated into the request path.
      # @param [Hash] opts ({}) Extra request options merged over @options.
      #
      # @return Whatever the class-level `delete` returns.
      def unset(scraper_name, var_name, opts={})
        self.class.delete("/scrapers/#{scraper_name}/vars/#{var_name}", @options.merge(opts))
      end
    end
  end
end
@@ -1,6 +0,0 @@
1
- require 'answersengine/plugin/context_exposer'
2
-
3
module AnswersEngine
  # Namespace for plugin modules; it declares no methods of its own.
  module Plugin; end
end
@@ -1,55 +0,0 @@
1
module AnswersEngine
  module Plugin
    # Mixin that builds a fresh object (and a binding on it) exposing a chosen
    # set of the including object's methods plus caller-supplied variables.
    #
    # The including class must define a class-level `exposed_methods`
    # returning the list of method names to expose.
    module ContextExposer
      # Placeholder that always raises; the instance-level #exposed_methods
      # calls `self.class.exposed_methods`, so including classes supply their
      # own implementation.
      #
      # @raise [NotImplementedError] always.
      def self.exposed_methods
        raise NotImplementedError, 'Specify methods exposed to isolated env'
      end

      # Method names to expose, as declared by the including class.
      #
      # @return The value of `self.class.exposed_methods`.
      def exposed_methods
        self.class.exposed_methods
      end

      # Create a lambda that either returns a variable or calls an instance
      # method on this object.
      #
      # @param [Hash] vars Variables to expose.
      # @param key Variable or method name.
      #
      # @return [Proc] A zero-argument lambda returning `vars[key]` when
      #   `vars` has that key; otherwise a lambda forwarding its arguments
      #   and block to the method named `key` on this object.
      def var_or_proc(vars, key)
        return lambda { vars[key] } if vars.has_key?(key)

        # The lambda later becomes a method body on another object, where
        # `self` is that object; capture this object explicitly so the call
        # does not recurse into the exposed method itself.
        host = self
        # Forward the block too, so exposed methods that yield keep working.
        lambda { |*args, &block| host.send(key, *args, &block) }
      end

      # Build the name => lambda table for the exposed methods and variables.
      # A variable takes precedence over an exposed method of the same name.
      #
      # @param [Hash] vars Variables to expose.
      #
      # @return [Hash] Each unique key mapped to its #var_or_proc lambda.
      def exposed_env(vars)
        names = (exposed_methods + vars.keys).uniq
        Hash[names.map { |name| [name, var_or_proc(vars, name)] }]
      end

      # Define every entry of `env` as a singleton method on `object`.
      #
      # @param object Object receiving the methods.
      # @param [Hash] env Method name => lambda, as built by #exposed_env.
      #
      # @return The same `object`.
      def expose_to(object, env)
        singleton = class << object; self; end
        env.each do |name, body|
          singleton.send(:define_method, name, body)
        end
        object
      end

      # Create an isolated context object from self.
      #
      # The object is created by evaluating a script against
      # TOPLEVEL_BINDING; it gets a `context_binding` method returning a
      # binding whose `self` is the object, plus one singleton method per
      # exposed method and variable.
      #
      # @param [Hash] vars ({}) Variables to expose.
      #
      # @return [Object] The context object.
      def create_context(vars = {})
        create_top_object_script = '(
          lambda do
            object = Object.new
            class << object
              define_method(:context_binding){binding}
            end
            object
          end
        ).call'
        object = TOPLEVEL_BINDING.eval(create_top_object_script)
        expose_to(object, exposed_env(vars))
      end

      # Create an isolated binding.
      #
      # @param [Hash] vars ({}) Variables to expose.
      #
      # @return [Binding] The `context_binding` of a new context object.
      def isolated_binding(vars = {})
        create_context(vars).context_binding
      end
    end
  end
end