answersengine 0.10.1 → 0.10.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72)
  1. checksums.yaml +4 -4
  2. data/CODE_OF_CONDUCT.md +1 -1
  3. data/LICENSE.txt +1 -1
  4. data/README.md +3 -4
  5. data/answersengine.gemspec +6 -12
  6. data/exe/answersengine +3 -2
  7. data/lib/answersengine.rb +20 -3
  8. metadata +14 -152
  9. data/examples/fetchtest/libraries/hello.rb +0 -9
  10. data/examples/fetchtest/libraries/hello_fail.rb +0 -10
  11. data/examples/fetchtest/parsers/failed.rb +0 -2
  12. data/examples/fetchtest/parsers/find_outputs.rb +0 -18
  13. data/examples/fetchtest/parsers/home.rb +0 -50
  14. data/examples/fetchtest/parsers/nested_fail.rb +0 -3
  15. data/examples/fetchtest/parsers/simple.rb +0 -14
  16. data/examples/fetchtest/seeders/csv_seeder.rb +0 -12
  17. data/examples/fetchtest/seeders/failed.rb +0 -1
  18. data/examples/fetchtest/seeders/list_of_urls.csv +0 -5
  19. data/examples/fetchtest/seeders/seed.rb +0 -28
  20. data/examples/fetchtest/seeders/test_reset_page.rb +0 -4
  21. data/lib/answersengine/cli.rb +0 -45
  22. data/lib/answersengine/cli/env_var.rb +0 -48
  23. data/lib/answersengine/cli/finisher.rb +0 -40
  24. data/lib/answersengine/cli/global_page.rb +0 -39
  25. data/lib/answersengine/cli/job.rb +0 -30
  26. data/lib/answersengine/cli/job_output.rb +0 -69
  27. data/lib/answersengine/cli/parser.rb +0 -64
  28. data/lib/answersengine/cli/scraper.rb +0 -185
  29. data/lib/answersengine/cli/scraper_deployment.rb +0 -24
  30. data/lib/answersengine/cli/scraper_export.rb +0 -51
  31. data/lib/answersengine/cli/scraper_exporter.rb +0 -40
  32. data/lib/answersengine/cli/scraper_finisher.rb +0 -20
  33. data/lib/answersengine/cli/scraper_job.rb +0 -75
  34. data/lib/answersengine/cli/scraper_job_var.rb +0 -48
  35. data/lib/answersengine/cli/scraper_page.rb +0 -203
  36. data/lib/answersengine/cli/scraper_var.rb +0 -48
  37. data/lib/answersengine/cli/seeder.rb +0 -40
  38. data/lib/answersengine/client.rb +0 -29
  39. data/lib/answersengine/client/auth_token.rb +0 -50
  40. data/lib/answersengine/client/backblaze_content.rb +0 -45
  41. data/lib/answersengine/client/base.rb +0 -55
  42. data/lib/answersengine/client/deploy_key.rb +0 -21
  43. data/lib/answersengine/client/env_var.rb +0 -28
  44. data/lib/answersengine/client/export.rb +0 -10
  45. data/lib/answersengine/client/global_page.rb +0 -18
  46. data/lib/answersengine/client/job.rb +0 -64
  47. data/lib/answersengine/client/job_export.rb +0 -10
  48. data/lib/answersengine/client/job_log.rb +0 -26
  49. data/lib/answersengine/client/job_output.rb +0 -19
  50. data/lib/answersengine/client/job_page.rb +0 -58
  51. data/lib/answersengine/client/job_stat.rb +0 -16
  52. data/lib/answersengine/client/scraper.rb +0 -57
  53. data/lib/answersengine/client/scraper_deployment.rb +0 -18
  54. data/lib/answersengine/client/scraper_export.rb +0 -22
  55. data/lib/answersengine/client/scraper_exporter.rb +0 -14
  56. data/lib/answersengine/client/scraper_finisher.rb +0 -16
  57. data/lib/answersengine/client/scraper_job.rb +0 -49
  58. data/lib/answersengine/client/scraper_job_output.rb +0 -19
  59. data/lib/answersengine/client/scraper_job_page.rb +0 -67
  60. data/lib/answersengine/client/scraper_job_var.rb +0 -28
  61. data/lib/answersengine/client/scraper_var.rb +0 -28
  62. data/lib/answersengine/plugin.rb +0 -6
  63. data/lib/answersengine/plugin/context_exposer.rb +0 -55
  64. data/lib/answersengine/scraper.rb +0 -18
  65. data/lib/answersengine/scraper/executor.rb +0 -373
  66. data/lib/answersengine/scraper/finisher.rb +0 -18
  67. data/lib/answersengine/scraper/parser.rb +0 -18
  68. data/lib/answersengine/scraper/ruby_finisher_executor.rb +0 -116
  69. data/lib/answersengine/scraper/ruby_parser_executor.rb +0 -200
  70. data/lib/answersengine/scraper/ruby_seeder_executor.rb +0 -120
  71. data/lib/answersengine/scraper/seeder.rb +0 -18
  72. data/lib/answersengine/version.rb +0 -3
@@ -1,200 +0,0 @@
1
- module AnswersEngine
2
- module Scraper
3
- class RubyParserExecutor < Executor
4
- attr_accessor :save
5
- # Refetch self page flag.
6
- # @return [Boolean]
7
- # @note It is stronger than #reparse_self flag.
8
- attr_accessor :refetch_self
9
- # Reparse self page flag.
10
- # @return [Boolean]
11
- attr_accessor :reparse_self
12
-
13
- def initialize(options={})
14
- @filename = options.fetch(:filename) { raise "Filename is required"}
15
- @gid = options.fetch(:gid) { raise "GID is required"}
16
- @job_id = options.fetch(:job_id)
17
- @page_vars = options.fetch(:vars) { {} }
18
- end
19
-
20
- def self.exposed_methods
21
- [
22
- :content,
23
- :failed_content,
24
- :outputs,
25
- :pages,
26
- :page,
27
- :save_pages,
28
- :save_outputs,
29
- :find_output,
30
- :find_outputs,
31
- :refetch,
32
- :reparse
33
- ].freeze
34
- end
35
-
36
- def exec_parser(save=false)
37
- @save = save
38
- if save
39
- puts "Executing parser script"
40
- else
41
- puts "Trying parser script"
42
- end
43
-
44
- eval_parser_script(save)
45
- end
46
-
47
- def init_page_vars(page)
48
- if !@page_vars.nil? && !@page_vars.empty?
49
- page['vars'] = @page_vars
50
- end
51
- page
52
- end
53
-
54
- def update_to_server(opts = {})
55
- parsing_update(
56
- job_id: opts[:job_id],
57
- gid: opts[:gid],
58
- pages: opts[:pages],
59
- outputs: opts[:outputs],
60
- parsing_status: opts[:status])
61
- end
62
-
63
- def update_parsing_starting_status
64
- return unless save
65
-
66
- response = parsing_update(
67
- job_id: job_id,
68
- gid: gid,
69
- parsing_status: :starting)
70
-
71
- if response.code == 200
72
- puts "Page Parsing Status Updated."
73
- else
74
- puts "Error: Unable to save Page Parsing Status to server: #{response.body}"
75
- raise "Unable to save Page Parsing Status to server: #{response.body}"
76
- end
77
- end
78
-
79
- def update_parsing_done_status
80
- return unless save
81
-
82
- response = parsing_update(
83
- job_id: job_id,
84
- gid: gid,
85
- parsing_status: :done)
86
-
87
- if response.code == 200
88
- puts "Page Parsing Done."
89
- else
90
- puts "Error: Unable to save Page Parsing Done Status to server: #{response.body}"
91
- raise "Unable to save Page Parsing Done Status to server: #{response.body}"
92
- end
93
- end
94
-
95
- def save_type
96
- :parsing
97
- end
98
-
99
- def refetch_page gid
100
- if save
101
- Client::ScraperJobPage.new({gid: gid}).refetch_by_job(self.job_id)
102
- puts "Refetch page #{gid}"
103
- else
104
- puts "Would have refetch page #{gid}"
105
- end
106
- end
107
-
108
- def refetch page_gid
109
- raise ArgumentError.new("page_gid needs to be a String.") unless page_gid.is_a?(String)
110
- if page_gid == gid
111
- self.refetch_self = true
112
- return
113
- end
114
- refetch_page page_gid
115
- end
116
-
117
- def reparse_page gid
118
- if save
119
- Client::ScraperJobPage.new({gid: gid}).reparse_by_job(self.job_id)
120
- puts "Reparse page #{gid}"
121
- else
122
- puts "Would have reparse page #{gid}"
123
- end
124
- end
125
-
126
- def reparse page_gid
127
- raise ArgumentError.new("page_gid needs to be a String.") unless page_gid.is_a?(String)
128
- if page_gid == gid
129
- self.reparse_self = true
130
- return
131
- end
132
- reparse_page page_gid
133
- end
134
-
135
- def eval_parser_script(save=false)
136
- update_parsing_starting_status
137
-
138
- proc = Proc.new do
139
- page = init_page
140
- outputs = []
141
- pages = []
142
- page = init_page_vars(page)
143
- self.refetch_self = false
144
- self.reparse_self = false
145
-
146
- begin
147
- context = isolated_binding({
148
- outputs: outputs,
149
- pages: pages,
150
- page: page
151
- })
152
- eval_with_context filename, context
153
- rescue SyntaxError => e
154
- handle_error(e) if save
155
- raise e
156
- rescue => e
157
- handle_error(e) if save
158
- raise e
159
- end
160
-
161
- puts "=========== Parsing Executed ==========="
162
- begin
163
- save_pages_and_outputs(pages, outputs, :parsing)
164
- rescue => e
165
- handle_error(e) if save
166
- raise e
167
- end
168
-
169
- if refetch_self
170
- refetch_page gid
171
- elsif reparse_self
172
- reparse_page gid
173
- else
174
- update_parsing_done_status
175
- end
176
- end
177
- proc.call
178
- end
179
-
180
- def content
181
- @content ||= get_content(gid)
182
- end
183
-
184
- def failed_content
185
- @failed_content ||= get_failed_content(gid)
186
- end
187
-
188
- def handle_error(e)
189
- error = ["Parsing #{e.class}: #{e.to_s} (Job:#{job_id} GID:#{gid})",clean_backtrace(e.backtrace)].join("\n")
190
-
191
- parsing_update(
192
- job_id: job_id,
193
- gid: gid,
194
- parsing_status: :failed,
195
- log_error: error)
196
- end
197
-
198
- end
199
- end
200
- end
@@ -1,120 +0,0 @@
1
- module AnswersEngine
2
- module Scraper
3
- class RubySeederExecutor < Executor
4
- attr_accessor :save
5
-
6
- def initialize(options={})
7
- @filename = options.fetch(:filename) { raise "Filename is required"}
8
- @job_id = options[:job_id]
9
- end
10
-
11
- def self.exposed_methods
12
- [
13
- :outputs,
14
- :pages,
15
- :save_pages,
16
- :save_outputs,
17
- :find_output,
18
- :find_outputs
19
- ].freeze
20
- end
21
-
22
- def exec_seeder(save=false)
23
- @save = save
24
- if save
25
- puts "Executing seeder script"
26
- else
27
- puts "Trying seeder script"
28
- end
29
-
30
- eval_seeder_script(save)
31
- end
32
-
33
- def eval_seeder_script(save=false)
34
- update_seeding_starting_status
35
-
36
- proc = Proc.new do
37
- outputs = []
38
- pages = []
39
-
40
- begin
41
- context = isolated_binding({
42
- outputs: outputs,
43
- pages: pages
44
- })
45
- eval_with_context filename, context
46
- rescue SyntaxError => e
47
- handle_error(e) if save
48
- raise e
49
- rescue => e
50
- handle_error(e) if save
51
- raise e
52
- end
53
-
54
- puts "=========== Seeding Executed ==========="
55
- begin
56
- save_pages_and_outputs(pages, outputs, :seeding)
57
- rescue => e
58
- handle_error(e) if save
59
- raise e
60
- end
61
-
62
- update_seeding_done_status
63
- end
64
- proc.call
65
- end
66
-
67
- def save_type
68
- :seeding
69
- end
70
-
71
- def update_to_server(opts = {})
72
- seeding_update(
73
- job_id: opts[:job_id],
74
- pages: opts[:pages],
75
- outputs: opts[:outputs],
76
- seeding_status: opts[:status])
77
- end
78
-
79
- def update_seeding_starting_status
80
- return unless save
81
-
82
- response = seeding_update(
83
- job_id: job_id,
84
- seeding_status: :starting)
85
-
86
- if response.code == 200
87
- puts "Seeding Status Updated."
88
- else
89
- puts "Error: Unable to save Seeding Status to server: #{response.body}"
90
- raise "Unable to save Seeding Status to server: #{response.body}"
91
- end
92
- end
93
-
94
- def update_seeding_done_status
95
- return unless save
96
-
97
- response = seeding_update(
98
- job_id: job_id,
99
- seeding_status: :done)
100
-
101
- if response.code == 200
102
- puts "Seeding Done."
103
- else
104
- puts "Error: Unable to save Seeding Done Status to server: #{response.body}"
105
- raise "Unable to save Seeding Done Status to server: #{response.body}"
106
- end
107
- end
108
-
109
- def handle_error(e)
110
- error = ["Seeding #{e.class}: #{e.to_s} (Job:#{job_id}",clean_backtrace(e.backtrace)].join("\n")
111
-
112
- seeding_update(
113
- job_id: job_id,
114
- seeding_status: :failed,
115
- log_error: error)
116
- end
117
-
118
- end
119
- end
120
- end
@@ -1,18 +0,0 @@
1
- module AnswersEngine
2
- module Scraper
3
- class Seeder
4
-
5
- def self.exec_seeder(filename, job_id=nil, save=false)
6
- extname = File.extname(filename)
7
- case extname
8
- when '.rb'
9
- executor = RubySeederExecutor.new(filename: filename, job_id: job_id)
10
- executor.exec_seeder(save)
11
- else
12
- puts "Unable to find a seeder executor for file type \"#{extname}\""
13
- end
14
- end
15
-
16
- end
17
- end
18
- end
@@ -1,3 +0,0 @@
1
- module AnswersEngine
2
- VERSION = "0.10.1"
3
- end