answersengine 0.10.1 → 0.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. checksums.yaml +4 -4
  2. data/CODE_OF_CONDUCT.md +1 -1
  3. data/LICENSE.txt +1 -1
  4. data/README.md +3 -4
  5. data/answersengine.gemspec +6 -12
  6. data/exe/answersengine +3 -2
  7. data/lib/answersengine.rb +20 -3
  8. metadata +14 -152
  9. data/examples/fetchtest/libraries/hello.rb +0 -9
  10. data/examples/fetchtest/libraries/hello_fail.rb +0 -10
  11. data/examples/fetchtest/parsers/failed.rb +0 -2
  12. data/examples/fetchtest/parsers/find_outputs.rb +0 -18
  13. data/examples/fetchtest/parsers/home.rb +0 -50
  14. data/examples/fetchtest/parsers/nested_fail.rb +0 -3
  15. data/examples/fetchtest/parsers/simple.rb +0 -14
  16. data/examples/fetchtest/seeders/csv_seeder.rb +0 -12
  17. data/examples/fetchtest/seeders/failed.rb +0 -1
  18. data/examples/fetchtest/seeders/list_of_urls.csv +0 -5
  19. data/examples/fetchtest/seeders/seed.rb +0 -28
  20. data/examples/fetchtest/seeders/test_reset_page.rb +0 -4
  21. data/lib/answersengine/cli.rb +0 -45
  22. data/lib/answersengine/cli/env_var.rb +0 -48
  23. data/lib/answersengine/cli/finisher.rb +0 -40
  24. data/lib/answersengine/cli/global_page.rb +0 -39
  25. data/lib/answersengine/cli/job.rb +0 -30
  26. data/lib/answersengine/cli/job_output.rb +0 -69
  27. data/lib/answersengine/cli/parser.rb +0 -64
  28. data/lib/answersengine/cli/scraper.rb +0 -185
  29. data/lib/answersengine/cli/scraper_deployment.rb +0 -24
  30. data/lib/answersengine/cli/scraper_export.rb +0 -51
  31. data/lib/answersengine/cli/scraper_exporter.rb +0 -40
  32. data/lib/answersengine/cli/scraper_finisher.rb +0 -20
  33. data/lib/answersengine/cli/scraper_job.rb +0 -75
  34. data/lib/answersengine/cli/scraper_job_var.rb +0 -48
  35. data/lib/answersengine/cli/scraper_page.rb +0 -203
  36. data/lib/answersengine/cli/scraper_var.rb +0 -48
  37. data/lib/answersengine/cli/seeder.rb +0 -40
  38. data/lib/answersengine/client.rb +0 -29
  39. data/lib/answersengine/client/auth_token.rb +0 -50
  40. data/lib/answersengine/client/backblaze_content.rb +0 -45
  41. data/lib/answersengine/client/base.rb +0 -55
  42. data/lib/answersengine/client/deploy_key.rb +0 -21
  43. data/lib/answersengine/client/env_var.rb +0 -28
  44. data/lib/answersengine/client/export.rb +0 -10
  45. data/lib/answersengine/client/global_page.rb +0 -18
  46. data/lib/answersengine/client/job.rb +0 -64
  47. data/lib/answersengine/client/job_export.rb +0 -10
  48. data/lib/answersengine/client/job_log.rb +0 -26
  49. data/lib/answersengine/client/job_output.rb +0 -19
  50. data/lib/answersengine/client/job_page.rb +0 -58
  51. data/lib/answersengine/client/job_stat.rb +0 -16
  52. data/lib/answersengine/client/scraper.rb +0 -57
  53. data/lib/answersengine/client/scraper_deployment.rb +0 -18
  54. data/lib/answersengine/client/scraper_export.rb +0 -22
  55. data/lib/answersengine/client/scraper_exporter.rb +0 -14
  56. data/lib/answersengine/client/scraper_finisher.rb +0 -16
  57. data/lib/answersengine/client/scraper_job.rb +0 -49
  58. data/lib/answersengine/client/scraper_job_output.rb +0 -19
  59. data/lib/answersengine/client/scraper_job_page.rb +0 -67
  60. data/lib/answersengine/client/scraper_job_var.rb +0 -28
  61. data/lib/answersengine/client/scraper_var.rb +0 -28
  62. data/lib/answersengine/plugin.rb +0 -6
  63. data/lib/answersengine/plugin/context_exposer.rb +0 -55
  64. data/lib/answersengine/scraper.rb +0 -18
  65. data/lib/answersengine/scraper/executor.rb +0 -373
  66. data/lib/answersengine/scraper/finisher.rb +0 -18
  67. data/lib/answersengine/scraper/parser.rb +0 -18
  68. data/lib/answersengine/scraper/ruby_finisher_executor.rb +0 -116
  69. data/lib/answersengine/scraper/ruby_parser_executor.rb +0 -200
  70. data/lib/answersengine/scraper/ruby_seeder_executor.rb +0 -120
  71. data/lib/answersengine/scraper/seeder.rb +0 -18
  72. data/lib/answersengine/version.rb +0 -3
@@ -1,200 +0,0 @@
1
module AnswersEngine
  module Scraper
    # Executes a Ruby parser script for one page of a job and reports the
    # parsing status back through +parsing_update+.
    #
    # NOTE(review): +filename+, +gid+, +job_id+, +init_page+, +isolated_binding+,
    # +eval_with_context+, +save_pages_and_outputs+, +parsing_update+,
    # +get_content+, +get_failed_content+ and +clean_backtrace+ are not defined
    # in this class; presumably they come from Executor -- confirm there.
    class RubyParserExecutor < Executor
      # When falsy, status updates are skipped and refetch/reparse requests are
      # only announced on stdout instead of being sent.
      # @return [Boolean]
      attr_accessor :save

      # Refetch self page flag.
      # @return [Boolean]
      # @note It is stronger than #reparse_self flag.
      attr_accessor :refetch_self

      # Reparse self page flag.
      # @return [Boolean]
      attr_accessor :reparse_self

      # @param options [Hash]
      # @option options :filename parser script to evaluate (required)
      # @option options :gid GID of the page being parsed (required)
      # @option options :job_id job identifier (required)
      # @option options :vars page vars; defaults to an empty hash
      # @raise [RuntimeError] when :filename or :gid is missing
      # @raise [KeyError] when :job_id is missing
      def initialize(options = {})
        @filename = options.fetch(:filename) { raise "Filename is required" }
        @gid = options.fetch(:gid) { raise "GID is required" }
        @job_id = options.fetch(:job_id)
        @page_vars = options.fetch(:vars) { {} }
      end

      # Names of the executor methods listed as exposed.
      # NOTE(review): presumably consumed by Executor when building the
      # script context -- confirm against Executor#isolated_binding.
      # @return [Array<Symbol>] frozen list of method names
      def self.exposed_methods
        %i[
          content
          failed_content
          outputs
          pages
          page
          save_pages
          save_outputs
          find_output
          find_outputs
          refetch
          reparse
        ].freeze
      end

      # Records the save mode, announces it on stdout and runs the script.
      # @param save [Boolean] true to execute for real, false for a trial run
      # @return whatever #eval_parser_script returns
      def exec_parser(save = false)
        @save = save
        puts(save ? "Executing parser script" : "Trying parser script")

        eval_parser_script(save)
      end

      # Attaches the vars given at construction time to the page, unless they
      # are nil or empty.
      # @param page [Hash] page record (mutated in place)
      # @return [Hash] the same page object
      def init_page_vars(page)
        page['vars'] = @page_vars unless @page_vars.nil? || @page_vars.empty?
        page
      end

      # Forwards a generic update request to +parsing_update+, mapping
      # +opts[:status]+ onto the +parsing_status+ keyword.
      # @param opts [Hash] may carry :job_id, :gid, :pages, :outputs, :status
      # @return the result of +parsing_update+
      def update_to_server(opts = {})
        parsing_update(
          job_id: opts[:job_id],
          gid: opts[:gid],
          pages: opts[:pages],
          outputs: opts[:outputs],
          parsing_status: opts[:status])
      end

      # Marks the page as +:starting+ on the server. No-op unless #save is set.
      # @raise [RuntimeError] when the response code is not 200
      def update_parsing_starting_status
        return unless save

        response = parsing_update(
          job_id: job_id,
          gid: gid,
          parsing_status: :starting)

        if response.code == 200
          puts "Page Parsing Status Updated."
        else
          puts "Error: Unable to save Page Parsing Status to server: #{response.body}"
          raise "Unable to save Page Parsing Status to server: #{response.body}"
        end
      end

      # Marks the page as +:done+ on the server. No-op unless #save is set.
      # @raise [RuntimeError] when the response code is not 200
      def update_parsing_done_status
        return unless save

        response = parsing_update(
          job_id: job_id,
          gid: gid,
          parsing_status: :done)

        if response.code == 200
          puts "Page Parsing Done."
        else
          puts "Error: Unable to save Page Parsing Done Status to server: #{response.body}"
          raise "Unable to save Page Parsing Done Status to server: #{response.body}"
        end
      end

      # @return [Symbol] always +:parsing+
      def save_type
        :parsing
      end

      # Requests a refetch of the given page for this job, or only announces it
      # when #save is not set.
      # @param gid GID of the page to refetch (shadows the +gid+ reader inside
      #   this method, hence the explicit +self.job_id+)
      def refetch_page(gid)
        if save
          Client::ScraperJobPage.new({gid: gid}).refetch_by_job(self.job_id)
          puts "Refetch page #{gid}"
        else
          puts "Would have refetch page #{gid}"
        end
      end

      # Refetches a page. When +page_gid+ is this executor's own page, the
      # request is only flagged via #refetch_self and carried out at the end of
      # #eval_parser_script.
      # @param page_gid [String]
      # @raise [ArgumentError] when +page_gid+ is not a String
      def refetch(page_gid)
        raise ArgumentError, "page_gid needs to be a String." unless page_gid.is_a?(String)

        if page_gid == gid
          self.refetch_self = true
          return
        end
        refetch_page page_gid
      end

      # Requests a reparse of the given page for this job, or only announces it
      # when #save is not set.
      # @param gid GID of the page to reparse (shadows the +gid+ reader inside
      #   this method, hence the explicit +self.job_id+)
      def reparse_page(gid)
        if save
          Client::ScraperJobPage.new({gid: gid}).reparse_by_job(self.job_id)
          puts "Reparse page #{gid}"
        else
          puts "Would have reparse page #{gid}"
        end
      end

      # Reparses a page. When +page_gid+ is this executor's own page, the
      # request is only flagged via #reparse_self and carried out at the end of
      # #eval_parser_script.
      # @param page_gid [String]
      # @raise [ArgumentError] when +page_gid+ is not a String
      def reparse(page_gid)
        raise ArgumentError, "page_gid needs to be a String." unless page_gid.is_a?(String)

        if page_gid == gid
          self.reparse_self = true
          return
        end
        reparse_page page_gid
      end

      # Runs the parser script and persists what it produced.
      #
      # Steps: report the +:starting+ status, evaluate the script with
      # +outputs+, +pages+ and +page+ in its context, hand the collected pages
      # and outputs to +save_pages_and_outputs+, then either refetch this page,
      # reparse it, or report +:done+ (refetch wins over reparse).
      #
      # The +save+ argument only gates error reporting inside this method; the
      # status and refetch/reparse helpers read the #save accessor, which
      # #exec_parser sets to the same value before calling here.
      #
      # @param save [Boolean] report failures to the server when true
      # @raise re-raises any SyntaxError or StandardError from the script or
      #   from saving, after optionally reporting it
      def eval_parser_script(save = false)
        update_parsing_starting_status

        page = init_page_vars(init_page)
        outputs = []
        pages = []
        self.refetch_self = false
        self.reparse_self = false

        begin
          context = isolated_binding({
            outputs: outputs,
            pages: pages,
            page: page
          })
          eval_with_context filename, context
        rescue SyntaxError, StandardError => e
          # SyntaxError is a ScriptError, not a StandardError, so it has to be
          # listed explicitly to be reported as well.
          handle_error(e) if save
          raise
        end

        puts "=========== Parsing Executed ==========="
        begin
          save_pages_and_outputs(pages, outputs, :parsing)
        rescue => e
          handle_error(e) if save
          raise
        end

        if refetch_self
          refetch_page gid
        elsif reparse_self
          reparse_page gid
        else
          update_parsing_done_status
        end
      end

      # @return the page content, fetched once through +get_content+ and then
      #   memoized (a nil/false result is fetched again on the next call)
      def content
        @content ||= get_content(gid)
      end

      # @return the failed page content, fetched once through
      #   +get_failed_content+ and then memoized (same caveat as #content)
      def failed_content
        @failed_content ||= get_failed_content(gid)
      end

      # Reports a failure to the server: sets the parsing status to +:failed+
      # and attaches the error class, message, job/GID and cleaned backtrace.
      # @param e [Exception]
      # @return the result of +parsing_update+
      def handle_error(e)
        error = ["Parsing #{e.class}: #{e.to_s} (Job:#{job_id} GID:#{gid})", clean_backtrace(e.backtrace)].join("\n")

        parsing_update(
          job_id: job_id,
          gid: gid,
          parsing_status: :failed,
          log_error: error)
      end
    end
  end
end
@@ -1,120 +0,0 @@
1
module AnswersEngine
  module Scraper
    # Executes a Ruby seeder script for a job and reports the seeding status
    # back through +seeding_update+.
    #
    # NOTE(review): +filename+, +job_id+, +isolated_binding+,
    # +eval_with_context+, +save_pages_and_outputs+, +seeding_update+ and
    # +clean_backtrace+ are not defined in this class; presumably they come
    # from Executor -- confirm there.
    class RubySeederExecutor < Executor
      # When falsy, status updates are skipped.
      # @return [Boolean]
      attr_accessor :save

      # @param options [Hash]
      # @option options :filename seeder script to evaluate (required)
      # @option options :job_id job identifier (optional, nil when absent)
      # @raise [RuntimeError] when :filename is missing
      def initialize(options = {})
        @filename = options.fetch(:filename) { raise "Filename is required" }
        @job_id = options[:job_id]
      end

      # Names of the executor methods listed as exposed.
      # NOTE(review): presumably consumed by Executor when building the
      # script context -- confirm against Executor#isolated_binding.
      # @return [Array<Symbol>] frozen list of method names
      def self.exposed_methods
        %i[
          outputs
          pages
          save_pages
          save_outputs
          find_output
          find_outputs
        ].freeze
      end

      # Records the save mode, announces it on stdout and runs the script.
      # @param save [Boolean] true to execute for real, false for a trial run
      # @return whatever #eval_seeder_script returns
      def exec_seeder(save = false)
        @save = save
        puts(save ? "Executing seeder script" : "Trying seeder script")

        eval_seeder_script(save)
      end

      # Runs the seeder script and persists what it produced.
      #
      # Steps: report the +:starting+ status, evaluate the script with
      # +outputs+ and +pages+ in its context, hand the collected pages and
      # outputs to +save_pages_and_outputs+, then report +:done+.
      #
      # The +save+ argument only gates error reporting inside this method; the
      # status helpers read the #save accessor, which #exec_seeder sets to the
      # same value before calling here.
      #
      # @param save [Boolean] report failures to the server when true
      # @raise re-raises any SyntaxError or StandardError from the script or
      #   from saving, after optionally reporting it
      def eval_seeder_script(save = false)
        update_seeding_starting_status

        outputs = []
        pages = []

        begin
          context = isolated_binding({
            outputs: outputs,
            pages: pages
          })
          eval_with_context filename, context
        rescue SyntaxError, StandardError => e
          # SyntaxError is a ScriptError, not a StandardError, so it has to be
          # listed explicitly to be reported as well.
          handle_error(e) if save
          raise
        end

        puts "=========== Seeding Executed ==========="
        begin
          save_pages_and_outputs(pages, outputs, :seeding)
        rescue => e
          handle_error(e) if save
          raise
        end

        update_seeding_done_status
      end

      # @return [Symbol] always +:seeding+
      def save_type
        :seeding
      end

      # Forwards a generic update request to +seeding_update+, mapping
      # +opts[:status]+ onto the +seeding_status+ keyword.
      # @param opts [Hash] may carry :job_id, :pages, :outputs, :status
      # @return the result of +seeding_update+
      def update_to_server(opts = {})
        seeding_update(
          job_id: opts[:job_id],
          pages: opts[:pages],
          outputs: opts[:outputs],
          seeding_status: opts[:status])
      end

      # Marks the job as +:starting+ on the server. No-op unless #save is set.
      # @raise [RuntimeError] when the response code is not 200
      def update_seeding_starting_status
        return unless save

        response = seeding_update(
          job_id: job_id,
          seeding_status: :starting)

        if response.code == 200
          puts "Seeding Status Updated."
        else
          puts "Error: Unable to save Seeding Status to server: #{response.body}"
          raise "Unable to save Seeding Status to server: #{response.body}"
        end
      end

      # Marks the job as +:done+ on the server. No-op unless #save is set.
      # @raise [RuntimeError] when the response code is not 200
      def update_seeding_done_status
        return unless save

        response = seeding_update(
          job_id: job_id,
          seeding_status: :done)

        if response.code == 200
          puts "Seeding Done."
        else
          puts "Error: Unable to save Seeding Done Status to server: #{response.body}"
          raise "Unable to save Seeding Done Status to server: #{response.body}"
        end
      end

      # Reports a failure to the server: sets the seeding status to +:failed+
      # and attaches the error class, message, job id and cleaned backtrace.
      # @param e [Exception]
      # @return the result of +seeding_update+
      def handle_error(e)
        # The closing parenthesis after the job id was missing in the logged
        # message; it now mirrors the parser executor's "(Job:… GID:…)" form.
        error = ["Seeding #{e.class}: #{e.to_s} (Job:#{job_id})", clean_backtrace(e.backtrace)].join("\n")

        seeding_update(
          job_id: job_id,
          seeding_status: :failed,
          log_error: error)
      end
    end
  end
end
@@ -1,18 +0,0 @@
1
module AnswersEngine
  module Scraper
    # Picks a seeder executor from the script's file extension.
    class Seeder
      # Runs the seeder script with the executor matching its extension.
      # Only +.rb+ scripts are supported; for any other extension a notice is
      # printed to stdout and nothing is executed.
      #
      # @param filename [String] path of the seeder script
      # @param job_id job identifier handed to the executor (may be nil)
      # @param save [Boolean] forwarded to the executor's +exec_seeder+
      # @return the executor's result, or nil for an unsupported extension
      def self.exec_seeder(filename, job_id=nil, save=false)
        extension = File.extname(filename)

        unless extension == '.rb'
          puts "Unable to find a seeder executor for file type \"#{extension}\""
          return
        end

        RubySeederExecutor.new(filename: filename, job_id: job_id).exec_seeder(save)
      end
    end
  end
end
@@ -1,3 +0,0 @@
1
module AnswersEngine
  # Gem version string. Frozen so the shared constant cannot be mutated in
  # place by code that reads it.
  VERSION = "0.10.1".freeze
end