answersengine 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e1025b7622002c5eb5fef3328f51171e574404f8c54d2c334a88a9a432a02e34
|
4
|
+
data.tar.gz: ce8c68dbeecd1c6151fc294395b59051cac3584419012991d31d184204160732
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a0708e1189d35a1b3b3b8a19ad98340dc461a88079b7e5587f3622183e6235531299ceef7e42931e958d6ac62c17778183f7d26e730c01df5c83b6eab364b18b
|
7
|
+
data.tar.gz: 97085efab1572619ef43d7014da56d768686b72a4c60ae00bdea516a619832444370d0a8024678f9896111d9eedd17d5ef4679e4c2a524c7b9918664f4557b4f
|
@@ -26,11 +26,21 @@ module AnswersEngine
|
|
26
26
|
self.class.put("/scrapers/#{scraper_name}/current_job/pages/refetch", params)
|
27
27
|
end
|
28
28
|
|
29
|
+
# Issues a PUT request to the page-refetch endpoint of a specific job.
#
# @param job_id [Object] job identifier; interpolated into the request path.
# @param opts [Hash] extra request options, merged over the client's
#   @options (keys in +opts+ win on conflict).
# @return [Object] whatever +self.class.put+ returns.
def refetch_by_job(job_id, opts={})
  params = @options.merge(opts)
  self.class.put("/jobs/#{job_id}/pages/refetch", params)
end
|
33
|
+
|
29
34
|
# Issues a PUT request to the page-reparse endpoint of a scraper's
# current job.
#
# @param scraper_name [Object] scraper name; interpolated into the request path.
# @param opts [Hash] extra request options, merged over the client's
#   @options (keys in +opts+ win on conflict).
# @return [Object] whatever +self.class.put+ returns.
def reparse(scraper_name, opts={})
  params = @options.merge(opts)
  self.class.put("/scrapers/#{scraper_name}/current_job/pages/reparse", params)
end
|
33
38
|
|
39
|
+
# Issues a PUT request to the page-reparse endpoint of a specific job.
#
# @param job_id [Object] job identifier; interpolated into the request path.
# @param opts [Hash] extra request options, merged over the client's
#   @options (keys in +opts+ win on conflict).
# @return [Object] whatever +self.class.put+ returns.
def reparse_by_job(job_id, opts={})
  params = @options.merge(opts)
  self.class.put("/jobs/#{job_id}/pages/reparse", params)
end
|
43
|
+
|
34
44
|
def enqueue(scraper_name, method, url, opts={})
|
35
45
|
body = {}
|
36
46
|
body[:method] = method != "" ? method : "GET"
|
@@ -2,6 +2,13 @@ module AnswersEngine
|
|
2
2
|
module Scraper
|
3
3
|
class RubyParserExecutor < Executor
|
4
4
|
attr_accessor :save
|
5
|
+
# Refetch self page flag.
|
6
|
+
# @return [Boolean]
|
7
|
+
# @note Takes precedence over the #reparse_self flag when both are set.
|
8
|
+
attr_accessor :refetch_self
|
9
|
+
# Reparse self page flag.
|
10
|
+
# @return [Boolean]
|
11
|
+
attr_accessor :reparse_self
|
5
12
|
|
6
13
|
def initialize(options={})
|
7
14
|
@filename = options.fetch(:filename) { raise "Filename is required"}
|
@@ -20,7 +27,9 @@ module AnswersEngine
|
|
20
27
|
:save_pages,
|
21
28
|
:save_outputs,
|
22
29
|
:find_output,
|
23
|
-
:find_outputs
|
30
|
+
:find_outputs,
|
31
|
+
:refetch,
|
32
|
+
:reparse
|
24
33
|
].freeze
|
25
34
|
end
|
26
35
|
|
@@ -87,6 +96,42 @@ module AnswersEngine
|
|
87
96
|
:parsing
|
88
97
|
end
|
89
98
|
|
99
|
+
# Requests a refetch of the page identified by +gid+ for this executor's job,
# or only reports what would have been done when +save+ is falsy.
#
# @param gid [Object] page GID passed to Client::ScraperJobPage and echoed
#   in the printed message.
# @return [nil] the result of +puts+.
def refetch_page(gid)
  if save
    Client::ScraperJobPage.new({gid: gid}).refetch_by_job(self.job_id)
    puts "Refetch page #{gid}"
  else
    # Dry run: no request is sent, only a message is printed.
    puts "Would have refetch page #{gid}"
  end
end
|
107
|
+
|
108
|
+
# Requests a refetch of a page by GID.
#
# When +page_gid+ equals this executor's own +gid+, only the +refetch_self+
# flag is set and nothing else is done here; otherwise the call is delegated
# to +refetch_page+.
#
# @param page_gid [String] GID of the page to refetch.
# @raise [ArgumentError] if +page_gid+ is not a String.
# @return [Object, nil] nil for the current page, otherwise the result of
#   +refetch_page+.
def refetch(page_gid)
  raise ArgumentError, "page_gid needs to be a String." unless page_gid.is_a?(String)

  if page_gid == gid
    self.refetch_self = true
    return
  end
  refetch_page(page_gid)
end
|
116
|
+
|
117
|
+
# Requests a reparse of the page identified by +gid+ for this executor's job,
# or only reports what would have been done when +save+ is falsy.
#
# @param gid [Object] page GID passed to Client::ScraperJobPage and echoed
#   in the printed message.
# @return [nil] the result of +puts+.
def reparse_page(gid)
  if save
    Client::ScraperJobPage.new({gid: gid}).reparse_by_job(self.job_id)
    puts "Reparse page #{gid}"
  else
    # Dry run: no request is sent, only a message is printed.
    puts "Would have reparse page #{gid}"
  end
end
|
125
|
+
|
126
|
+
# Requests a reparse of a page by GID.
#
# When +page_gid+ equals this executor's own +gid+, only the +reparse_self+
# flag is set and nothing else is done here; otherwise the call is delegated
# to +reparse_page+.
#
# @param page_gid [String] GID of the page to reparse.
# @raise [ArgumentError] if +page_gid+ is not a String.
# @return [Object, nil] nil for the current page, otherwise the result of
#   +reparse_page+.
def reparse(page_gid)
  raise ArgumentError, "page_gid needs to be a String." unless page_gid.is_a?(String)

  if page_gid == gid
    self.reparse_self = true
    return
  end
  reparse_page(page_gid)
end
|
134
|
+
|
90
135
|
def eval_parser_script(save=false)
|
91
136
|
update_parsing_starting_status
|
92
137
|
|
@@ -95,6 +140,8 @@ module AnswersEngine
|
|
95
140
|
outputs = []
|
96
141
|
pages = []
|
97
142
|
page = init_page_vars(page)
|
143
|
+
self.refetch_self = false
|
144
|
+
self.reparse_self = false
|
98
145
|
|
99
146
|
begin
|
100
147
|
context = isolated_binding({
|
@@ -113,7 +160,13 @@ module AnswersEngine
|
|
113
160
|
|
114
161
|
puts "=========== Parsing Executed ==========="
|
115
162
|
save_pages_and_outputs(pages, outputs, :parsing)
|
116
|
-
|
163
|
+
if refetch_self
|
164
|
+
refetch_page gid
|
165
|
+
elsif reparse_self
|
166
|
+
reparse_page gid
|
167
|
+
else
|
168
|
+
update_parsing_done_status
|
169
|
+
end
|
117
170
|
end
|
118
171
|
proc.call
|
119
172
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: answersengine
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Parama Danoesubroto
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-08-
|
11
|
+
date: 2019-08-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|