answersengine 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e1025b7622002c5eb5fef3328f51171e574404f8c54d2c334a88a9a432a02e34
|
4
|
+
data.tar.gz: ce8c68dbeecd1c6151fc294395b59051cac3584419012991d31d184204160732
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a0708e1189d35a1b3b3b8a19ad98340dc461a88079b7e5587f3622183e6235531299ceef7e42931e958d6ac62c17778183f7d26e730c01df5c83b6eab364b18b
|
7
|
+
data.tar.gz: 97085efab1572619ef43d7014da56d768686b72a4c60ae00bdea516a619832444370d0a8024678f9896111d9eedd17d5ef4679e4c2a524c7b9918664f4557b4f
|
@@ -26,11 +26,21 @@ module AnswersEngine
|
|
26
26
|
self.class.put("/scrapers/#{scraper_name}/current_job/pages/refetch", params)
|
27
27
|
end
|
28
28
|
|
29
|
+
def refetch_by_job(job_id, opts={})
|
30
|
+
params = @options.merge(opts)
|
31
|
+
self.class.put("/jobs/#{job_id}/pages/refetch", params)
|
32
|
+
end
|
33
|
+
|
29
34
|
def reparse(scraper_name, opts={})
|
30
35
|
params = @options.merge(opts)
|
31
36
|
self.class.put("/scrapers/#{scraper_name}/current_job/pages/reparse", params)
|
32
37
|
end
|
33
38
|
|
39
|
+
def reparse_by_job(job_id, opts={})
|
40
|
+
params = @options.merge(opts)
|
41
|
+
self.class.put("/jobs/#{job_id}/pages/reparse", params)
|
42
|
+
end
|
43
|
+
|
34
44
|
def enqueue(scraper_name, method, url, opts={})
|
35
45
|
body = {}
|
36
46
|
body[:method] = method != "" ? method : "GET"
|
@@ -2,6 +2,13 @@ module AnswersEngine
|
|
2
2
|
module Scraper
|
3
3
|
class RubyParserExecutor < Executor
|
4
4
|
attr_accessor :save
|
5
|
+
# Refetch self page flag.
|
6
|
+
# @return [Boolean]
|
7
|
+
# @note It is stronger than #reparse_self flag.
|
8
|
+
attr_accessor :refetch_self
|
9
|
+
# Reparse self page flag.
|
10
|
+
# @return [Boolean]
|
11
|
+
attr_accessor :reparse_self
|
5
12
|
|
6
13
|
def initialize(options={})
|
7
14
|
@filename = options.fetch(:filename) { raise "Filename is required"}
|
@@ -20,7 +27,9 @@ module AnswersEngine
|
|
20
27
|
:save_pages,
|
21
28
|
:save_outputs,
|
22
29
|
:find_output,
|
23
|
-
:find_outputs
|
30
|
+
:find_outputs,
|
31
|
+
:refetch,
|
32
|
+
:reparse
|
24
33
|
].freeze
|
25
34
|
end
|
26
35
|
|
@@ -87,6 +96,42 @@ module AnswersEngine
|
|
87
96
|
:parsing
|
88
97
|
end
|
89
98
|
|
99
|
+
def refetch_page gid
|
100
|
+
if save
|
101
|
+
Client::ScraperJobPage.new({gid: gid}).refetch_by_job(self.job_id)
|
102
|
+
puts "Refetch page #{gid}"
|
103
|
+
else
|
104
|
+
puts "Would have refetch page #{gid}"
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
def refetch page_gid
|
109
|
+
raise ArgumentError.new("page_gid needs to be a String.") unless page_gid.is_a?(String)
|
110
|
+
if page_gid == gid
|
111
|
+
self.refetch_self = true
|
112
|
+
return
|
113
|
+
end
|
114
|
+
refetch_page page_gid
|
115
|
+
end
|
116
|
+
|
117
|
+
def reparse_page gid
|
118
|
+
if save
|
119
|
+
Client::ScraperJobPage.new({gid: gid}).reparse_by_job(self.job_id)
|
120
|
+
puts "Reparse page #{gid}"
|
121
|
+
else
|
122
|
+
puts "Would have reparse page #{gid}"
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
def reparse page_gid
|
127
|
+
raise ArgumentError.new("page_gid needs to be a String.") unless page_gid.is_a?(String)
|
128
|
+
if page_gid == gid
|
129
|
+
self.reparse_self = true
|
130
|
+
return
|
131
|
+
end
|
132
|
+
reparse_page page_gid
|
133
|
+
end
|
134
|
+
|
90
135
|
def eval_parser_script(save=false)
|
91
136
|
update_parsing_starting_status
|
92
137
|
|
@@ -95,6 +140,8 @@ module AnswersEngine
|
|
95
140
|
outputs = []
|
96
141
|
pages = []
|
97
142
|
page = init_page_vars(page)
|
143
|
+
self.refetch_self = false
|
144
|
+
self.reparse_self = false
|
98
145
|
|
99
146
|
begin
|
100
147
|
context = isolated_binding({
|
@@ -113,7 +160,13 @@ module AnswersEngine
|
|
113
160
|
|
114
161
|
puts "=========== Parsing Executed ==========="
|
115
162
|
save_pages_and_outputs(pages, outputs, :parsing)
|
116
|
-
|
163
|
+
if refetch_self
|
164
|
+
refetch_page gid
|
165
|
+
elsif reparse_self
|
166
|
+
reparse_page gid
|
167
|
+
else
|
168
|
+
update_parsing_done_status
|
169
|
+
end
|
117
170
|
end
|
118
171
|
proc.call
|
119
172
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: answersengine
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Parama Danoesubroto
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-08-
|
11
|
+
date: 2019-08-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|