answersengine 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: be8e1b2d4a1ca60a2b0f1ccc1fd5c00f33aa2f412214d9294e357ec2bfd353f1
4
- data.tar.gz: ec3e082f0b8905a313f43a1d7d6018640c06baa6d06501f62126cab2dbe0af32
3
+ metadata.gz: e1025b7622002c5eb5fef3328f51171e574404f8c54d2c334a88a9a432a02e34
4
+ data.tar.gz: ce8c68dbeecd1c6151fc294395b59051cac3584419012991d31d184204160732
5
5
  SHA512:
6
- metadata.gz: 5dec73a502a3431a5471f02a2b4093d57ed0cd23528a5ea6ec3915ea67031e724471df15d36499e5217a37ad0f32a51a87e7329d095670baedde6f5186ee2f42
7
- data.tar.gz: a2864beba98cdc6660e091649e89fb0756f0edd3528443ba8231ae17d079cd1efa8adab80329271f32b7dc80ab68b0f1ca9d12897dea82ab0284d66bda2566f3
6
+ metadata.gz: a0708e1189d35a1b3b3b8a19ad98340dc461a88079b7e5587f3622183e6235531299ceef7e42931e958d6ac62c17778183f7d26e730c01df5c83b6eab364b18b
7
+ data.tar.gz: 97085efab1572619ef43d7014da56d768686b72a4c60ae00bdea516a619832444370d0a8024678f9896111d9eedd17d5ef4679e4c2a524c7b9918664f4557b4f
@@ -26,11 +26,21 @@ module AnswersEngine
26
26
  self.class.put("/scrapers/#{scraper_name}/current_job/pages/refetch", params)
27
27
  end
28
28
 
29
+ def refetch_by_job(job_id, opts={})
30
+ params = @options.merge(opts)
31
+ self.class.put("/jobs/#{job_id}/pages/refetch", params)
32
+ end
33
+
29
34
  def reparse(scraper_name, opts={})
30
35
  params = @options.merge(opts)
31
36
  self.class.put("/scrapers/#{scraper_name}/current_job/pages/reparse", params)
32
37
  end
33
38
 
39
+ def reparse_by_job(job_id, opts={})
40
+ params = @options.merge(opts)
41
+ self.class.put("/jobs/#{job_id}/pages/reparse", params)
42
+ end
43
+
34
44
  def enqueue(scraper_name, method, url, opts={})
35
45
  body = {}
36
46
  body[:method] = method != "" ? method : "GET"
@@ -2,6 +2,13 @@ module AnswersEngine
2
2
  module Scraper
3
3
  class RubyParserExecutor < Executor
4
4
  attr_accessor :save
5
+ # Refetch self page flag.
6
+ # @return [Boolean]
7
+ # @note It is stronger than #reparse_self flag.
8
+ attr_accessor :refetch_self
9
+ # Reparse self page flag.
10
+ # @return [Boolean]
11
+ attr_accessor :reparse_self
5
12
 
6
13
  def initialize(options={})
7
14
  @filename = options.fetch(:filename) { raise "Filename is required"}
@@ -20,7 +27,9 @@ module AnswersEngine
20
27
  :save_pages,
21
28
  :save_outputs,
22
29
  :find_output,
23
- :find_outputs
30
+ :find_outputs,
31
+ :refetch,
32
+ :reparse
24
33
  ].freeze
25
34
  end
26
35
 
@@ -87,6 +96,42 @@ module AnswersEngine
87
96
  :parsing
88
97
  end
89
98
 
99
+ def refetch_page gid
100
+ if save
101
+ Client::ScraperJobPage.new({gid: gid}).refetch_by_job(self.job_id)
102
+ puts "Refetch page #{gid}"
103
+ else
104
+ puts "Would have refetch page #{gid}"
105
+ end
106
+ end
107
+
108
+ def refetch page_gid
109
+ raise ArgumentError.new("page_gid needs to be a String.") unless page_gid.is_a?(String)
110
+ if page_gid == gid
111
+ self.refetch_self = true
112
+ return
113
+ end
114
+ refetch_page page_gid
115
+ end
116
+
117
+ def reparse_page gid
118
+ if save
119
+ Client::ScraperJobPage.new({gid: gid}).reparse_by_job(self.job_id)
120
+ puts "Reparse page #{gid}"
121
+ else
122
+ puts "Would have reparse page #{gid}"
123
+ end
124
+ end
125
+
126
+ def reparse page_gid
127
+ raise ArgumentError.new("page_gid needs to be a String.") unless page_gid.is_a?(String)
128
+ if page_gid == gid
129
+ self.reparse_self = true
130
+ return
131
+ end
132
+ reparse_page page_gid
133
+ end
134
+
90
135
  def eval_parser_script(save=false)
91
136
  update_parsing_starting_status
92
137
 
@@ -95,6 +140,8 @@ module AnswersEngine
95
140
  outputs = []
96
141
  pages = []
97
142
  page = init_page_vars(page)
143
+ self.refetch_self = false
144
+ self.reparse_self = false
98
145
 
99
146
  begin
100
147
  context = isolated_binding({
@@ -113,7 +160,13 @@ module AnswersEngine
113
160
 
114
161
  puts "=========== Parsing Executed ==========="
115
162
  save_pages_and_outputs(pages, outputs, :parsing)
116
- update_parsing_done_status
163
+ if refetch_self
164
+ refetch_page gid
165
+ elsif reparse_self
166
+ reparse_page gid
167
+ else
168
+ update_parsing_done_status
169
+ end
117
170
  end
118
171
  proc.call
119
172
  end
@@ -1,3 +1,3 @@
1
1
  module AnswersEngine
2
- VERSION = "0.5.0"
2
+ VERSION = "0.6.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: answersengine
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-08-14 00:00:00.000000000 Z
11
+ date: 2019-08-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor