answersengine 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: be8e1b2d4a1ca60a2b0f1ccc1fd5c00f33aa2f412214d9294e357ec2bfd353f1
4
- data.tar.gz: ec3e082f0b8905a313f43a1d7d6018640c06baa6d06501f62126cab2dbe0af32
3
+ metadata.gz: e1025b7622002c5eb5fef3328f51171e574404f8c54d2c334a88a9a432a02e34
4
+ data.tar.gz: ce8c68dbeecd1c6151fc294395b59051cac3584419012991d31d184204160732
5
5
  SHA512:
6
- metadata.gz: 5dec73a502a3431a5471f02a2b4093d57ed0cd23528a5ea6ec3915ea67031e724471df15d36499e5217a37ad0f32a51a87e7329d095670baedde6f5186ee2f42
7
- data.tar.gz: a2864beba98cdc6660e091649e89fb0756f0edd3528443ba8231ae17d079cd1efa8adab80329271f32b7dc80ab68b0f1ca9d12897dea82ab0284d66bda2566f3
6
+ metadata.gz: a0708e1189d35a1b3b3b8a19ad98340dc461a88079b7e5587f3622183e6235531299ceef7e42931e958d6ac62c17778183f7d26e730c01df5c83b6eab364b18b
7
+ data.tar.gz: 97085efab1572619ef43d7014da56d768686b72a4c60ae00bdea516a619832444370d0a8024678f9896111d9eedd17d5ef4679e4c2a524c7b9918664f4557b4f
@@ -26,11 +26,21 @@ module AnswersEngine
26
26
  self.class.put("/scrapers/#{scraper_name}/current_job/pages/refetch", params)
27
27
  end
28
28
 
29
+ def refetch_by_job(job_id, opts={})
30
+ params = @options.merge(opts)
31
+ self.class.put("/jobs/#{job_id}/pages/refetch", params)
32
+ end
33
+
29
34
  def reparse(scraper_name, opts={})
30
35
  params = @options.merge(opts)
31
36
  self.class.put("/scrapers/#{scraper_name}/current_job/pages/reparse", params)
32
37
  end
33
38
 
39
+ def reparse_by_job(job_id, opts={})
40
+ params = @options.merge(opts)
41
+ self.class.put("/jobs/#{job_id}/pages/reparse", params)
42
+ end
43
+
34
44
  def enqueue(scraper_name, method, url, opts={})
35
45
  body = {}
36
46
  body[:method] = method != "" ? method : "GET"
@@ -2,6 +2,13 @@ module AnswersEngine
2
2
  module Scraper
3
3
  class RubyParserExecutor < Executor
4
4
  attr_accessor :save
5
+ # Refetch self page flag.
6
+ # @return [Boolean]
7
+ # @note Takes precedence over the #reparse_self flag when both are set.
8
+ attr_accessor :refetch_self
9
+ # Reparse self page flag.
10
+ # @return [Boolean]
11
+ attr_accessor :reparse_self
5
12
 
6
13
  def initialize(options={})
7
14
  @filename = options.fetch(:filename) { raise "Filename is required"}
@@ -20,7 +27,9 @@ module AnswersEngine
20
27
  :save_pages,
21
28
  :save_outputs,
22
29
  :find_output,
23
- :find_outputs
30
+ :find_outputs,
31
+ :refetch,
32
+ :reparse
24
33
  ].freeze
25
34
  end
26
35
 
@@ -87,6 +96,42 @@ module AnswersEngine
87
96
  :parsing
88
97
  end
89
98
 
99
+ def refetch_page gid
100
+ if save
101
+ Client::ScraperJobPage.new({gid: gid}).refetch_by_job(self.job_id)
102
+ puts "Refetch page #{gid}"
103
+ else
104
+ puts "Would have refetch page #{gid}"
105
+ end
106
+ end
107
+
108
+ def refetch page_gid
109
+ raise ArgumentError.new("page_gid needs to be a String.") unless page_gid.is_a?(String)
110
+ if page_gid == gid
111
+ self.refetch_self = true
112
+ return
113
+ end
114
+ refetch_page page_gid
115
+ end
116
+
117
+ def reparse_page gid
118
+ if save
119
+ Client::ScraperJobPage.new({gid: gid}).reparse_by_job(self.job_id)
120
+ puts "Reparse page #{gid}"
121
+ else
122
+ puts "Would have reparse page #{gid}"
123
+ end
124
+ end
125
+
126
+ def reparse page_gid
127
+ raise ArgumentError.new("page_gid needs to be a String.") unless page_gid.is_a?(String)
128
+ if page_gid == gid
129
+ self.reparse_self = true
130
+ return
131
+ end
132
+ reparse_page page_gid
133
+ end
134
+
90
135
  def eval_parser_script(save=false)
91
136
  update_parsing_starting_status
92
137
 
@@ -95,6 +140,8 @@ module AnswersEngine
95
140
  outputs = []
96
141
  pages = []
97
142
  page = init_page_vars(page)
143
+ self.refetch_self = false
144
+ self.reparse_self = false
98
145
 
99
146
  begin
100
147
  context = isolated_binding({
@@ -113,7 +160,13 @@ module AnswersEngine
113
160
 
114
161
  puts "=========== Parsing Executed ==========="
115
162
  save_pages_and_outputs(pages, outputs, :parsing)
116
- update_parsing_done_status
163
+ if refetch_self
164
+ refetch_page gid
165
+ elsif reparse_self
166
+ reparse_page gid
167
+ else
168
+ update_parsing_done_status
169
+ end
117
170
  end
118
171
  proc.call
119
172
  end
@@ -1,3 +1,3 @@
1
1
  module AnswersEngine
2
- VERSION = "0.5.0"
2
+ VERSION = "0.6.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: answersengine
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-08-14 00:00:00.000000000 Z
11
+ date: 2019-08-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor