ae_easy-core 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/ae_easy-core.gemspec +2 -1
- data/doc/AeEasy/Core/Config.html +1 -1
- data/doc/AeEasy/Core/Exception/OutdatedError.html +1 -1
- data/doc/AeEasy/Core/Exception.html +1 -1
- data/doc/AeEasy/Core/Helper/Cookie.html +1 -1
- data/doc/AeEasy/Core/Helper.html +1 -1
- data/doc/AeEasy/Core/Mock/FakeDb.html +1402 -449
- data/doc/AeEasy/Core/Mock/FakeExecutor.html +898 -428
- data/doc/AeEasy/Core/Mock/FakeParser.html +9 -124
- data/doc/AeEasy/Core/Mock/FakeSeeder.html +9 -118
- data/doc/AeEasy/Core/Mock.html +1 -1
- data/doc/AeEasy/Core/Plugin/CollectionVault.html +1 -1
- data/doc/AeEasy/Core/Plugin/ConfigBehavior.html +1 -1
- data/doc/AeEasy/Core/Plugin/ContextIntegrator.html +1 -1
- data/doc/AeEasy/Core/Plugin/InitializeHook.html +1 -1
- data/doc/AeEasy/Core/Plugin/Parser.html +1 -1
- data/doc/AeEasy/Core/Plugin/ParserBehavior.html +1 -1
- data/doc/AeEasy/Core/Plugin/Seeder.html +1 -1
- data/doc/AeEasy/Core/Plugin/SeederBehavior.html +1 -1
- data/doc/AeEasy/Core/Plugin.html +1 -1
- data/doc/AeEasy/Core/SmartCollection.html +1 -300
- data/doc/AeEasy/Core.html +32 -110
- data/doc/AeEasy.html +1 -1
- data/doc/_index.html +1 -1
- data/doc/file.README.html +1 -1
- data/doc/index.html +1 -1
- data/doc/method_list.html +157 -117
- data/doc/top-level-namespace.html +1 -1
- data/lib/ae_easy/core/mock/fake_db.rb +216 -14
- data/lib/ae_easy/core/mock/fake_executor.rb +112 -7
- data/lib/ae_easy/core/version.rb +1 -1
- data/lib/ae_easy/core.rb +1 -0
- metadata +18 -5
- data/Gemfile.lock +0 -57
@@ -100,7 +100,7 @@
|
|
100
100
|
</div>
|
101
101
|
|
102
102
|
<div id="footer">
|
103
|
-
Generated on Wed Feb
|
103
|
+
Generated on Wed Feb 20 18:03:17 2019 by
|
104
104
|
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
|
105
105
|
0.9.18 (ruby-2.5.3).
|
106
106
|
</div>
|
@@ -7,6 +7,15 @@ module AeEasy
|
|
7
7
|
PAGE_KEYS = ['gid'].freeze
|
8
8
|
# Output id keys, analogous to primary keys.
|
9
9
|
OUTPUT_KEYS = ['_id', '_collection'].freeze
|
10
|
+
# Job id keys, analogous to primary keys.
|
11
|
+
JOB_KEYS = ['job_id'].freeze
|
12
|
+
# Available job statuses.
|
13
|
+
JOB_STATUSES = {
|
14
|
+
active: 'active',
|
15
|
+
done: 'done',
|
16
|
+
cancelled: 'cancelled',
|
17
|
+
paused: 'paused'
|
18
|
+
}
|
10
19
|
# Default collection for saved outputs
|
11
20
|
DEFAULT_COLLECTION = 'default'
|
12
21
|
|
@@ -37,8 +46,11 @@ module AeEasy
|
|
37
46
|
#
|
38
47
|
# @return [Hash]
|
39
48
|
def self.build_page page, opts = {}
|
49
|
+
opts = {
|
50
|
+
allow_page_gid_override: true,
|
51
|
+
allow_job_id_override: true
|
52
|
+
}.merge opts
|
40
53
|
temp_db = AeEasy::Core::Mock::FakeDb.new opts
|
41
|
-
temp_db.enable_page_gid_override
|
42
54
|
temp_db.pages << page
|
43
55
|
temp_db.pages.first
|
44
56
|
end
|
@@ -56,15 +68,122 @@ module AeEasy
|
|
56
68
|
build_page page, opts
|
57
69
|
end
|
58
70
|
|
71
|
+
# Clean a URL to remove fragment, lowercase scheme and host, and sort
|
72
|
+
# query string.
|
73
|
+
#
|
74
|
+
# @param [String] raw_url URL to clean.
|
75
|
+
#
|
76
|
+
# @return [String]
|
77
|
+
def self.clean_uri raw_url
|
78
|
+
url = URI.parse(raw_url)
|
79
|
+
url.hostname = url.hostname.downcase
|
80
|
+
url.fragment = nil
|
81
|
+
|
82
|
+
# Sort query string keys
|
83
|
+
unless url.query.nil?
|
84
|
+
query_string = CGI.parse(url.query)
|
85
|
+
keys = query_string.keys.sort
|
86
|
+
data = []
|
87
|
+
keys.each do |key|
|
88
|
+
query_string[key].each do |value|
|
89
|
+
data << "#{URI.encode key}=#{URI.encode value}"
|
90
|
+
end
|
91
|
+
end
|
92
|
+
url.query = data.join('&')
|
93
|
+
end
|
94
|
+
url.to_s
|
95
|
+
end
|
96
|
+
|
97
|
+
# Format headers for gid generation.
|
98
|
+
# @private
|
99
|
+
#
|
100
|
+
# @param [Hash|nil] headers Headers hash.
|
101
|
+
#
|
102
|
+
# @return [Hash]
|
103
|
+
def self.format_headers headers
|
104
|
+
return {} if headers.nil?
|
105
|
+
data = {}
|
106
|
+
headers.each do |key, value|
|
107
|
+
unless value.is_a? Array
|
108
|
+
data[key] = value
|
109
|
+
next
|
110
|
+
end
|
111
|
+
data[key] = value.sort
|
112
|
+
end
|
113
|
+
data
|
114
|
+
end
|
115
|
+
|
116
|
+
# Build a job with defaults by using FakeDb engine.
|
117
|
+
#
|
118
|
+
# @param [Hash] job Job initial values.
|
119
|
+
# @param [Hash] opts ({}) Configuration options (see #initialize).
|
120
|
+
#
|
121
|
+
# @return [Hash]
|
122
|
+
def self.build_job job, opts = {}
|
123
|
+
temp_db = AeEasy::Core::Mock::FakeDb.new opts
|
124
|
+
temp_db.jobs << job
|
125
|
+
temp_db.jobs.last
|
126
|
+
end
|
127
|
+
|
128
|
+
# Build a fake job by using FakeDb engine.
|
129
|
+
#
|
130
|
+
# @param [Hash] opts ({}) Configuration options (see #initialize).
|
131
|
+
# @option opts [String] :scraper_name (nil) Scraper name.
|
132
|
+
# @option opts [Integer] :job_id (nil) Job id.
|
133
|
+
# @option opts [String] :status ('done').
|
134
|
+
#
|
135
|
+
# @return [Hash]
|
136
|
+
def self.build_fake_job opts = {}
|
137
|
+
job = {
|
138
|
+
'job_id' => opts[:job_id],
|
139
|
+
'scraper_name' => opts[:scraper_name],
|
140
|
+
'status' => (opts[:status] || 'done')
|
141
|
+
}
|
142
|
+
build_job job, opts
|
143
|
+
end
|
144
|
+
|
145
|
+
# Get current job or create new one from values.
|
146
|
+
#
|
147
|
+
# @param [Integer] target_job_id (nil) Job id to ensure existence.
|
148
|
+
#
|
149
|
+
# @return [Hash]
|
150
|
+
def ensure_job target_job_id = nil
|
151
|
+
target_job_id = job_id if target_job_id.nil?
|
152
|
+
job = jobs.find{|v|v['job_id'] == target_job_id}
|
153
|
+
return job unless job.nil?
|
154
|
+
job = {
|
155
|
+
'job_id' => target_job_id,
|
156
|
+
'scraper_name' => scraper_name,
|
157
|
+
}
|
158
|
+
job['status'] = 'active' unless target_job_id != job_id
|
159
|
+
jobs << job
|
160
|
+
jobs.last
|
161
|
+
end
|
162
|
+
|
163
|
+
# Fake scraper_name.
|
164
|
+
# @return [String,nil]
|
165
|
+
def scraper_name
|
166
|
+
@scraper_name ||= 'my_scraper'
|
167
|
+
end
|
168
|
+
|
169
|
+
# Set fake scraper_name value.
|
170
|
+
def scraper_name= value
|
171
|
+
job = ensure_job
|
172
|
+
@scraper_name = value
|
173
|
+
job['scraper_name'] = scraper_name
|
174
|
+
end
|
175
|
+
|
59
176
|
# Fake job id.
|
60
177
|
# @return [Integer,nil]
|
61
178
|
def job_id
|
62
|
-
@job_id ||=
|
179
|
+
@job_id ||= generate_job_id
|
63
180
|
end
|
64
181
|
|
65
182
|
# Set fake job id value.
|
66
183
|
def job_id= value
|
67
184
|
@job_id = value
|
185
|
+
ensure_job
|
186
|
+
job_id
|
68
187
|
end
|
69
188
|
|
70
189
|
# Current fake page gid.
|
@@ -78,35 +197,102 @@ module AeEasy
|
|
78
197
|
@page_gid = value
|
79
198
|
end
|
80
199
|
|
81
|
-
# Enable page gid override on page insert.
|
200
|
+
# Enable page gid override on page or output insert.
|
82
201
|
def enable_page_gid_override
|
83
202
|
@allow_page_gid_override = true
|
84
203
|
end
|
85
204
|
|
86
|
-
# Disable page gid override on page insert.
|
205
|
+
# Disable page gid override on page or output insert.
|
87
206
|
def disable_page_gid_override
|
88
207
|
@allow_page_gid_override = false
|
89
208
|
end
|
90
209
|
|
91
|
-
# Specify whenever page gid overriding by user is allowed on page
|
92
|
-
# insert.
|
210
|
+
# Specify whether page gid overriding by user is allowed on page or
|
211
|
+
# output insert.
|
93
212
|
#
|
94
213
|
# @return [Boolean] `true` when allowed, else `false`.
|
95
214
|
def allow_page_gid_override?
|
96
215
|
@allow_page_gid_override ||= false
|
97
216
|
end
|
98
217
|
|
218
|
+
# Enable job id override on page or output insert.
|
219
|
+
def enable_job_id_override
|
220
|
+
@allow_job_id_override = true
|
221
|
+
end
|
222
|
+
|
223
|
+
# Disable job id override on page or output insert.
|
224
|
+
def disable_job_id_override
|
225
|
+
@allow_job_id_override = false
|
226
|
+
end
|
227
|
+
|
228
|
+
# Specify whether job id overriding by user is allowed on page or
|
229
|
+
# output insert.
|
230
|
+
#
|
231
|
+
# @return [Boolean] `true` when allowed, else `false`.
|
232
|
+
def allow_job_id_override?
|
233
|
+
@allow_job_id_override ||= false
|
234
|
+
end
|
235
|
+
|
99
236
|
# Initialize fake database.
|
100
237
|
#
|
101
238
|
# @param [Hash] opts ({}) Configuration options.
|
102
239
|
# @option opts [Integer,nil] :job_id Job id default value.
|
240
|
+
# @option opts [String,nil] :scraper_name Scraper name default value.
|
103
241
|
# @option opts [String,nil] :page_gid Page gid default value.
|
104
242
|
# @option opts [Boolean, nil] :allow_page_gid_override (false) Specify
|
105
|
-
# whenever page gid can be overrided on page insert.
|
243
|
+
# whether page gid can be overridden on page or output insert.
|
244
|
+
# @option opts [Boolean, nil] :allow_job_id_override (false) Specify
|
245
|
+
# whether job id can be overridden on page or output insert.
|
106
246
|
def initialize opts = {}
|
107
247
|
self.job_id = opts[:job_id]
|
248
|
+
self.scraper_name = opts[:scraper_name]
|
108
249
|
self.page_gid = opts[:page_gid]
|
109
250
|
@allow_page_gid_override = opts[:allow_page_gid_override].nil? ? false : !!opts[:allow_page_gid_override]
|
251
|
+
@allow_job_id_override = opts[:allow_job_id_override].nil? ? false : !!opts[:allow_job_id_override]
|
252
|
+
end
|
253
|
+
|
254
|
+
# Generate a fake scraper name.
|
255
|
+
#
|
256
|
+
# @return [String]
|
257
|
+
def generate_scraper_name
|
258
|
+
Faker::Internet.unique.slug
|
259
|
+
end
|
260
|
+
|
261
|
+
# Generate a fake job_id.
|
262
|
+
#
|
263
|
+
# @return [Integer]
|
264
|
+
def generate_job_id
|
265
|
+
jobs.count < 1 ? 1 : (jobs.max{|a,b|a['job_id'] <=> b['job_id']}['job_id'] + 1)
|
266
|
+
end
|
267
|
+
|
268
|
+
# Get job keys with key generators to emulate saving on db.
|
269
|
+
# @private
|
270
|
+
#
|
271
|
+
# @return [Hash]
|
272
|
+
def job_defaults
|
273
|
+
@job_defaults ||= {
|
274
|
+
'job_id' => lambda{|job| generate_job_id},
|
275
|
+
'scraper_name' => lambda{|job| generate_scraper_name},
|
276
|
+
'status' => 'done',
|
277
|
+
'created_at' => lambda{|job| Time.now}
|
278
|
+
}
|
279
|
+
end
|
280
|
+
|
281
|
+
# Stored job collection
|
282
|
+
#
|
283
|
+
# @return [AeEasy::Core::SmartCollection]
|
284
|
+
def jobs
|
285
|
+
return @jobs unless @jobs.nil?
|
286
|
+
collection = self.class.new_collection JOB_KEYS,
|
287
|
+
defaults: job_defaults
|
288
|
+
collection.bind_event(:before_defaults) do |collection, raw_item|
|
289
|
+
AeEasy::Core.deep_stringify_keys raw_item
|
290
|
+
end
|
291
|
+
collection.bind_event(:before_insert) do |collection, item, match|
|
292
|
+
item['job_id'] ||= generate_job_id
|
293
|
+
item
|
294
|
+
end
|
295
|
+
@jobs ||= collection
|
110
296
|
end
|
111
297
|
|
112
298
|
# Generate a fake UUID based on page data:
|
@@ -119,10 +305,10 @@ module AeEasy
|
|
119
305
|
# * body
|
120
306
|
# * ua_type
|
121
307
|
#
|
122
|
-
# @param [Hash]
|
308
|
+
# @param [Hash] page_data Page data.
|
123
309
|
#
|
124
310
|
# @return [String]
|
125
|
-
def generate_page_gid
|
311
|
+
def generate_page_gid page_data
|
126
312
|
fields = [
|
127
313
|
'url',
|
128
314
|
'method',
|
@@ -133,8 +319,13 @@ module AeEasy
|
|
133
319
|
'body',
|
134
320
|
'ua_type'
|
135
321
|
]
|
322
|
+
data = page_data.select{|k,v|fields.include? k}
|
323
|
+
data['url'] = self.class.clean_uri data['url']
|
324
|
+
data['headers'] = self.class.format_headers data['headers']
|
325
|
+
data['cookie'] = AeEasy::Core::Helper::Cookie.parse_from_request data['cookie'] unless data['cookie'].nil?
|
136
326
|
seed = data.select{|k,v|fields.include? k}.hash
|
137
|
-
self.class.fake_uuid seed
|
327
|
+
checksum = self.class.fake_uuid seed
|
328
|
+
"#{URI.parse(data['url']).hostname}-#{checksum}"
|
138
329
|
end
|
139
330
|
|
140
331
|
# Get page keys with key generators to emulate saving on db.
|
@@ -142,8 +333,9 @@ module AeEasy
|
|
142
333
|
#
|
143
334
|
# @return [Hash]
|
144
335
|
def page_defaults
|
145
|
-
@
|
336
|
+
@page_defaults ||= {
|
146
337
|
'url' => nil,
|
338
|
+
'job_id' => lambda{|page| job_id},
|
147
339
|
'method' => 'GET',
|
148
340
|
'headers' => {},
|
149
341
|
'fetch_type' => 'standard',
|
@@ -168,7 +360,9 @@ module AeEasy
|
|
168
360
|
collection = self.class.new_collection PAGE_KEYS,
|
169
361
|
defaults: page_defaults
|
170
362
|
collection.bind_event(:before_defaults) do |collection, raw_item|
|
171
|
-
AeEasy::Core.deep_stringify_keys raw_item
|
363
|
+
item = AeEasy::Core.deep_stringify_keys raw_item
|
364
|
+
item.delete 'job_id' unless allow_job_id_override?
|
365
|
+
item
|
172
366
|
end
|
173
367
|
collection.bind_event(:before_insert) do |collection, item, match|
|
174
368
|
if item['gid'].nil? || !allow_page_gid_override?
|
@@ -194,7 +388,7 @@ module AeEasy
|
|
194
388
|
#
|
195
389
|
# @return [Hash]
|
196
390
|
def output_defaults
|
197
|
-
@
|
391
|
+
@output_defaults ||= {
|
198
392
|
'_collection' => DEFAULT_COLLECTION,
|
199
393
|
'_job_id' => lambda{|output| job_id},
|
200
394
|
'_created_at' => lambda{|output| Time.new.strftime('%Y-%m-%dT%H:%M:%SZ')},
|
@@ -210,12 +404,18 @@ module AeEasy
|
|
210
404
|
collection = self.class.new_collection OUTPUT_KEYS,
|
211
405
|
defaults: output_defaults
|
212
406
|
collection.bind_event(:before_defaults) do |collection, raw_item|
|
213
|
-
AeEasy::Core.deep_stringify_keys raw_item
|
407
|
+
item = AeEasy::Core.deep_stringify_keys raw_item
|
408
|
+
item.delete '_job_id' unless allow_job_id_override?
|
409
|
+
item.delete '_gid_id' unless allow_page_gid_override?
|
410
|
+
item
|
214
411
|
end
|
215
412
|
collection.bind_event(:before_insert) do |collection, item, match|
|
216
413
|
item['_id'] ||= generate_output_id item
|
217
414
|
item
|
218
415
|
end
|
416
|
+
collection.bind_event(:after_insert) do |collection, item|
|
417
|
+
ensure item['job_id']
|
418
|
+
end
|
219
419
|
@outputs ||= collection
|
220
420
|
end
|
221
421
|
|
@@ -255,6 +455,8 @@ module AeEasy
|
|
255
455
|
outputs
|
256
456
|
when :pages
|
257
457
|
pages
|
458
|
+
when :jobs
|
459
|
+
jobs
|
258
460
|
else
|
259
461
|
raise ArgumentError.new "Unknown collection #{collection}."
|
260
462
|
end
|
@@ -3,6 +3,9 @@ module AeEasy
|
|
3
3
|
module Mock
|
4
4
|
# Fake executor that emulates `AnswersEngine` executor.
|
5
5
|
module FakeExecutor
|
6
|
+
# Max allowed page size when query outputs (see #find_outputs).
|
7
|
+
MAX_FIND_OUTPUTS_PER_PAGE = 500
|
8
|
+
|
6
9
|
# Page content.
|
7
10
|
# @return [String,nil]
|
8
11
|
attr_accessor :content
|
@@ -92,16 +95,28 @@ module AeEasy
|
|
92
95
|
end
|
93
96
|
@outputs = opts[:outputs]
|
94
97
|
self.job_id = opts[:job_id]
|
98
|
+
self.scraper_name = opts[:scraper_name]
|
95
99
|
self.page = opts[:page]
|
96
100
|
end
|
97
101
|
|
102
|
+
# Fake scraper name used by executor.
|
103
|
+
# @return [String,nil]
|
104
|
+
def scraper_name
|
105
|
+
db.scraper_name
|
106
|
+
end
|
107
|
+
|
108
|
+
# Set fake scraper name value.
|
109
|
+
def scraper_name= value
|
110
|
+
db.scraper_name = value
|
111
|
+
end
|
112
|
+
|
98
113
|
# Fake job ID used by executor.
|
99
114
|
# @return [Integer,nil]
|
100
115
|
def job_id
|
101
116
|
db.job_id
|
102
117
|
end
|
103
118
|
|
104
|
-
# Set fake job
|
119
|
+
# Set fake job ID value.
|
105
120
|
def job_id= value
|
106
121
|
db.job_id = value
|
107
122
|
page['job_id'] = value
|
@@ -124,6 +139,11 @@ module AeEasy
|
|
124
139
|
@page = value
|
125
140
|
end
|
126
141
|
|
142
|
+
# Retrieve a list of saved jobs.
|
143
|
+
def saved_jobs
|
144
|
+
db.jobs
|
145
|
+
end
|
146
|
+
|
127
147
|
# Retrieve a list of saved pages. Drafted pages can be included.
|
128
148
|
def saved_pages
|
129
149
|
db.pages
|
@@ -134,18 +154,29 @@ module AeEasy
|
|
134
154
|
db.outputs
|
135
155
|
end
|
136
156
|
|
137
|
-
# Save a
|
157
|
+
# Save a job collection on db and remove all the elements from +list+.
|
158
|
+
#
|
159
|
+
# @param [Array] list Collection of jobs to save.
|
160
|
+
def save_jobs list
|
161
|
+
list.each{|job| db.jobs << job}
|
162
|
+
list.clear
|
163
|
+
end
|
164
|
+
|
165
|
+
# Save a page collection on db and remove all the elements from +list+.
|
138
166
|
#
|
139
167
|
# @param [Array] list Collection of pages to save.
|
140
168
|
def save_pages list
|
141
169
|
list.each{|page| db.pages << page}
|
170
|
+
list.clear
|
142
171
|
end
|
143
172
|
|
144
|
-
# Save an output collection on db
|
173
|
+
# Save an output collection on db and remove all the elements from
|
174
|
+
# +list+.
|
145
175
|
#
|
146
176
|
# @param [Array] list Collection of outputs to save.
|
147
177
|
def save_outputs list
|
148
178
|
list.each{|output| db.outputs << output}
|
179
|
+
list.clear
|
149
180
|
end
|
150
181
|
|
151
182
|
# Save draft pages into db and clear draft queue.
|
@@ -166,19 +197,71 @@ module AeEasy
|
|
166
197
|
flush_outputs
|
167
198
|
end
|
168
199
|
|
200
|
+
# Get latest job by scraper_name.
|
201
|
+
#
|
202
|
+
# @param [String] scraper_name Scraper name.
|
203
|
+
# @param [Hash] filter ({}) Additional filters.
|
204
|
+
#
|
205
|
+
# @return [Hash|nil] Return nil if no scraper_name or scraper_name is
|
206
|
+
# nil.
|
207
|
+
def latest_job_by scraper_name, filter = {}
|
208
|
+
return nil if scraper_name.nil?
|
209
|
+
data = db.query :jobs, filter.merge('scraper_name' => scraper_name)
|
210
|
+
data.max{|a,b| a['created_at'] <=> b['created_at']}
|
211
|
+
end
|
212
|
+
|
169
213
|
# Find outputs by collection and query with pagination.
|
170
214
|
#
|
171
215
|
# @param [String] collection ('default') Collection name.
|
172
216
|
# @param [Hash] query ({}) Filters to query.
|
173
217
|
# @param [Integer] page (1) Page number.
|
174
218
|
# @param [Integer] per_page (30) Page size.
|
219
|
+
# @param [Hash] opts ({}) Configuration options.
|
220
|
+
# @option opts [String|nil] :scraper_name (nil) Scraper name to query
|
221
|
+
# from.
|
222
|
+
# @option opts [Integer|nil] :job_id (nil) Job's id to query from.
|
223
|
+
#
|
224
|
+
# @raise [ArgumentError] +collection+ is not String.
|
225
|
+
# @raise [ArgumentError] +query+ is not a Hash.
|
226
|
+
# @raise [ArgumentError] +page+ is not an Integer greater than 0.
|
227
|
+
# @raise [ArgumentError] +per_page+ is not an Integer between 1 and 500.
|
175
228
|
#
|
176
229
|
# @return [Array]
|
177
|
-
|
230
|
+
#
|
231
|
+
# @example
|
232
|
+
# find_outputs
|
233
|
+
# @example
|
234
|
+
# find_outputs 'my_collection'
|
235
|
+
# @example
|
236
|
+
# find_outputs 'my_collection', {}
|
237
|
+
# @example
|
238
|
+
# find_outputs 'my_collection', {}, 1
|
239
|
+
# @example
|
240
|
+
# find_outputs 'my_collection', {}, 1, 30
|
241
|
+
# @example Find from another scraper by name
|
242
|
+
# find_outputs 'my_collection', {}, 1, 30, scraper_name: 'my_scraper'
|
243
|
+
# @example Find from another scraper by job_id
|
244
|
+
# find_outputs 'my_collection', {}, 1, 30, job_id: 123
|
245
|
+
#
|
246
|
+
# @note *opts `:job_id` option is prioritized over `:scraper_name` when
|
247
|
+
# both exist. If neither is provided (or both are nil), then the current job
|
248
|
+
# will be used to query instead; this is the default behavior.
|
249
|
+
def find_outputs collection = 'default', query = {}, page = 1, per_page = 30, opts = {}
|
250
|
+
raise ArgumentError.new("collection needs to be a String.") unless collection.is_a?(String)
|
251
|
+
raise ArgumentError.new("query needs to be a Hash.") unless query.is_a?(Hash)
|
252
|
+
unless page.is_a?(Integer) && page > 0
|
253
|
+
raise ArgumentError.new("page needs to be an Integer greater than 0.")
|
254
|
+
end
|
255
|
+
unless per_page.is_a?(Integer) && per_page > 0 && per_page <= MAX_FIND_OUTPUTS_PER_PAGE
|
256
|
+
raise ArgumentError.new("per_page needs to be an Integer between 1 and #{MAX_FIND_OUTPUTS_PER_PAGE}.")
|
257
|
+
end
|
258
|
+
|
178
259
|
count = 0
|
179
260
|
offset = (page - 1) * per_page
|
261
|
+
job = latest_job_by(opts[:scraper_name])
|
180
262
|
fixed_query = query.merge(
|
181
|
-
'_collection' => collection
|
263
|
+
'_collection' => collection,
|
264
|
+
'_job_id' => opts[:job_id] || (job.nil? ? job_id : job['job_id'])
|
182
265
|
)
|
183
266
|
db.query :outputs, fixed_query, offset, per_page
|
184
267
|
end
|
@@ -187,10 +270,32 @@ module AeEasy
|
|
187
270
|
#
|
188
271
|
# @param [String] collection ('default') Collection name.
|
189
272
|
# @param [Hash] query ({}) Filters to query.
|
273
|
+
# @param [Hash] opts ({}) Configuration options.
|
274
|
+
# @option opts [String|nil] :scraper_name (nil) Scraper name to query
|
275
|
+
# from.
|
276
|
+
# @option opts [Integer|nil] :job_id (nil) Job's id to query from.
|
277
|
+
#
|
278
|
+
# @raise [ArgumentError] +collection+ is not String.
|
279
|
+
# @raise [ArgumentError] +query+ is not a Hash.
|
190
280
|
#
|
191
281
|
# @return [Hash, nil]
|
192
|
-
|
193
|
-
|
282
|
+
#
|
283
|
+
# @example
|
284
|
+
# find_output
|
285
|
+
# @example
|
286
|
+
# find_output 'my_collection'
|
287
|
+
# @example
|
288
|
+
# find_output 'my_collection', {}
|
289
|
+
# @example Find from another scraper by name
|
290
|
+
# find_output 'my_collection', {}, scraper_name: 'my_scraper'
|
291
|
+
# @example Find from another scraper by job_id
|
292
|
+
# find_output 'my_collection', {}, job_id: 123
|
293
|
+
#
|
294
|
+
# @note *opts `:job_id` option is prioritized over `:scraper_name` when
|
295
|
+
# both exist. If neither is provided (or both are nil), then the current job
|
296
|
+
# will be used to query instead; this is the default behavior.
|
297
|
+
def find_output collection = 'default', query = {}, opts = {}
|
298
|
+
result = find_outputs(collection, query, 1, 1, opts)
|
194
299
|
result.nil? ? nil : result.first
|
195
300
|
end
|
196
301
|
|
data/lib/ae_easy/core/version.rb
CHANGED
data/lib/ae_easy/core.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ae_easy-core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eduardo Rosales
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-02-
|
11
|
+
date: 2019-02-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: answersengine
|
@@ -16,14 +16,28 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.2.
|
19
|
+
version: 0.2.32
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.2.
|
26
|
+
version: 0.2.32
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: faker
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: bundler
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -134,7 +148,6 @@ files:
|
|
134
148
|
- ".yardopts"
|
135
149
|
- CODE_OF_CONDUCT.md
|
136
150
|
- Gemfile
|
137
|
-
- Gemfile.lock
|
138
151
|
- LICENSE
|
139
152
|
- README.md
|
140
153
|
- Rakefile
|
data/Gemfile.lock
DELETED
@@ -1,57 +0,0 @@
|
|
1
|
-
PATH
|
2
|
-
remote: .
|
3
|
-
specs:
|
4
|
-
ae_easy-core (0.0.4)
|
5
|
-
answersengine (>= 0.2.25)
|
6
|
-
|
7
|
-
GEM
|
8
|
-
remote: https://rubygems.org/
|
9
|
-
specs:
|
10
|
-
ansi (1.5.0)
|
11
|
-
answersengine (0.2.31)
|
12
|
-
httparty (~> 0.16.2)
|
13
|
-
nokogiri (~> 1.6, < 1.10)
|
14
|
-
thor (~> 0.20.3)
|
15
|
-
byebug (10.0.2)
|
16
|
-
docile (1.3.1)
|
17
|
-
hirb (0.7.3)
|
18
|
-
httparty (0.16.3)
|
19
|
-
mime-types (~> 3.0)
|
20
|
-
multi_xml (>= 0.5.2)
|
21
|
-
json (2.1.0)
|
22
|
-
mime-types (3.2.2)
|
23
|
-
mime-types-data (~> 3.2015)
|
24
|
-
mime-types-data (3.2018.0812)
|
25
|
-
mini_portile2 (2.4.0)
|
26
|
-
minitest (5.11.3)
|
27
|
-
multi_xml (0.6.0)
|
28
|
-
nokogiri (1.9.1)
|
29
|
-
mini_portile2 (~> 2.4.0)
|
30
|
-
rake (10.5.0)
|
31
|
-
simplecov (0.16.1)
|
32
|
-
docile (~> 1.1)
|
33
|
-
json (>= 1.8, < 3)
|
34
|
-
simplecov-html (~> 0.10.0)
|
35
|
-
simplecov-console (0.4.2)
|
36
|
-
ansi
|
37
|
-
hirb
|
38
|
-
simplecov
|
39
|
-
simplecov-html (0.10.2)
|
40
|
-
thor (0.20.3)
|
41
|
-
timecop (0.9.1)
|
42
|
-
|
43
|
-
PLATFORMS
|
44
|
-
ruby
|
45
|
-
|
46
|
-
DEPENDENCIES
|
47
|
-
ae_easy-core!
|
48
|
-
bundler (>= 1.16.3)
|
49
|
-
byebug
|
50
|
-
minitest (>= 5.11)
|
51
|
-
rake (>= 10.0)
|
52
|
-
simplecov (>= 0.16.1)
|
53
|
-
simplecov-console (>= 0.4.2)
|
54
|
-
timecop (>= 0.9.1)
|
55
|
-
|
56
|
-
BUNDLED WITH
|
57
|
-
1.17.3
|