ae_easy-core 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/ae_easy-core.gemspec +2 -1
- data/doc/AeEasy/Core/Config.html +1 -1
- data/doc/AeEasy/Core/Exception/OutdatedError.html +1 -1
- data/doc/AeEasy/Core/Exception.html +1 -1
- data/doc/AeEasy/Core/Helper/Cookie.html +1 -1
- data/doc/AeEasy/Core/Helper.html +1 -1
- data/doc/AeEasy/Core/Mock/FakeDb.html +1402 -449
- data/doc/AeEasy/Core/Mock/FakeExecutor.html +898 -428
- data/doc/AeEasy/Core/Mock/FakeParser.html +9 -124
- data/doc/AeEasy/Core/Mock/FakeSeeder.html +9 -118
- data/doc/AeEasy/Core/Mock.html +1 -1
- data/doc/AeEasy/Core/Plugin/CollectionVault.html +1 -1
- data/doc/AeEasy/Core/Plugin/ConfigBehavior.html +1 -1
- data/doc/AeEasy/Core/Plugin/ContextIntegrator.html +1 -1
- data/doc/AeEasy/Core/Plugin/InitializeHook.html +1 -1
- data/doc/AeEasy/Core/Plugin/Parser.html +1 -1
- data/doc/AeEasy/Core/Plugin/ParserBehavior.html +1 -1
- data/doc/AeEasy/Core/Plugin/Seeder.html +1 -1
- data/doc/AeEasy/Core/Plugin/SeederBehavior.html +1 -1
- data/doc/AeEasy/Core/Plugin.html +1 -1
- data/doc/AeEasy/Core/SmartCollection.html +1 -300
- data/doc/AeEasy/Core.html +32 -110
- data/doc/AeEasy.html +1 -1
- data/doc/_index.html +1 -1
- data/doc/file.README.html +1 -1
- data/doc/index.html +1 -1
- data/doc/method_list.html +157 -117
- data/doc/top-level-namespace.html +1 -1
- data/lib/ae_easy/core/mock/fake_db.rb +216 -14
- data/lib/ae_easy/core/mock/fake_executor.rb +112 -7
- data/lib/ae_easy/core/version.rb +1 -1
- data/lib/ae_easy/core.rb +1 -0
- metadata +18 -5
- data/Gemfile.lock +0 -57
@@ -100,7 +100,7 @@
|
|
100
100
|
</div>
|
101
101
|
|
102
102
|
<div id="footer">
|
103
|
-
Generated on Wed Feb
|
103
|
+
Generated on Wed Feb 20 18:03:17 2019 by
|
104
104
|
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
|
105
105
|
0.9.18 (ruby-2.5.3).
|
106
106
|
</div>
|
@@ -7,6 +7,15 @@ module AeEasy
|
|
7
7
|
PAGE_KEYS = ['gid'].freeze
|
8
8
|
# Output id keys, analog to primary keys.
|
9
9
|
OUTPUT_KEYS = ['_id', '_collection'].freeze
|
10
|
+
# Job id keys, analog to primary keys.
|
11
|
+
JOB_KEYS = ['job_id'].freeze
|
12
|
+
# Job available status.
|
13
|
+
JOB_STATUSES = {
|
14
|
+
active: 'active',
|
15
|
+
done: 'done',
|
16
|
+
cancelled: 'cancelled',
|
17
|
+
paused: 'paused'
|
18
|
+
}
|
10
19
|
# Default collection for saved outputs
|
11
20
|
DEFAULT_COLLECTION = 'default'
|
12
21
|
|
@@ -37,8 +46,11 @@ module AeEasy
|
|
37
46
|
#
|
38
47
|
# @return [Hash]
|
39
48
|
def self.build_page page, opts = {}
|
49
|
+
opts = {
|
50
|
+
allow_page_gid_override: true,
|
51
|
+
allow_job_id_override: true
|
52
|
+
}.merge opts
|
40
53
|
temp_db = AeEasy::Core::Mock::FakeDb.new opts
|
41
|
-
temp_db.enable_page_gid_override
|
42
54
|
temp_db.pages << page
|
43
55
|
temp_db.pages.first
|
44
56
|
end
|
@@ -56,15 +68,122 @@ module AeEasy
|
|
56
68
|
build_page page, opts
|
57
69
|
end
|
58
70
|
|
71
|
+
# Clean an URL to remove fragment, lowercase schema and host, and sort
|
72
|
+
# query string.
|
73
|
+
#
|
74
|
+
# @param [String] raw_url URL to clean.
|
75
|
+
#
|
76
|
+
# @return [String]
|
77
|
+
def self.clean_uri raw_url
|
78
|
+
url = URI.parse(raw_url)
|
79
|
+
url.hostname = url.hostname.downcase
|
80
|
+
url.fragment = nil
|
81
|
+
|
82
|
+
# Sort query string keys
|
83
|
+
unless url.query.nil?
|
84
|
+
query_string = CGI.parse(url.query)
|
85
|
+
keys = query_string.keys.sort
|
86
|
+
data = []
|
87
|
+
keys.each do |key|
|
88
|
+
query_string[key].each do |value|
|
89
|
+
data << "#{URI.encode key}=#{URI.encode value}"
|
90
|
+
end
|
91
|
+
end
|
92
|
+
url.query = data.join('&')
|
93
|
+
end
|
94
|
+
url.to_s
|
95
|
+
end
|
96
|
+
|
97
|
+
# Format headers for gid generation.
|
98
|
+
# @private
|
99
|
+
#
|
100
|
+
# @param [Hash|nil] headers Headers hash.
|
101
|
+
#
|
102
|
+
# @return [Hash]
|
103
|
+
def self.format_headers headers
|
104
|
+
return {} if headers.nil?
|
105
|
+
data = {}
|
106
|
+
headers.each do |key, value|
|
107
|
+
unless value.is_a? Array
|
108
|
+
data[key] = value
|
109
|
+
next
|
110
|
+
end
|
111
|
+
data[key] = value.sort
|
112
|
+
end
|
113
|
+
data
|
114
|
+
end
|
115
|
+
|
116
|
+
# Build a job with defaults by using FakeDb engine.
|
117
|
+
#
|
118
|
+
# @param [Hash] job Job initial values.
|
119
|
+
# @param [Hash] opts ({}) Configuration options (see #initialize).
|
120
|
+
#
|
121
|
+
# @return [Hash]
|
122
|
+
def self.build_job job, opts = {}
|
123
|
+
temp_db = AeEasy::Core::Mock::FakeDb.new opts
|
124
|
+
temp_db.jobs << job
|
125
|
+
temp_db.jobs.last
|
126
|
+
end
|
127
|
+
|
128
|
+
# Build a fake job by using FakeDb engine.
|
129
|
+
#
|
130
|
+
# @param [Hash] opts ({}) Configuration options (see #initialize).
|
131
|
+
# @option opts [String] :scraper_name (nil) Scraper name.
|
132
|
+
# @option opts [Integer] :job_id (nil) Job id.
|
133
|
+
# @option opts [String] :status ('done').
|
134
|
+
#
|
135
|
+
# @return [Hash]
|
136
|
+
def self.build_fake_job opts = {}
|
137
|
+
job = {
|
138
|
+
'job_id' => opts[:job_id],
|
139
|
+
'scraper_name' => opts[:scraper_name],
|
140
|
+
'status' => (opts[:status] || 'done')
|
141
|
+
}
|
142
|
+
build_job job, opts
|
143
|
+
end
|
144
|
+
|
145
|
+
# Get current job or create new one from values.
|
146
|
+
#
|
147
|
+
# @param [Integer] target_job_id (nil) Job id to ensure existance.
|
148
|
+
#
|
149
|
+
# @return [Hash]
|
150
|
+
def ensure_job target_job_id = nil
|
151
|
+
target_job_id = job_id if target_job_id.nil?
|
152
|
+
job = jobs.find{|v|v['job_id'] == target_job_id}
|
153
|
+
return job unless job.nil?
|
154
|
+
job = {
|
155
|
+
'job_id' => target_job_id,
|
156
|
+
'scraper_name' => scraper_name,
|
157
|
+
}
|
158
|
+
job['status'] = 'active' unless target_job_id != job_id
|
159
|
+
jobs << job
|
160
|
+
jobs.last
|
161
|
+
end
|
162
|
+
|
163
|
+
# Fake scraper_name.
|
164
|
+
# @return [String,nil]
|
165
|
+
def scraper_name
|
166
|
+
@scraper_name ||= 'my_scraper'
|
167
|
+
end
|
168
|
+
|
169
|
+
# Set fake scraper_name value.
|
170
|
+
def scraper_name= value
|
171
|
+
job = ensure_job
|
172
|
+
@scraper_name = value
|
173
|
+
job['scraper_name'] = scraper_name
|
174
|
+
end
|
175
|
+
|
59
176
|
# Fake job id.
|
60
177
|
# @return [Integer,nil]
|
61
178
|
def job_id
|
62
|
-
@job_id ||=
|
179
|
+
@job_id ||= generate_job_id
|
63
180
|
end
|
64
181
|
|
65
182
|
# Set fake job id value.
|
66
183
|
def job_id= value
|
67
184
|
@job_id = value
|
185
|
+
ensure_job
|
186
|
+
job_id
|
68
187
|
end
|
69
188
|
|
70
189
|
# Current fake page gid.
|
@@ -78,35 +197,102 @@ module AeEasy
|
|
78
197
|
@page_gid = value
|
79
198
|
end
|
80
199
|
|
81
|
-
# Enable page gid override on page insert.
|
200
|
+
# Enable page gid override on page or output insert.
|
82
201
|
def enable_page_gid_override
|
83
202
|
@allow_page_gid_override = true
|
84
203
|
end
|
85
204
|
|
86
|
-
# Disable page gid override on page insert.
|
205
|
+
# Disable page gid override on page or output insert.
|
87
206
|
def disable_page_gid_override
|
88
207
|
@allow_page_gid_override = false
|
89
208
|
end
|
90
209
|
|
91
|
-
# Specify whenever page gid overriding by user is allowed on page
|
92
|
-
# insert.
|
210
|
+
# Specify whenever page gid overriding by user is allowed on page or
|
211
|
+
# output insert.
|
93
212
|
#
|
94
213
|
# @return [Boolean] `true` when allowed, else `false`.
|
95
214
|
def allow_page_gid_override?
|
96
215
|
@allow_page_gid_override ||= false
|
97
216
|
end
|
98
217
|
|
218
|
+
# Enable job id override on page or output insert.
|
219
|
+
def enable_job_id_override
|
220
|
+
@allow_job_id_override = true
|
221
|
+
end
|
222
|
+
|
223
|
+
# Disable job id override on page or output insert.
|
224
|
+
def disable_job_id_override
|
225
|
+
@allow_job_id_override = false
|
226
|
+
end
|
227
|
+
|
228
|
+
# Specify whenever job id overriding by user is allowed on page or
|
229
|
+
# output insert.
|
230
|
+
#
|
231
|
+
# @return [Boolean] `true` when allowed, else `false`.
|
232
|
+
def allow_job_id_override?
|
233
|
+
@allow_job_id_override ||= false
|
234
|
+
end
|
235
|
+
|
99
236
|
# Initialize fake database.
|
100
237
|
#
|
101
238
|
# @param [Hash] opts ({}) Configuration options.
|
102
239
|
# @option opts [Integer,nil] :job_id Job id default value.
|
240
|
+
# @option opts [String,nil] :scraper_name Scraper name default value.
|
103
241
|
# @option opts [String,nil] :page_gid Page gid default value.
|
104
242
|
# @option opts [Boolean, nil] :allow_page_gid_override (false) Specify
|
105
|
-
# whenever page gid can be overrided on page insert.
|
243
|
+
# whenever page gid can be overrided on page or output insert.
|
244
|
+
# @option opts [Boolean, nil] :allow_job_id_override (false) Specify
|
245
|
+
# whenever job id can be overrided on page or output insert.
|
106
246
|
def initialize opts = {}
|
107
247
|
self.job_id = opts[:job_id]
|
248
|
+
self.scraper_name = opts[:scraper_name]
|
108
249
|
self.page_gid = opts[:page_gid]
|
109
250
|
@allow_page_gid_override = opts[:allow_page_gid_override].nil? ? false : !!opts[:allow_page_gid_override]
|
251
|
+
@allow_job_id_override = opts[:allow_job_id_override].nil? ? false : !!opts[:allow_job_id_override]
|
252
|
+
end
|
253
|
+
|
254
|
+
# Generate a fake scraper name.
|
255
|
+
#
|
256
|
+
# @return [String]
|
257
|
+
def generate_scraper_name
|
258
|
+
Faker::Internet.unique.slug
|
259
|
+
end
|
260
|
+
|
261
|
+
# Generate a fake job_id.
|
262
|
+
#
|
263
|
+
# @return [Integer]
|
264
|
+
def generate_job_id
|
265
|
+
jobs.count < 1 ? 1 : (jobs.max{|a,b|a['job_id'] <=> b['job_id']}['job_id'] + 1)
|
266
|
+
end
|
267
|
+
|
268
|
+
# Get output keys with key generators to emulate saving on db.
|
269
|
+
# @private
|
270
|
+
#
|
271
|
+
# @return [Hash]
|
272
|
+
def job_defaults
|
273
|
+
@job_defaults ||= {
|
274
|
+
'job_id' => lambda{|job| generate_job_id},
|
275
|
+
'scraper_name' => lambda{|job| generate_scraper_name},
|
276
|
+
'status' => 'done',
|
277
|
+
'created_at' => lambda{|job| Time.now}
|
278
|
+
}
|
279
|
+
end
|
280
|
+
|
281
|
+
# Stored job collection
|
282
|
+
#
|
283
|
+
# @return [AeEasy::Core::SmartCollection]
|
284
|
+
def jobs
|
285
|
+
return @jobs unless @jobs.nil?
|
286
|
+
collection = self.class.new_collection JOB_KEYS,
|
287
|
+
defaults: job_defaults
|
288
|
+
collection.bind_event(:before_defaults) do |collection, raw_item|
|
289
|
+
AeEasy::Core.deep_stringify_keys raw_item
|
290
|
+
end
|
291
|
+
collection.bind_event(:before_insert) do |collection, item, match|
|
292
|
+
item['job_id'] ||= generate_job_id
|
293
|
+
item
|
294
|
+
end
|
295
|
+
@jobs ||= collection
|
110
296
|
end
|
111
297
|
|
112
298
|
# Generate a fake UUID based on page data:
|
@@ -119,10 +305,10 @@ module AeEasy
|
|
119
305
|
# * body
|
120
306
|
# * ua_type
|
121
307
|
#
|
122
|
-
# @param [Hash]
|
308
|
+
# @param [Hash] page_data Page data.
|
123
309
|
#
|
124
310
|
# @return [String]
|
125
|
-
def generate_page_gid
|
311
|
+
def generate_page_gid page_data
|
126
312
|
fields = [
|
127
313
|
'url',
|
128
314
|
'method',
|
@@ -133,8 +319,13 @@ module AeEasy
|
|
133
319
|
'body',
|
134
320
|
'ua_type'
|
135
321
|
]
|
322
|
+
data = page_data.select{|k,v|fields.include? k}
|
323
|
+
data['url'] = self.class.clean_uri data['url']
|
324
|
+
data['headers'] = self.class.format_headers data['headers']
|
325
|
+
data['cookie'] = AeEasy::Core::Helper::Cookie.parse_from_request data['cookie'] unless data['cookie'].nil?
|
136
326
|
seed = data.select{|k,v|fields.include? k}.hash
|
137
|
-
self.class.fake_uuid seed
|
327
|
+
checksum = self.class.fake_uuid seed
|
328
|
+
"#{URI.parse(data['url']).hostname}-#{checksum}"
|
138
329
|
end
|
139
330
|
|
140
331
|
# Get page keys with key generators to emulate saving on db.
|
@@ -142,8 +333,9 @@ module AeEasy
|
|
142
333
|
#
|
143
334
|
# @return [Hash]
|
144
335
|
def page_defaults
|
145
|
-
@
|
336
|
+
@page_defaults ||= {
|
146
337
|
'url' => nil,
|
338
|
+
'job_id' => lambda{|page| job_id},
|
147
339
|
'method' => 'GET',
|
148
340
|
'headers' => {},
|
149
341
|
'fetch_type' => 'standard',
|
@@ -168,7 +360,9 @@ module AeEasy
|
|
168
360
|
collection = self.class.new_collection PAGE_KEYS,
|
169
361
|
defaults: page_defaults
|
170
362
|
collection.bind_event(:before_defaults) do |collection, raw_item|
|
171
|
-
AeEasy::Core.deep_stringify_keys raw_item
|
363
|
+
item = AeEasy::Core.deep_stringify_keys raw_item
|
364
|
+
item.delete 'job_id' unless allow_job_id_override?
|
365
|
+
item
|
172
366
|
end
|
173
367
|
collection.bind_event(:before_insert) do |collection, item, match|
|
174
368
|
if item['gid'].nil? || !allow_page_gid_override?
|
@@ -194,7 +388,7 @@ module AeEasy
|
|
194
388
|
#
|
195
389
|
# @return [Hash]
|
196
390
|
def output_defaults
|
197
|
-
@
|
391
|
+
@output_defaults ||= {
|
198
392
|
'_collection' => DEFAULT_COLLECTION,
|
199
393
|
'_job_id' => lambda{|output| job_id},
|
200
394
|
'_created_at' => lambda{|output| Time.new.strftime('%Y-%m-%dT%H:%M:%SZ')},
|
@@ -210,12 +404,18 @@ module AeEasy
|
|
210
404
|
collection = self.class.new_collection OUTPUT_KEYS,
|
211
405
|
defaults: output_defaults
|
212
406
|
collection.bind_event(:before_defaults) do |collection, raw_item|
|
213
|
-
AeEasy::Core.deep_stringify_keys raw_item
|
407
|
+
item = AeEasy::Core.deep_stringify_keys raw_item
|
408
|
+
item.delete '_job_id' unless allow_job_id_override?
|
409
|
+
item.delete '_gid_id' unless allow_page_gid_override?
|
410
|
+
item
|
214
411
|
end
|
215
412
|
collection.bind_event(:before_insert) do |collection, item, match|
|
216
413
|
item['_id'] ||= generate_output_id item
|
217
414
|
item
|
218
415
|
end
|
416
|
+
collection.bind_event(:after_insert) do |collection, item|
|
417
|
+
ensure item['job_id']
|
418
|
+
end
|
219
419
|
@outputs ||= collection
|
220
420
|
end
|
221
421
|
|
@@ -255,6 +455,8 @@ module AeEasy
|
|
255
455
|
outputs
|
256
456
|
when :pages
|
257
457
|
pages
|
458
|
+
when :jobs
|
459
|
+
jobs
|
258
460
|
else
|
259
461
|
raise ArgumentError.new "Unknown collection #{collection}."
|
260
462
|
end
|
@@ -3,6 +3,9 @@ module AeEasy
|
|
3
3
|
module Mock
|
4
4
|
# Fake executor that emulates `AnswersEngine` executor.
|
5
5
|
module FakeExecutor
|
6
|
+
# Max allowed page size when query outputs (see #find_outputs).
|
7
|
+
MAX_FIND_OUTPUTS_PER_PAGE = 500
|
8
|
+
|
6
9
|
# Page content.
|
7
10
|
# @return [String,nil]
|
8
11
|
attr_accessor :content
|
@@ -92,16 +95,28 @@ module AeEasy
|
|
92
95
|
end
|
93
96
|
@outputs = opts[:outputs]
|
94
97
|
self.job_id = opts[:job_id]
|
98
|
+
self.scraper_name = opts[:scraper_name]
|
95
99
|
self.page = opts[:page]
|
96
100
|
end
|
97
101
|
|
102
|
+
# Fake scraper name used by executor.
|
103
|
+
# @return [Integer,nil]
|
104
|
+
def scraper_name
|
105
|
+
db.scraper_name
|
106
|
+
end
|
107
|
+
|
108
|
+
# Set fake scraper name value.
|
109
|
+
def scraper_name= value
|
110
|
+
db.scraper_name = value
|
111
|
+
end
|
112
|
+
|
98
113
|
# Fake job ID used by executor.
|
99
114
|
# @return [Integer,nil]
|
100
115
|
def job_id
|
101
116
|
db.job_id
|
102
117
|
end
|
103
118
|
|
104
|
-
# Set fake job
|
119
|
+
# Set fake job ID value.
|
105
120
|
def job_id= value
|
106
121
|
db.job_id = value
|
107
122
|
page['job_id'] = value
|
@@ -124,6 +139,11 @@ module AeEasy
|
|
124
139
|
@page = value
|
125
140
|
end
|
126
141
|
|
142
|
+
# Retrive a list of saved jobs.
|
143
|
+
def saved_jobs
|
144
|
+
db.jobs
|
145
|
+
end
|
146
|
+
|
127
147
|
# Retrive a list of saved pages. Drafted pages can be included.
|
128
148
|
def saved_pages
|
129
149
|
db.pages
|
@@ -134,18 +154,29 @@ module AeEasy
|
|
134
154
|
db.outputs
|
135
155
|
end
|
136
156
|
|
137
|
-
# Save a
|
157
|
+
# Save a job collection on db and remove all the element from +list+.
|
158
|
+
#
|
159
|
+
# @param [Array] list Collection of jobs to save.
|
160
|
+
def save_jobs list
|
161
|
+
list.each{|job| db.jobs << job}
|
162
|
+
list.clear
|
163
|
+
end
|
164
|
+
|
165
|
+
# Save a page collection on db and remove all the element from +list+.
|
138
166
|
#
|
139
167
|
# @param [Array] list Collection of pages to save.
|
140
168
|
def save_pages list
|
141
169
|
list.each{|page| db.pages << page}
|
170
|
+
list.clear
|
142
171
|
end
|
143
172
|
|
144
|
-
# Save an output collection on db
|
173
|
+
# Save an output collection on db and remove all the element from
|
174
|
+
# +list+.
|
145
175
|
#
|
146
176
|
# @param [Array] list Collection of outputs to save.
|
147
177
|
def save_outputs list
|
148
178
|
list.each{|output| db.outputs << output}
|
179
|
+
list.clear
|
149
180
|
end
|
150
181
|
|
151
182
|
# Save draft pages into db and clear draft queue.
|
@@ -166,19 +197,71 @@ module AeEasy
|
|
166
197
|
flush_outputs
|
167
198
|
end
|
168
199
|
|
200
|
+
# Get latest job by scraper_name.
|
201
|
+
#
|
202
|
+
# @param [String] scraper_name Scraper name.
|
203
|
+
# @param [Hash] filter ({}) Additional_filters.
|
204
|
+
#
|
205
|
+
# @return [Hash|nil] Return nil if no scraper_name or scraper_name is
|
206
|
+
# nil.
|
207
|
+
def latest_job_by scraper_name, filter = {}
|
208
|
+
return nil if scraper_name.nil?
|
209
|
+
data = db.query :jobs, filter.merge('scraper_name' => scraper_name)
|
210
|
+
data.max{|a,b| a['created_at'] <=> b['created_at']}
|
211
|
+
end
|
212
|
+
|
169
213
|
# Find outputs by collection and query with pagination.
|
170
214
|
#
|
171
215
|
# @param [String] collection ('default') Collection name.
|
172
216
|
# @param [Hash] query ({}) Filters to query.
|
173
217
|
# @param [Integer] page (1) Page number.
|
174
218
|
# @param [Integer] per_page (30) Page size.
|
219
|
+
# @param [Hash] opts ({}) Configuration options.
|
220
|
+
# @option opts [String|nil] :scraper_name (nil) Scraper name to query
|
221
|
+
# from.
|
222
|
+
# @option opts [Integer|nil] :job_id (nil) Job's id to query from.
|
223
|
+
#
|
224
|
+
# @raise [ArgumentError] +collection+ is not String.
|
225
|
+
# @raise [ArgumentError] +query+ is not a Hash.
|
226
|
+
# @raise [ArgumentError] +page+ is not an Integer greater than 0.
|
227
|
+
# @raise [ArgumentError] +per_page+ is not an Integer between 1 and 500.
|
175
228
|
#
|
176
229
|
# @return [Array]
|
177
|
-
|
230
|
+
#
|
231
|
+
# @example
|
232
|
+
# find_outputs
|
233
|
+
# @example
|
234
|
+
# find_outputs 'my_collection'
|
235
|
+
# @example
|
236
|
+
# find_outputs 'my_collection', {}
|
237
|
+
# @example
|
238
|
+
# find_outputs 'my_collection', {}, 1
|
239
|
+
# @example
|
240
|
+
# find_outputs 'my_collection', {}, 1, 30
|
241
|
+
# @example Find from another scraper by name
|
242
|
+
# find_outputs 'my_collection', {}, 1, 30, scraper_name: 'my_scraper'
|
243
|
+
# @example Find from another scraper by job_id
|
244
|
+
# find_outputs 'my_collection', {}, 1, 30, job_id: 123
|
245
|
+
#
|
246
|
+
# @note *opts `:job_id` option is prioritize over `:scraper_name` when
|
247
|
+
# both exists. If none add provided or nil values, then current job
|
248
|
+
# will be used to query instead, this is the defaul behavior.
|
249
|
+
def find_outputs collection = 'default', query = {}, page = 1, per_page = 30, opts = {}
|
250
|
+
raise ArgumentError.new("collection needs to be a String.") unless collection.is_a?(String)
|
251
|
+
raise ArgumentError.new("query needs to be a Hash.") unless query.is_a?(Hash)
|
252
|
+
unless page.is_a?(Integer) && page > 0
|
253
|
+
raise ArgumentError.new("page needs to be an Integer greater than 0.")
|
254
|
+
end
|
255
|
+
unless per_page.is_a?(Integer) && per_page > 0 && per_page <= MAX_FIND_OUTPUTS_PER_PAGE
|
256
|
+
raise ArgumentError.new("per_page needs to be an Integer between 1 and #{MAX_FIND_OUTPUTS_PER_PAGE}.")
|
257
|
+
end
|
258
|
+
|
178
259
|
count = 0
|
179
260
|
offset = (page - 1) * per_page
|
261
|
+
job = latest_job_by(opts[:scraper_name])
|
180
262
|
fixed_query = query.merge(
|
181
|
-
'_collection' => collection
|
263
|
+
'_collection' => collection,
|
264
|
+
'_job_id' => opts[:job_id] || (job.nil? ? job_id : job['job_id'])
|
182
265
|
)
|
183
266
|
db.query :outputs, fixed_query, offset, per_page
|
184
267
|
end
|
@@ -187,10 +270,32 @@ module AeEasy
|
|
187
270
|
#
|
188
271
|
# @param [String] collection ('default') Collection name.
|
189
272
|
# @param [Hash] query ({}) Filters to query.
|
273
|
+
# @param [Hash] opts ({}) Configuration options.
|
274
|
+
# @option opts [String|nil] :scraper_name (nil) Scraper name to query
|
275
|
+
# from.
|
276
|
+
# @option opts [Integer|nil] :job_id (nil) Job's id to query from.
|
277
|
+
#
|
278
|
+
# @raise [ArgumentError] +collection+ is not String.
|
279
|
+
# @raise [ArgumentError] +query+ is not a Hash.
|
190
280
|
#
|
191
281
|
# @return [Hash, nil]
|
192
|
-
|
193
|
-
|
282
|
+
#
|
283
|
+
# @example
|
284
|
+
# find_output
|
285
|
+
# @example
|
286
|
+
# find_output 'my_collection'
|
287
|
+
# @example
|
288
|
+
# find_output 'my_collection', {}
|
289
|
+
# @example Find from another scraper by name
|
290
|
+
# find_output 'my_collection', {}, scraper_name: 'my_scraper'
|
291
|
+
# @example Find from another scraper by job_id
|
292
|
+
# find_output 'my_collection', {}, job_id: 123
|
293
|
+
#
|
294
|
+
# @note *opts `:job_id` option is prioritize over `:scraper_name` when
|
295
|
+
# both exists. If none add provided or nil values, then current job
|
296
|
+
# will be used to query instead, this is the defaul behavior.
|
297
|
+
def find_output collection = 'default', query = {}, opts = {}
|
298
|
+
result = find_outputs(collection, query, 1, 1, opts)
|
194
299
|
result.nil? ? nil : result.first
|
195
300
|
end
|
196
301
|
|
data/lib/ae_easy/core/version.rb
CHANGED
data/lib/ae_easy/core.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ae_easy-core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eduardo Rosales
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-02-
|
11
|
+
date: 2019-02-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: answersengine
|
@@ -16,14 +16,28 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.2.25
|
19
|
+
version: 0.2.32
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.2.25
|
26
|
+
version: 0.2.32
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: faker
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: bundler
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -134,7 +148,6 @@ files:
|
|
134
148
|
- ".yardopts"
|
135
149
|
- CODE_OF_CONDUCT.md
|
136
150
|
- Gemfile
|
137
|
-
- Gemfile.lock
|
138
151
|
- LICENSE
|
139
152
|
- README.md
|
140
153
|
- Rakefile
|
data/Gemfile.lock
DELETED
@@ -1,57 +0,0 @@
|
|
1
|
-
PATH
|
2
|
-
remote: .
|
3
|
-
specs:
|
4
|
-
ae_easy-core (0.0.4)
|
5
|
-
answersengine (>= 0.2.25)
|
6
|
-
|
7
|
-
GEM
|
8
|
-
remote: https://rubygems.org/
|
9
|
-
specs:
|
10
|
-
ansi (1.5.0)
|
11
|
-
answersengine (0.2.31)
|
12
|
-
httparty (~> 0.16.2)
|
13
|
-
nokogiri (~> 1.6, < 1.10)
|
14
|
-
thor (~> 0.20.3)
|
15
|
-
byebug (10.0.2)
|
16
|
-
docile (1.3.1)
|
17
|
-
hirb (0.7.3)
|
18
|
-
httparty (0.16.3)
|
19
|
-
mime-types (~> 3.0)
|
20
|
-
multi_xml (>= 0.5.2)
|
21
|
-
json (2.1.0)
|
22
|
-
mime-types (3.2.2)
|
23
|
-
mime-types-data (~> 3.2015)
|
24
|
-
mime-types-data (3.2018.0812)
|
25
|
-
mini_portile2 (2.4.0)
|
26
|
-
minitest (5.11.3)
|
27
|
-
multi_xml (0.6.0)
|
28
|
-
nokogiri (1.9.1)
|
29
|
-
mini_portile2 (~> 2.4.0)
|
30
|
-
rake (10.5.0)
|
31
|
-
simplecov (0.16.1)
|
32
|
-
docile (~> 1.1)
|
33
|
-
json (>= 1.8, < 3)
|
34
|
-
simplecov-html (~> 0.10.0)
|
35
|
-
simplecov-console (0.4.2)
|
36
|
-
ansi
|
37
|
-
hirb
|
38
|
-
simplecov
|
39
|
-
simplecov-html (0.10.2)
|
40
|
-
thor (0.20.3)
|
41
|
-
timecop (0.9.1)
|
42
|
-
|
43
|
-
PLATFORMS
|
44
|
-
ruby
|
45
|
-
|
46
|
-
DEPENDENCIES
|
47
|
-
ae_easy-core!
|
48
|
-
bundler (>= 1.16.3)
|
49
|
-
byebug
|
50
|
-
minitest (>= 5.11)
|
51
|
-
rake (>= 10.0)
|
52
|
-
simplecov (>= 0.16.1)
|
53
|
-
simplecov-console (>= 0.4.2)
|
54
|
-
timecop (>= 0.9.1)
|
55
|
-
|
56
|
-
BUNDLED WITH
|
57
|
-
1.17.3
|