ae_easy-test 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/.travis.yml +7 -0
- data/.yardopts +1 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +65 -0
- data/LICENSE +21 -0
- data/README.md +16 -0
- data/Rakefile +22 -0
- data/ae_easy-test.gemspec +49 -0
- data/doc/AeEasy.html +117 -0
- data/doc/AeEasy/Core.html +117 -0
- data/doc/AeEasy/Core/Mock.html +115 -0
- data/doc/AeEasy/Core/Mock/FakeExecutor.html +2037 -0
- data/doc/AeEasy/Core/Modk.html +105 -0
- data/doc/AeEasy/Core/Plugin.html +117 -0
- data/doc/AeEasy/Core/Plugin/ExecutorBehavior.html +196 -0
- data/doc/AeEasy/Test.html +616 -0
- data/doc/AeEasy/Test/Helper.html +1721 -0
- data/doc/AeEasy/Test/RecordTask.html +2493 -0
- data/doc/_index.html +237 -0
- data/doc/class_list.html +51 -0
- data/doc/css/common.css +1 -0
- data/doc/css/full_list.css +58 -0
- data/doc/css/style.css +496 -0
- data/doc/file.README.html +91 -0
- data/doc/file_list.html +56 -0
- data/doc/frames.html +17 -0
- data/doc/index.html +91 -0
- data/doc/js/app.js +292 -0
- data/doc/js/full_list.js +216 -0
- data/doc/js/jquery.js +4 -0
- data/doc/method_list.html +419 -0
- data/doc/top-level-namespace.html +110 -0
- data/lib/ae_easy/test.rb +52 -0
- data/lib/ae_easy/test/helper.rb +224 -0
- data/lib/ae_easy/test/rake.rb +335 -0
- data/lib/ae_easy/test/version.rb +6 -0
- data/lib/ae_easy_override/core.rb +7 -0
- data/lib/ae_easy_override/core/mock.rb +8 -0
- data/lib/ae_easy_override/core/mock/fake_executor.rb +324 -0
- data/lib/ae_easy_override/core/plugin.rb +8 -0
- data/lib/ae_easy_override/core/plugin/executor_behavior.rb +11 -0
- metadata +201 -0
@@ -0,0 +1,110 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<meta charset="utf-8">
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6
|
+
<title>
|
7
|
+
Top Level Namespace
|
8
|
+
|
9
|
+
— Documentation by YARD 0.9.18
|
10
|
+
|
11
|
+
</title>
|
12
|
+
|
13
|
+
<link rel="stylesheet" href="css/style.css" type="text/css" charset="utf-8" />
|
14
|
+
|
15
|
+
<link rel="stylesheet" href="css/common.css" type="text/css" charset="utf-8" />
|
16
|
+
|
17
|
+
<script type="text/javascript" charset="utf-8">
|
18
|
+
pathId = "";
|
19
|
+
relpath = '';
|
20
|
+
</script>
|
21
|
+
|
22
|
+
|
23
|
+
<script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
|
24
|
+
|
25
|
+
<script type="text/javascript" charset="utf-8" src="js/app.js"></script>
|
26
|
+
|
27
|
+
|
28
|
+
</head>
|
29
|
+
<body>
|
30
|
+
<div class="nav_wrap">
|
31
|
+
<iframe id="nav" src="class_list.html?1"></iframe>
|
32
|
+
<div id="resizer"></div>
|
33
|
+
</div>
|
34
|
+
|
35
|
+
<div id="main" tabindex="-1">
|
36
|
+
<div id="header">
|
37
|
+
<div id="menu">
|
38
|
+
|
39
|
+
<a href="_index.html">Index</a> »
|
40
|
+
|
41
|
+
|
42
|
+
<span class="title">Top Level Namespace</span>
|
43
|
+
|
44
|
+
</div>
|
45
|
+
|
46
|
+
<div id="search">
|
47
|
+
|
48
|
+
<a class="full_list_link" id="class_list_link"
|
49
|
+
href="class_list.html">
|
50
|
+
|
51
|
+
<svg width="24" height="24">
|
52
|
+
<rect x="0" y="4" width="24" height="4" rx="1" ry="1"></rect>
|
53
|
+
<rect x="0" y="12" width="24" height="4" rx="1" ry="1"></rect>
|
54
|
+
<rect x="0" y="20" width="24" height="4" rx="1" ry="1"></rect>
|
55
|
+
</svg>
|
56
|
+
</a>
|
57
|
+
|
58
|
+
</div>
|
59
|
+
<div class="clear"></div>
|
60
|
+
</div>
|
61
|
+
|
62
|
+
<div id="content"><h1>Top Level Namespace
|
63
|
+
|
64
|
+
|
65
|
+
|
66
|
+
</h1>
|
67
|
+
<div class="box_info">
|
68
|
+
|
69
|
+
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
</div>
|
80
|
+
|
81
|
+
<h2>Defined Under Namespace</h2>
|
82
|
+
<p class="children">
|
83
|
+
|
84
|
+
|
85
|
+
<strong class="modules">Modules:</strong> <span class='object_link'><a href="AeEasy.html" title="AeEasy (module)">AeEasy</a></span>
|
86
|
+
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
</p>
|
91
|
+
|
92
|
+
|
93
|
+
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
|
98
|
+
|
99
|
+
|
100
|
+
</div>
|
101
|
+
|
102
|
+
<div id="footer">
|
103
|
+
Generated on Fri Mar 8 17:49:24 2019 by
|
104
|
+
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
|
105
|
+
0.9.18 (ruby-2.5.3).
|
106
|
+
</div>
|
107
|
+
|
108
|
+
</div>
|
109
|
+
</body>
|
110
|
+
</html>
|
data/lib/ae_easy/test.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'ae_easy/core'
|
3
|
+
require 'ae_easy_override/core'
|
4
|
+
require 'ae_easy/test/helper'
|
5
|
+
require 'ae_easy/test/rake'
|
6
|
+
require 'ae_easy/test/version'
|
7
|
+
|
8
|
+
module AeEasy
  # Test utilities: a process-wide "test mode" switch read by executors, plus
  # helpers that print diagnostic data together with the caller location.
  module Test
    # Enable test mode inside executors.
    #
    # @return [Boolean] `true`.
    def self.enable_test_mode
      @@test_mode = true
    end

    # Disable test mode inside executors.
    #
    # @return [Boolean] `false`.
    def self.disable_test_mode
      @@test_mode = false
    end

    # Check if test mode is enabled inside executors.
    #
    # @return [Boolean] `true` when test mode enabled, else `false`.
    def self.test_mode?
      @@test_mode ||= false
    end

    # Print a message followed by the inspected data, prefixed with the
    # `file:line` of the first backtrace entry.
    #
    # @param [String] message Message to display.
    # @param [Object,nil] data (nil) Data to inspect.
    # @param [Array] log_caller (nil) Backtrace to take the location from.
    #   Defaults to this method's caller.
    #
    # @return [nil]
    def self.verbose_log message, data = nil, log_caller = nil
      log_caller ||= caller
      entry = Array(log_caller).first.to_s
      # Backtrace entries look like "path:line:in `method'". Take the shortest
      # prefix ending in ":<digits>" so that a Windows drive letter
      # ("C:/x.rb:12:in ...") keeps its colon and the line number is not lost.
      location = entry[/\A(.*?:\d+)(?::|\z)/, 1] || entry
      puts "\n#{location} - #{message}#{data.inspect}\n\n"
    end

    # Print the non matching items of a match diff.
    #
    # @param [String] type Label describing the compared items; it is
    #   interpolated into the message.
    # @param [Hash] diff Match diff to verbose. Only the `:saved` and
    #   `:expected` keys are read.
    # @param [Array] log_caller (nil) Backtrace to take the location from.
    #   Defaults to this method's caller.
    def self.verbose_match_diff type, diff, log_caller = nil
      # Resolve the default here: resolving it inside verbose_log would report
      # this method's own line instead of the caller's.
      log_caller ||= caller
      { saved: 'saved', expected: 'expected' }.each do |key, label|
        items = diff[key]
        next if items.nil? || items.count < 1
        verbose_log "Non matching #{label} #{type}: ", items, log_caller
      end
    end
  end
end
|
@@ -0,0 +1,224 @@
|
|
1
|
+
module AeEasy
  module Test
    # Helpers to load fixture files and to compare hashes and collections.
    module Helper
      # Load and return file contents when exists.
      #
      # @param [String] file_path File path to load.
      # @param [Boolean] should_exists (false) Enforce file existence
      #   validation: when `true` the existence check is skipped and a missing
      #   file raises the error coming from the read itself.
      #
      # @return [String,nil] File contents, or `nil` when the file is missing
      #   and `should_exists` is `false`.
      def self.load_file file_path, should_exists = false
        return nil unless should_exists || File.exist?(file_path)
        # File.read closes the handle; File.open(...).read left it open.
        File.read(file_path, encoding: 'UTF-8')
      end

      # Load and return file contents as json when exists.
      #
      # @param [String] file_path File path to load.
      # @param [Boolean] should_exists (false) Enforce file existence
      #   validation (see load_file).
      #
      # @return [Hash,nil] Parsed JSON, or `nil` when the file is missing or
      #   blank.
      def self.load_json_file file_path, should_exists = false
        file_content = load_file file_path, should_exists
        return nil if file_content.nil? || file_content.to_s.strip == ''
        JSON.parse(file_content)
      end

      # Delete keys from a hash. The hash is modified in place.
      #
      # @param [Hash] hash Base hash to exclude from.
      # @param [Array,nil] keys Keys to exclude. Nothing is done when `nil`.
      #
      # @return [Hash] The same hash instance.
      def self.delete_keys_from! hash, keys
        return hash if keys.nil?
        keys.each { |key| hash.delete key }
        hash
      end

      # Sanitize a copy of the hash provided.
      #
      # @param [Hash] raw_hash Hash to sanitize.
      # @param [Hash] opts ({}) Configuration options.
      # @option opts [Boolean] :deep_stringify (true) If `true` then the copy
      #   is built with `AeEasy::Core.deep_stringify_keys`, otherwise with
      #   `AeEasy::Core.deep_clone`.
      # @option opts [Array,nil] :skip_keys (nil) Key array to delete from
      #   sanitized hash clone.
      #
      # @return [Hash] Sanitized hash clone.
      def self.sanitize raw_hash, opts = {}
        opts = {
          deep_stringify: true,
          skip_keys: nil
        }.merge opts
        hash =
          if opts[:deep_stringify]
            AeEasy::Core.deep_stringify_keys(raw_hash)
          else
            AeEasy::Core.deep_clone(raw_hash)
          end
        delete_keys_from! hash, opts[:skip_keys]
      end

      # Check if an hash element match the filter.
      #
      # @param [Hash] element Element to match.
      # @param [Hash] filter Filters to apply.
      # @param [Hash] opts ({}) Configuration options.
      # @option opts [Boolean] :sanitize (true) Sanitize element and filters
      #   when `true`.
      # @option opts [Boolean] :deep_stringify (true) If `true` then stringify
      #   all hash keys including sublevels before matching.
      # @option opts [Boolean] :exact_match (true) Filter should match element
      #   exactly.
      # @option opts [Array,nil] :skip_keys (nil) Keys to skip on match.
      #
      # @return [Boolean] `true` when element match filters, else `false`.
      def self.match? element, filter, opts = {}
        opts = {
          sanitize: true,
          deep_stringify: true,
          exact_match: true,
          skip_keys: nil
        }.merge opts

        # Sanitize element and filter when need
        if opts[:sanitize]
          element = sanitize element, opts
          filter = sanitize filter, opts
        end

        # Validate exact match when need
        exact_match = opts[:exact_match]
        return false if exact_match && element.keys.count != filter.keys.count

        # Match element filter
        filter.each do |key, value|
          return false if exact_match && !element.key?(key)
          return false if element[key] != value
        end
        true
      end

      # Generate a diff over 2 collections.
      #
      # @param [Array] items_a List of items to diff.
      # @param [Array] items_b List of items to diff.
      # @param [Hash] opts ({}) Configuration options.
      # @option opts [Boolean] :exact_match (true) Fragment should match
      #   element exactly.
      # @option opts [Boolean] :deep_stringify (true) If `true` then stringify
      #   all hash keys including sublevels before matching.
      # @option opts [Boolean] :sanitize (true) Sanitize element and filters
      #   when `true`.
      # @option opts [Array,nil] :skip_keys (nil) Keys to skip on match.
      # @option opts [Symbol] :compare_way (:both) Comparison way sense:
      #   * `:both` Compare left and right.
      #   * `:right` Compare if `items_a` are inside `items_b`.
      #   * `:left` Compare if `items_b` are inside `items_a`.
      #
      # @return [Hash] Diff results as follows:
      #   * `[Array] :items_a` Diff items on `items_a` collection.
      #   * `[Array] :items_b` Diff items on `items_b` collection.
      #   * `[Boolean] :match` `true` when all items match else `false`.
      #
      # @raise [NotImplementedError] Always, the method is flagged as work in
      #   progress.
      #
      # @note Everything below the guard is a draft and is unreachable until
      #   the guard is removed.
      def self.collection_diff items_a, items_b, opts = {}
        raise NotImplementedError.new('Current status WIP, don\'t use it for now.')
        opts = {
          exact_match: true,
          deep_stringify: true,
          sanitize: true,
          skip_keys: nil,
          compare_way: :both
        }.merge opts

        # Match collections items
        match = nil
        compare_right = opts[:compare_way] == :right || opts[:compare_way] == :both
        compare_left = opts[:compare_way] == :left || opts[:compare_way] == :both
        items_a = items_a.sort{|a,b|b.keys.count <=> a.keys.count}
        items_b = items_b.sort{|a,b|b.keys.count <=> a.keys.count}
        remaining_items = items_b + []
        not_found = []
        items_a.each do |item_a|
          found = remaining_items.find do |item_b|
            match = false
            match ||= match?(item_a, item_b, opts) if compare_left
            match ||= match?(item_b, item_a, opts) if compare_right
            match
          end

          # Save diff
          not_found << item_a if found.nil?
          remaining_items.delete found
        end

        # Send diff results
        {
          items_a: not_found,
          items_b: remaining_items,
          match: (not_found.count < 1 && remaining_items.count < 1)
        }
      end

      # Validate when an item collection match universe item collection.
      #
      # @param [Array] fragment Fragment of universe items to match.
      # @param [Array] universe List of items.
      # @param [Hash] opts ({}) Configuration options.
      # @option opts [Boolean] :exact_match (true) Fragment should match
      #   element exactly.
      # @option opts [Boolean] :same_count (true) Fragment item count should
      #   match universe item count exactly.
      # @option opts [Boolean] :match_quantity (nil) Legacy alias of
      #   `:same_count`, still honored when truthy.
      # @option opts [Boolean] :deep_stringify (true) If `true` then stringify
      #   all hash keys including sublevels before matching.
      # @option opts [Boolean] :sanitize (true) Sanitize element and filters
      #   when `true`.
      # @option opts [Array,nil] :skip_keys (nil) Keys to skip on match.
      # @option opts [Symbol] :compare_way (:both) Comparison way sense:
      #   * `:both` Compare left and right.
      #   * `:right` Compare if `items_a` are inside `items_b`.
      #   * `:left` Compare if `items_b` are inside `items_a`.
      #
      # @return [Boolean]
      #
      # @raise [NotImplementedError] When the count check passes, because the
      #   comparison is delegated to collection_diff.
      def self.collection_match? fragment, universe, opts = {}
        opts = {
          exact_match: true,
          same_count: true,
          deep_stringify: true,
          sanitize: true,
          skip_keys: nil,
          compare_way: :both
        }.merge opts

        # False when item collections count are different. The documented
        # option is :same_count; the previous code only read :match_quantity,
        # so the guard never fired with the default options.
        same_count = opts[:same_count] || opts[:match_quantity]
        return false if same_count && fragment.count != universe.count

        diff = collection_diff fragment, universe, opts
        diff[:items_a].count < 1 && diff[:items_b].count < 1
      end

      # Match two collections and calculate diff.
      #
      # @param [Array] items_a Item collection to match.
      # @param [Array] items_b Item collection to match.
      # @param [Hash] opts ({}) Configuration options.
      # @option opts [Array] :skip (nil) Keys to skip on match.
      # @option opts [Symbol] :compare_way (:both) Comparison way sense:
      #   * `:both` Compare left and right.
      #   * `:right` Compare if `items_a` are inside `items_b`.
      #   * `:left` Compare if `items_b` are inside `items_a`.
      #
      # @return [Hash] A hash with the following key pairs:
      #   * `[Hash] :diff` Diff results with `:items_a` and `:items_b` keys.
      #   * `[Boolean] :match` `true` when match else `false`.
      #
      # @raise [NotImplementedError] Always for now, because the comparison is
      #   delegated to collection_diff.
      def self.match_collections items_a, items_b, opts = {}
        diff = collection_diff(
          items_a,
          items_b,
          skip_keys: opts[:skip],
          compare_way: opts.fetch(:compare_way, :both)
        )
        {diff: diff, match: diff[:match]}
      end
    end
  end
end
|
@@ -0,0 +1,335 @@
|
|
1
|
+
require 'rake'
|
2
|
+
|
3
|
+
module AeEasy
  module Test
    # Record rake task generator. It allows AnswersEngine pages snapshots to be
    # recorded for an easy way to perform integration tests.
    class RecordTask
      # `namespace`, `desc` and `task` come from Rake's DSL module; it has to
      # be mixed in for them to be callable from instance methods.
      include ::Rake::DSL

      # Page size used when paginating `find_outputs`.
      OUTPUTS_PER_PAGE = 100

      # Scraper name to be used to get job_id.
      #
      # @return [String,nil]
      attr_accessor :scraper_name

      # Job id to be used on page recording.
      #
      # @return [Integer,nil]
      attr_accessor :job_id

      # Root directory to record pages. Useful to reduce input map fingerprint.
      #
      # @return [String,nil]
      attr_accessor :root_dir

      # Replace the input map collection (see #input_map for structure).
      attr_writer :input_map

      # Will show logs on stdout when enabled (see #enable_verbose and
      # #disable_verbose)
      #
      # @return [Boolean] `true` when enabled, else `false`.
      #
      # @note Default value is `true`.
      def verbose?
        @verbose = true if @verbose.nil?
        @verbose
      end

      # Enable verbose.
      def enable_verbose
        @verbose = true
      end

      # Disable verbose.
      def disable_verbose
        @verbose = false
      end

      # Log text into stdout when verbose is enabled (see #verbose?).
      #
      # @param [String] text Message to be log.
      def log text
        puts text if verbose?
      end

      # An array of input maps to configure what gid record will be saved into
      # each directory. It uses absolute paths when #root_dir is nil, and
      # relative paths when it has been assigned.
      #
      # @return [Array] Map structure is as follows (see #record_outputs for
      #   details about `input_map[][:filters][:outputs]` options):
      #   ```
      #   [
      #     {
      #       gid:'my-gid-123abc',
      #       dir:'/path/to/input/directory',
      #       record_content: true/false, # Default: true
      #       record_failed_content: true/false, # Default: true,
      #       record_page: true/false, # Default: true
      #       record_vars: true/false, # Default: false
      #       filters: {
      #         outputs: {
      #           # Output filters
      #         }
      #       }
      #     }, {
      #       # ...
      #     }
      #   ]
      def input_map
        @input_map ||= []
      end

      # AnswersEngine executor used to get the data to be recorded. It is
      # created on first use, so every method goes through this reader
      # instead of touching the instance variable.
      #
      # @return [AnswersEngine::Scraper::Executor]
      def executor
        @executor ||= AnswersEngine::Scraper::Executor.new
      end

      # Ensures that job_id exists. If #scraper_name is present and no #job_id
      # was specified, then it will get the latest `job_id` for the
      # `scraper_name` provided.
      #
      # @return [Integer,nil] Job id.
      def ensure_job_id
        if job_id.nil? && !scraper_name.nil?
          log "Retriving \"job_id\" from scraper \"#{scraper_name}\""
          # `self.` is required: a bare `job_id = ...` would create a local
          # variable and leave the attribute untouched.
          self.job_id = executor.get_job_id scraper_name.strip
        end
        log(job_id.nil? ? 'No "job_id" was specified.' : "Using \"job_id\" #{job_id}.")
        job_id
      end

      # Record a content into a file only when the content is not null. It will
      # delete the existing file regardless if a new file will be created or
      # not.
      #
      # @param [String] path File path to override.
      # @param [String,nil] content Content to be saved on the file.
      # @yieldparam [File] file File to save the data into.
      def record_file path, content, &block
        if File.exist? path
          log "Deleting old \"#{path}\" file..."
          File.delete path
          log "Done."
        end
        if content.nil? && block.nil?
          log 'Null content detected, skip file.'
          return
        end
        log "Creating \"#{path}\" file..."
        # Binary write mode: the content can be any raw payload, so bytes are
        # written untouched.
        File.open(path, 'wb') do |file|
          file.write content unless content.nil?
          block.call file unless block.nil?
        end
        log "Done."
      end

      # Record a page raw content (HTML, XML, excel, zip, etc.) into `content`
      # file within the provided directory.
      #
      # @param [String] gid Page `gid` to retrieve the data from.
      # @param [String] dir Directory to save file into.
      def record_content gid, dir
        content = executor.get_content gid
        path = File.join(dir, 'content')
        record_file path, content
      end

      # Record a page raw failed content (HTML, XML, excel, zip, etc.) into
      # `failed_content` file within the provided directory.
      #
      # @param [String] gid Page `gid` to retrieve the data from.
      # @param [String] dir Directory to save file into.
      def record_failed_content gid, dir
        content = executor.get_failed_content gid
        path = File.join(dir, 'failed_content')
        record_file path, content
      end

      # Record a page's global or job definition (JSON) into `page.json` file
      # within the provided directory.
      #
      # @param [String] gid Page `gid` to retrieve the data from.
      # @param [String] dir Directory to save file into.
      #
      # @note It will prefer job page definition over global page unless no
      #   `job_id` (see #job_id) or `scraper_name` (see #scraper_name) is
      #   defined.
      def record_page gid, dir
        if job_id.nil?
          log 'Warning: No "scraper_name" or "job_id" was specified, will use global page instead job page.'
        end
        executor.gid = gid
        executor.job_id = job_id
        page = executor.init_page()
        content = JSON.pretty_generate page
        path = File.join(dir, 'page.json')
        record_file path, content
      end

      # Record a page's vars from job page definition (JSON) into `vars.json`
      # file within the provided directory.
      #
      # @param [String] gid Page `gid` to retrieve the data from.
      # @param [String] dir Directory to save file into.
      #
      # @note It will skip it if no `job_id` (see #job_id) or `scraper_name`
      #   (see #scraper_name) is defined.
      def record_vars gid, dir
        if job_id.nil?
          log 'Warning: No "scraper_name" or "job_id" was specified, will skip vars.'
          return
        end
        executor.gid = gid
        executor.job_id = job_id
        page = executor.init_page()
        content = JSON.pretty_generate page['vars']
        path = File.join(dir, 'vars.json')
        record_file path, content
      end

      # Record a collection of outputs (JSON) into `outputs.json` file within
      # the provided directory using filters on AnswersEngine executor
      # `find_outputs` method to retrieve all matching outputs regardless of
      # pagination.
      #
      # @param [Hash, nil] filter (nil) Filters to retrieve `outputs`.
      # @option filter [String] :collection ('default') Output collection.
      # @option filter [Hash] :query ({}) Query that outputs should match.
      # @option filter [Hash] :opts ({}) `find_outputs` configuration options
      #   (see AnswersEngine::Scraper::Executor#find_outputs for details).
      # @param [String, nil] dir (nil) Directory to save file into. Required
      #   whenever a filter is provided.
      #
      # @raise [ArgumentError] When a filter is provided without `dir`.
      #
      # @note Will skip when `nil` is provided as filters.
      def record_outputs filter = nil, dir = nil
        if filter.nil?
          log 'Skip outputs, no filter detected.'
          return
        end
        raise ArgumentError.new('"dir" can\'t be empty') if dir.to_s.strip == ''
        path = File.join(dir, 'outputs.json')
        filter = {
          collection: 'default',
          query: {},
          opts: {}
        }.merge filter

        record_file path, nil do |file|
          count = 0
          page = 1
          file.write '['
          # Keep requesting pages until one comes back nil or empty.
          loop do
            outputs = executor.find_outputs(
              filter[:collection],
              filter[:query],
              page,
              OUTPUTS_PER_PAGE,
              filter[:opts]
            )
            break if outputs.nil? || outputs.count < 1
            outputs.each do |output|
              file.write ',' if count > 0
              count += 1
              file.write JSON.pretty_generate(output)
            end
            page += 1
          end
          file.write ']'
        end
      end

      # Record a page data into a specific directory.
      #
      # @param [Hash] map ({}) Input map configuration.
      # @option map [String] :gid Page `gid` to retrieve the data from.
      # @option map [String] :dir Directory to save file into.
      # @option map [Boolean] :record_content (true) Record content when `true`.
      # @option map [Boolean] :record_failed_content (true) Record
      #   failed_content when `true`.
      # @option map [Boolean] :record_page (true) Record page when `true`.
      # @option map [Boolean] :record_vars (false) Record vars when `true`.
      # @option map [Hash] :filters ({outputs:nil}) Filter hash for outputs
      #   recording, will record only when a filter is specified.
      #
      # @raise [ArgumentError] When `gid` or `dir` is blank, or when the
      #   resolved directory does not exist.
      def record map
        map = {
          gid: nil,
          dir: nil,
          record_content: true,
          record_failed_content: true,
          record_page: true,
          record_vars: false,
          filters: {
            outputs: nil
          }
        }.merge map

        gid = map[:gid].to_s.strip
        raise ArgumentError.new('"gid" can\'t be empty') if gid == ''
        dir = map[:dir].to_s.strip
        raise ArgumentError.new('"dir" can\'t be empty') if dir == ''
        base_dir = root_dir
        dir = File.join base_dir, dir unless base_dir.nil? || base_dir.strip == ''
        dir = File.expand_path dir
        unless Dir.exist? dir
          raise ArgumentError.new "\"#{dir}\" don't exists or is not a directory."
        end
        log "Recording on \"#{dir}\" directory..."

        # The flags are the documented `record_*` keys; the unprefixed keys
        # were never set, so nothing used to be recorded.
        record_content gid, dir if map[:record_content]
        record_failed_content gid, dir if map[:record_failed_content]
        record_page gid, dir if map[:record_page]
        record_vars gid, dir if map[:record_vars]

        filters = map[:filters]
        unless filters.nil? || filters[:outputs].nil?
          record_outputs filters[:outputs], dir
        end
        log "Finish recording \"#{dir}\" directory."
      end

      # Record pages from an input map collection.
      #
      # @param [Array] input_map Input map collection (see #input_map for
      #   structure).
      def record_pages input_map
        ensure_job_id
        input_map.each { |map| record map }
      end

      # Create the record rake task
      def create_task
        namespace 'ae_easy' do
          desc "Generates input files by gid into the configured directories, use these on context loading."
          task :record_pages do
            record_pages input_map
          end
        end
      end

      # Initialize record task. Use block to configure record task.
      #
      # @yieldparam [AeEasy::Test::RecordTask] task Self.
      def initialize &block
        @verbose = nil
        block.call self unless block.nil?
        create_task
      end
    end
  end
end
|