ae_easy-core 0.1.9 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ae_easy-core.gemspec +1 -1
- data/doc/AeEasy.html +116 -116
- data/doc/AeEasy/Core.html +1217 -1217
- data/doc/AeEasy/Core/Config.html +284 -284
- data/doc/AeEasy/Core/Exception.html +116 -116
- data/doc/AeEasy/Core/Exception/OutdatedError.html +132 -132
- data/doc/AeEasy/Core/Helper.html +116 -116
- data/doc/AeEasy/Core/Helper/Cookie.html +797 -797
- data/doc/AeEasy/Core/Mock.html +231 -231
- data/doc/AeEasy/Core/Mock/FakeDb.html +3133 -2729
- data/doc/AeEasy/Core/Mock/FakeExecutor.html +2852 -2259
- data/doc/AeEasy/Core/Mock/FakeFinisher.html +160 -0
- data/doc/AeEasy/Core/Mock/FakeParser.html +157 -157
- data/doc/AeEasy/Core/Mock/FakeSeeder.html +157 -157
- data/doc/AeEasy/Core/Plugin.html +116 -116
- data/doc/AeEasy/Core/Plugin/CollectionVault.html +262 -262
- data/doc/AeEasy/Core/Plugin/ConfigBehavior.html +471 -471
- data/doc/AeEasy/Core/Plugin/ContextIntegrator.html +365 -365
- data/doc/AeEasy/Core/Plugin/Executor.html +243 -243
- data/doc/AeEasy/Core/Plugin/ExecutorBehavior.html +299 -299
- data/doc/AeEasy/Core/Plugin/Finisher.html +265 -0
- data/doc/AeEasy/Core/Plugin/FinisherBehavior.html +142 -0
- data/doc/AeEasy/Core/Plugin/InitializeHook.html +198 -198
- data/doc/AeEasy/Core/Plugin/Parser.html +254 -254
- data/doc/AeEasy/Core/Plugin/ParserBehavior.html +216 -216
- data/doc/AeEasy/Core/Plugin/Seeder.html +594 -594
- data/doc/AeEasy/Core/Plugin/SeederBehavior.html +141 -141
- data/doc/AeEasy/Core/SmartCollection.html +804 -804
- data/doc/_index.html +363 -342
- data/doc/class_list.html +51 -51
- data/doc/css/full_list.css +58 -58
- data/doc/css/style.css +496 -496
- data/doc/file.README.html +70 -70
- data/doc/file_list.html +56 -56
- data/doc/frames.html +17 -17
- data/doc/index.html +70 -70
- data/doc/js/app.js +303 -292
- data/doc/js/full_list.js +216 -216
- data/doc/js/jquery.js +3 -3
- data/doc/method_list.html +939 -851
- data/doc/top-level-namespace.html +109 -109
- data/lib/ae_easy/core/mock.rb +1 -0
- data/lib/ae_easy/core/mock/fake_db.rb +67 -1
- data/lib/ae_easy/core/mock/fake_executor.rb +61 -0
- data/lib/ae_easy/core/mock/fake_finisher.rb +28 -0
- data/lib/ae_easy/core/mock/fake_parser.rb +3 -1
- data/lib/ae_easy/core/plugin.rb +2 -0
- data/lib/ae_easy/core/plugin/finisher.rb +19 -0
- data/lib/ae_easy/core/plugin/finisher_behavior.rb +9 -0
- data/lib/ae_easy/core/version.rb +1 -1
- metadata +10 -4
@@ -1,110 +1,110 @@
|
|
1
|
-
<!DOCTYPE html>
|
2
|
-
<html>
|
3
|
-
<head>
|
4
|
-
<meta charset="utf-8">
|
5
|
-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6
|
-
<title>
|
7
|
-
Top Level Namespace
|
8
|
-
|
9
|
-
— Documentation by YARD 0.9.
|
10
|
-
|
11
|
-
</title>
|
12
|
-
|
13
|
-
<link rel="stylesheet" href="css/style.css" type="text/css" charset="utf-8" />
|
14
|
-
|
15
|
-
<link rel="stylesheet" href="css/common.css" type="text/css" charset="utf-8" />
|
16
|
-
|
17
|
-
<script type="text/javascript" charset="utf-8">
|
18
|
-
pathId = "";
|
19
|
-
relpath = '';
|
20
|
-
</script>
|
21
|
-
|
22
|
-
|
23
|
-
<script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
|
24
|
-
|
25
|
-
<script type="text/javascript" charset="utf-8" src="js/app.js"></script>
|
26
|
-
|
27
|
-
|
28
|
-
</head>
|
29
|
-
<body>
|
30
|
-
<div class="nav_wrap">
|
31
|
-
<iframe id="nav" src="class_list.html?1"></iframe>
|
32
|
-
<div id="resizer"></div>
|
33
|
-
</div>
|
34
|
-
|
35
|
-
<div id="main" tabindex="-1">
|
36
|
-
<div id="header">
|
37
|
-
<div id="menu">
|
38
|
-
|
39
|
-
<a href="_index.html">Index</a> »
|
40
|
-
|
41
|
-
|
42
|
-
<span class="title">Top Level Namespace</span>
|
43
|
-
|
44
|
-
</div>
|
45
|
-
|
46
|
-
<div id="search">
|
47
|
-
|
48
|
-
<a class="full_list_link" id="class_list_link"
|
49
|
-
href="class_list.html">
|
50
|
-
|
51
|
-
<svg width="24" height="24">
|
52
|
-
<rect x="0" y="4" width="24" height="4" rx="1" ry="1"></rect>
|
53
|
-
<rect x="0" y="12" width="24" height="4" rx="1" ry="1"></rect>
|
54
|
-
<rect x="0" y="20" width="24" height="4" rx="1" ry="1"></rect>
|
55
|
-
</svg>
|
56
|
-
</a>
|
57
|
-
|
58
|
-
</div>
|
59
|
-
<div class="clear"></div>
|
60
|
-
</div>
|
61
|
-
|
62
|
-
<div id="content"><h1>Top Level Namespace
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
</h1>
|
67
|
-
<div class="box_info">
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
</div>
|
80
|
-
|
81
|
-
<h2>Defined Under Namespace</h2>
|
82
|
-
<p class="children">
|
83
|
-
|
84
|
-
|
85
|
-
<strong class="modules">Modules:</strong> <span class='object_link'><a href="AeEasy.html" title="AeEasy (module)">AeEasy</a></span>
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
</p>
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
</div>
|
101
|
-
|
102
|
-
<div id="footer">
|
103
|
-
Generated on
|
104
|
-
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
|
105
|
-
0.9.
|
106
|
-
</div>
|
107
|
-
|
108
|
-
</div>
|
109
|
-
</body>
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<meta charset="utf-8">
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6
|
+
<title>
|
7
|
+
Top Level Namespace
|
8
|
+
|
9
|
+
— Documentation by YARD 0.9.20
|
10
|
+
|
11
|
+
</title>
|
12
|
+
|
13
|
+
<link rel="stylesheet" href="css/style.css" type="text/css" charset="utf-8" />
|
14
|
+
|
15
|
+
<link rel="stylesheet" href="css/common.css" type="text/css" charset="utf-8" />
|
16
|
+
|
17
|
+
<script type="text/javascript" charset="utf-8">
|
18
|
+
pathId = "";
|
19
|
+
relpath = '';
|
20
|
+
</script>
|
21
|
+
|
22
|
+
|
23
|
+
<script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
|
24
|
+
|
25
|
+
<script type="text/javascript" charset="utf-8" src="js/app.js"></script>
|
26
|
+
|
27
|
+
|
28
|
+
</head>
|
29
|
+
<body>
|
30
|
+
<div class="nav_wrap">
|
31
|
+
<iframe id="nav" src="class_list.html?1"></iframe>
|
32
|
+
<div id="resizer"></div>
|
33
|
+
</div>
|
34
|
+
|
35
|
+
<div id="main" tabindex="-1">
|
36
|
+
<div id="header">
|
37
|
+
<div id="menu">
|
38
|
+
|
39
|
+
<a href="_index.html">Index</a> »
|
40
|
+
|
41
|
+
|
42
|
+
<span class="title">Top Level Namespace</span>
|
43
|
+
|
44
|
+
</div>
|
45
|
+
|
46
|
+
<div id="search">
|
47
|
+
|
48
|
+
<a class="full_list_link" id="class_list_link"
|
49
|
+
href="class_list.html">
|
50
|
+
|
51
|
+
<svg width="24" height="24">
|
52
|
+
<rect x="0" y="4" width="24" height="4" rx="1" ry="1"></rect>
|
53
|
+
<rect x="0" y="12" width="24" height="4" rx="1" ry="1"></rect>
|
54
|
+
<rect x="0" y="20" width="24" height="4" rx="1" ry="1"></rect>
|
55
|
+
</svg>
|
56
|
+
</a>
|
57
|
+
|
58
|
+
</div>
|
59
|
+
<div class="clear"></div>
|
60
|
+
</div>
|
61
|
+
|
62
|
+
<div id="content"><h1>Top Level Namespace
|
63
|
+
|
64
|
+
|
65
|
+
|
66
|
+
</h1>
|
67
|
+
<div class="box_info">
|
68
|
+
|
69
|
+
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
</div>
|
80
|
+
|
81
|
+
<h2>Defined Under Namespace</h2>
|
82
|
+
<p class="children">
|
83
|
+
|
84
|
+
|
85
|
+
<strong class="modules">Modules:</strong> <span class='object_link'><a href="AeEasy.html" title="AeEasy (module)">AeEasy</a></span>
|
86
|
+
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
</p>
|
91
|
+
|
92
|
+
|
93
|
+
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
|
98
|
+
|
99
|
+
|
100
|
+
</div>
|
101
|
+
|
102
|
+
<div id="footer">
|
103
|
+
Generated on Fri Sep 27 02:01:30 2019 by
|
104
|
+
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
|
105
|
+
0.9.20 (ruby-2.5.3).
|
106
|
+
</div>
|
107
|
+
|
108
|
+
</div>
|
109
|
+
</body>
|
110
110
|
</html>
|
data/lib/ae_easy/core/mock.rb
CHANGED
@@ -152,6 +152,16 @@ module AeEasy
|
|
152
152
|
build_job job, opts
|
153
153
|
end
|
154
154
|
|
155
|
+
# Return a timestamp
|
156
|
+
#
|
157
|
+
# @param [Time] time (nil) Time from which to get time stamp.
|
158
|
+
#
|
159
|
+
# @return [String]
|
160
|
+
def self.time_stamp time = nil
|
161
|
+
time = Time.new if time.nil?
|
162
|
+
time.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
|
163
|
+
end
|
164
|
+
|
155
165
|
# Get current job or create new one from values.
|
156
166
|
#
|
157
167
|
# @param [Integer] target_job_id (nil) Job id to ensure existence.
|
@@ -345,6 +355,7 @@ module AeEasy
|
|
345
355
|
def page_defaults
|
346
356
|
@page_defaults ||= {
|
347
357
|
'url' => nil,
|
358
|
+
'status' => 'to_fetch',
|
348
359
|
'job_id' => lambda{|page| job_id},
|
349
360
|
'method' => 'GET',
|
350
361
|
'headers' => {},
|
@@ -353,6 +364,8 @@ module AeEasy
|
|
353
364
|
'no_redirect' => false,
|
354
365
|
'body' => nil,
|
355
366
|
'ua_type' => 'desktop',
|
367
|
+
'no_url_encode' => false,
|
368
|
+
'http2' => false,
|
356
369
|
'vars' => {}
|
357
370
|
}
|
358
371
|
end
|
@@ -404,7 +417,7 @@ module AeEasy
|
|
404
417
|
@output_defaults ||= {
|
405
418
|
'_collection' => DEFAULT_COLLECTION,
|
406
419
|
'_job_id' => lambda{|output| job_id},
|
407
|
-
'_created_at' => lambda{|output|
|
420
|
+
'_created_at' => lambda{|output| self.class.time_stamp},
|
408
421
|
'_gid' => lambda{|output| page_gid}
|
409
422
|
}
|
410
423
|
end
|
@@ -489,6 +502,59 @@ module AeEasy
|
|
489
502
|
end
|
490
503
|
matches
|
491
504
|
end
|
505
|
+
|
506
|
+
# Refetch a page.
|
507
|
+
#
|
508
|
+
# @param [Integer] job_id Page's job_id to refetch.
|
509
|
+
# @param [String] gid Page's gid to refetch.
|
510
|
+
def refetch job_id, gid
|
511
|
+
page = pages.find_match('gid' => gid, 'job_id' => job_id)
|
512
|
+
raise Exception.new("Page not found with job_id \"#{job_id}\" gid \"#{gid}\"") if page.nil?
|
513
|
+
page['status'] = 'to_fetch'
|
514
|
+
page['freshness'] = self.class.time_stamp
|
515
|
+
page['to_fetch'] = self.class.time_stamp
|
516
|
+
page['fetched_from'] = nil
|
517
|
+
page['fetching_at'] = '2001-01-01T00:00:00Z'
|
518
|
+
page['fetched_at'] = nil
|
519
|
+
page['fetching_try_count'] = 0
|
520
|
+
page['effective_url'] = nil
|
521
|
+
page['parsing_at'] = nil
|
522
|
+
page['parsing_failed_at'] = nil
|
523
|
+
page['parsed_at'] = nil
|
524
|
+
page['parsing_try_count'] = 0
|
525
|
+
page['parsing_fail_count'] = 0
|
526
|
+
page['parsing_updated_at'] = '2001-01-01T00:00:00Z'
|
527
|
+
page['response_checksum'] = nil
|
528
|
+
page['response_status'] = nil
|
529
|
+
page['response_status_code'] = nil
|
530
|
+
page['response_headers'] = nil
|
531
|
+
page['response_cookie'] = nil
|
532
|
+
page['response_proto'] = nil
|
533
|
+
page['content_type'] = nil
|
534
|
+
page['content_size'] = 0
|
535
|
+
page['failed_response_status_code'] = nil
|
536
|
+
page['failed_response_headers'] = nil
|
537
|
+
page['failed_response_cookie'] = nil
|
538
|
+
page['failed_effective_url'] = nil
|
539
|
+
page['failed_at'] = nil
|
540
|
+
page['failed_content_type'] = nil
|
541
|
+
end
|
542
|
+
|
543
|
+
# Reparse a page.
|
544
|
+
#
|
545
|
+
# @param [Integer] job_id Page's job_id to reparse.
|
546
|
+
# @param [String] gid Page's gid to reparse.
|
547
|
+
def reparse job_id, gid
|
548
|
+
page = pages.find_match('gid' => gid, 'job_id' => job_id)
|
549
|
+
raise Exception.new("Page not found with job_id \"#{job_id}\" gid \"#{gid}\"") if page.nil?
|
550
|
+
page['status'] = 'to_parse'
|
551
|
+
page['parsing_at'] = nil
|
552
|
+
page['parsing_failed_at'] = nil
|
553
|
+
page['parsing_updated_at'] = '2001-01-01T00:00:00Z'
|
554
|
+
page['parsed_at'] = nil
|
555
|
+
page['parsing_try_count'] = 0
|
556
|
+
page['parsing_fail_count'] = 0
|
557
|
+
end
|
492
558
|
end
|
493
559
|
end
|
494
560
|
end
|
@@ -139,6 +139,29 @@ module AeEasy
|
|
139
139
|
@page = value
|
140
140
|
end
|
141
141
|
|
142
|
+
# Refetch self page flag.
|
143
|
+
# @return [Boolean]
|
144
|
+
# @note It is stronger than #reparse_self flag.
|
145
|
+
def refetch_self
|
146
|
+
@refetch_self ||= false
|
147
|
+
end
|
148
|
+
|
149
|
+
# Set refetch self page flag.
|
150
|
+
def refetch_self= value
|
151
|
+
@refetch_self = value
|
152
|
+
end
|
153
|
+
|
154
|
+
# Reparse self page flag.
|
155
|
+
# @return [Boolean]
|
156
|
+
def reparse_self
|
157
|
+
@reparse_self ||= false
|
158
|
+
end
|
159
|
+
|
160
|
+
# Set reparse self page flag.
|
161
|
+
def reparse_self= value
|
162
|
+
@reparse_self = value
|
163
|
+
end
|
164
|
+
|
142
165
|
# Retrieve a list of saved jobs.
|
143
166
|
def saved_jobs
|
144
167
|
db.jobs
|
@@ -179,6 +202,19 @@ module AeEasy
|
|
179
202
|
list.clear
|
180
203
|
end
|
181
204
|
|
205
|
+
# Execute any action applied to current page
|
206
|
+
def flush_self_actions
|
207
|
+
# Save current page before refetch/reparse
|
208
|
+
if refetch_self || reparse_self
|
209
|
+
temp_page_gid_override = !db.allow_page_gid_override?
|
210
|
+
db.enable_page_gid_override if temp_page_gid_override
|
211
|
+
save_pages [page]
|
212
|
+
db.disable_page_gid_override if temp_page_gid_override
|
213
|
+
end
|
214
|
+
db.refetch(page['job_id'], page['gid']) if refetch_self
|
215
|
+
db.reparse(page['job_id'], page['gid']) if reparse_self
|
216
|
+
end
|
217
|
+
|
182
218
|
# Save draft pages into db and clear draft queue.
|
183
219
|
def flush_pages
|
184
220
|
save_pages pages
|
@@ -195,6 +231,7 @@ module AeEasy
|
|
195
231
|
def flush
|
196
232
|
flush_pages
|
197
233
|
flush_outputs
|
234
|
+
flush_self_actions
|
198
235
|
end
|
199
236
|
|
200
237
|
# Get latest job by scraper_name.
|
@@ -306,6 +343,30 @@ module AeEasy
|
|
306
343
|
eval(File.read(file_path), isolated_binding(vars), file_path)
|
307
344
|
flush
|
308
345
|
end
|
346
|
+
|
347
|
+
# Refetch a page by gid.
|
348
|
+
#
|
349
|
+
# @param [String] gid Page's gid to refetch.
|
350
|
+
def refetch gid
|
351
|
+
raise ArgumentError.new("gid needs to be a String.") unless gid.is_a?(String)
|
352
|
+
if page['gid'] == gid
|
353
|
+
self.refetch_self = true
|
354
|
+
return
|
355
|
+
end
|
356
|
+
db.refetch(job_id, gid)
|
357
|
+
end
|
358
|
+
|
359
|
+
# Reparse a page by gid.
|
360
|
+
#
|
361
|
+
# @param [String] page_gid Page's gid to reparse.
|
362
|
+
def reparse page_gid
|
363
|
+
raise ArgumentError.new("page_gid needs to be a String.") unless page_gid.is_a?(String)
|
364
|
+
if page['gid'] == page_gid
|
365
|
+
self.reparse_self = true
|
366
|
+
return
|
367
|
+
end
|
368
|
+
db.reparse(job_id, page_gid)
|
369
|
+
end
|
309
370
|
end
|
310
371
|
end
|
311
372
|
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module AeEasy
|
2
|
+
module Core
|
3
|
+
module Mock
|
4
|
+
# Fake finisher that emulates `AnswersEngine` finisher executor.
|
5
|
+
class FakeFinisher
|
6
|
+
include AeEasy::Core::Mock::FakeExecutor
|
7
|
+
|
8
|
+
# Fake finisher exposed methods to isolated context.
|
9
|
+
# @private
|
10
|
+
#
|
11
|
+
# @return [Array]
|
12
|
+
def self.exposed_methods
|
13
|
+
real_methods = AnswersEngine::Scraper::RubyFinisherExecutor.exposed_methods.uniq
|
14
|
+
mock_methods = [
|
15
|
+
:outputs,
|
16
|
+
:save_outputs,
|
17
|
+
:find_output,
|
18
|
+
:find_outputs
|
19
|
+
]
|
20
|
+
AeEasy::Core::Mock::FakeExecutor.check_compatibility real_methods, mock_methods
|
21
|
+
mock_methods << :job_id
|
22
|
+
mock_methods.freeze
|
23
|
+
mock_methods
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|