ae_easy-core 0.1.9 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ae_easy-core.gemspec +1 -1
- data/doc/AeEasy.html +116 -116
- data/doc/AeEasy/Core.html +1217 -1217
- data/doc/AeEasy/Core/Config.html +284 -284
- data/doc/AeEasy/Core/Exception.html +116 -116
- data/doc/AeEasy/Core/Exception/OutdatedError.html +132 -132
- data/doc/AeEasy/Core/Helper.html +116 -116
- data/doc/AeEasy/Core/Helper/Cookie.html +797 -797
- data/doc/AeEasy/Core/Mock.html +231 -231
- data/doc/AeEasy/Core/Mock/FakeDb.html +3133 -2729
- data/doc/AeEasy/Core/Mock/FakeExecutor.html +2852 -2259
- data/doc/AeEasy/Core/Mock/FakeFinisher.html +160 -0
- data/doc/AeEasy/Core/Mock/FakeParser.html +157 -157
- data/doc/AeEasy/Core/Mock/FakeSeeder.html +157 -157
- data/doc/AeEasy/Core/Plugin.html +116 -116
- data/doc/AeEasy/Core/Plugin/CollectionVault.html +262 -262
- data/doc/AeEasy/Core/Plugin/ConfigBehavior.html +471 -471
- data/doc/AeEasy/Core/Plugin/ContextIntegrator.html +365 -365
- data/doc/AeEasy/Core/Plugin/Executor.html +243 -243
- data/doc/AeEasy/Core/Plugin/ExecutorBehavior.html +299 -299
- data/doc/AeEasy/Core/Plugin/Finisher.html +265 -0
- data/doc/AeEasy/Core/Plugin/FinisherBehavior.html +142 -0
- data/doc/AeEasy/Core/Plugin/InitializeHook.html +198 -198
- data/doc/AeEasy/Core/Plugin/Parser.html +254 -254
- data/doc/AeEasy/Core/Plugin/ParserBehavior.html +216 -216
- data/doc/AeEasy/Core/Plugin/Seeder.html +594 -594
- data/doc/AeEasy/Core/Plugin/SeederBehavior.html +141 -141
- data/doc/AeEasy/Core/SmartCollection.html +804 -804
- data/doc/_index.html +363 -342
- data/doc/class_list.html +51 -51
- data/doc/css/full_list.css +58 -58
- data/doc/css/style.css +496 -496
- data/doc/file.README.html +70 -70
- data/doc/file_list.html +56 -56
- data/doc/frames.html +17 -17
- data/doc/index.html +70 -70
- data/doc/js/app.js +303 -292
- data/doc/js/full_list.js +216 -216
- data/doc/js/jquery.js +3 -3
- data/doc/method_list.html +939 -851
- data/doc/top-level-namespace.html +109 -109
- data/lib/ae_easy/core/mock.rb +1 -0
- data/lib/ae_easy/core/mock/fake_db.rb +67 -1
- data/lib/ae_easy/core/mock/fake_executor.rb +61 -0
- data/lib/ae_easy/core/mock/fake_finisher.rb +28 -0
- data/lib/ae_easy/core/mock/fake_parser.rb +3 -1
- data/lib/ae_easy/core/plugin.rb +2 -0
- data/lib/ae_easy/core/plugin/finisher.rb +19 -0
- data/lib/ae_easy/core/plugin/finisher_behavior.rb +9 -0
- data/lib/ae_easy/core/version.rb +1 -1
- metadata +10 -4
@@ -1,110 +1,110 @@
|
|
1
|
-
<!DOCTYPE html>
|
2
|
-
<html>
|
3
|
-
<head>
|
4
|
-
<meta charset="utf-8">
|
5
|
-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6
|
-
<title>
|
7
|
-
Top Level Namespace
|
8
|
-
|
9
|
-
— Documentation by YARD 0.9.
|
10
|
-
|
11
|
-
</title>
|
12
|
-
|
13
|
-
<link rel="stylesheet" href="css/style.css" type="text/css" charset="utf-8" />
|
14
|
-
|
15
|
-
<link rel="stylesheet" href="css/common.css" type="text/css" charset="utf-8" />
|
16
|
-
|
17
|
-
<script type="text/javascript" charset="utf-8">
|
18
|
-
pathId = "";
|
19
|
-
relpath = '';
|
20
|
-
</script>
|
21
|
-
|
22
|
-
|
23
|
-
<script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
|
24
|
-
|
25
|
-
<script type="text/javascript" charset="utf-8" src="js/app.js"></script>
|
26
|
-
|
27
|
-
|
28
|
-
</head>
|
29
|
-
<body>
|
30
|
-
<div class="nav_wrap">
|
31
|
-
<iframe id="nav" src="class_list.html?1"></iframe>
|
32
|
-
<div id="resizer"></div>
|
33
|
-
</div>
|
34
|
-
|
35
|
-
<div id="main" tabindex="-1">
|
36
|
-
<div id="header">
|
37
|
-
<div id="menu">
|
38
|
-
|
39
|
-
<a href="_index.html">Index</a> »
|
40
|
-
|
41
|
-
|
42
|
-
<span class="title">Top Level Namespace</span>
|
43
|
-
|
44
|
-
</div>
|
45
|
-
|
46
|
-
<div id="search">
|
47
|
-
|
48
|
-
<a class="full_list_link" id="class_list_link"
|
49
|
-
href="class_list.html">
|
50
|
-
|
51
|
-
<svg width="24" height="24">
|
52
|
-
<rect x="0" y="4" width="24" height="4" rx="1" ry="1"></rect>
|
53
|
-
<rect x="0" y="12" width="24" height="4" rx="1" ry="1"></rect>
|
54
|
-
<rect x="0" y="20" width="24" height="4" rx="1" ry="1"></rect>
|
55
|
-
</svg>
|
56
|
-
</a>
|
57
|
-
|
58
|
-
</div>
|
59
|
-
<div class="clear"></div>
|
60
|
-
</div>
|
61
|
-
|
62
|
-
<div id="content"><h1>Top Level Namespace
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
</h1>
|
67
|
-
<div class="box_info">
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
</div>
|
80
|
-
|
81
|
-
<h2>Defined Under Namespace</h2>
|
82
|
-
<p class="children">
|
83
|
-
|
84
|
-
|
85
|
-
<strong class="modules">Modules:</strong> <span class='object_link'><a href="AeEasy.html" title="AeEasy (module)">AeEasy</a></span>
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
</p>
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
</div>
|
101
|
-
|
102
|
-
<div id="footer">
|
103
|
-
Generated on
|
104
|
-
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
|
105
|
-
0.9.
|
106
|
-
</div>
|
107
|
-
|
108
|
-
</div>
|
109
|
-
</body>
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<meta charset="utf-8">
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6
|
+
<title>
|
7
|
+
Top Level Namespace
|
8
|
+
|
9
|
+
— Documentation by YARD 0.9.20
|
10
|
+
|
11
|
+
</title>
|
12
|
+
|
13
|
+
<link rel="stylesheet" href="css/style.css" type="text/css" charset="utf-8" />
|
14
|
+
|
15
|
+
<link rel="stylesheet" href="css/common.css" type="text/css" charset="utf-8" />
|
16
|
+
|
17
|
+
<script type="text/javascript" charset="utf-8">
|
18
|
+
pathId = "";
|
19
|
+
relpath = '';
|
20
|
+
</script>
|
21
|
+
|
22
|
+
|
23
|
+
<script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
|
24
|
+
|
25
|
+
<script type="text/javascript" charset="utf-8" src="js/app.js"></script>
|
26
|
+
|
27
|
+
|
28
|
+
</head>
|
29
|
+
<body>
|
30
|
+
<div class="nav_wrap">
|
31
|
+
<iframe id="nav" src="class_list.html?1"></iframe>
|
32
|
+
<div id="resizer"></div>
|
33
|
+
</div>
|
34
|
+
|
35
|
+
<div id="main" tabindex="-1">
|
36
|
+
<div id="header">
|
37
|
+
<div id="menu">
|
38
|
+
|
39
|
+
<a href="_index.html">Index</a> »
|
40
|
+
|
41
|
+
|
42
|
+
<span class="title">Top Level Namespace</span>
|
43
|
+
|
44
|
+
</div>
|
45
|
+
|
46
|
+
<div id="search">
|
47
|
+
|
48
|
+
<a class="full_list_link" id="class_list_link"
|
49
|
+
href="class_list.html">
|
50
|
+
|
51
|
+
<svg width="24" height="24">
|
52
|
+
<rect x="0" y="4" width="24" height="4" rx="1" ry="1"></rect>
|
53
|
+
<rect x="0" y="12" width="24" height="4" rx="1" ry="1"></rect>
|
54
|
+
<rect x="0" y="20" width="24" height="4" rx="1" ry="1"></rect>
|
55
|
+
</svg>
|
56
|
+
</a>
|
57
|
+
|
58
|
+
</div>
|
59
|
+
<div class="clear"></div>
|
60
|
+
</div>
|
61
|
+
|
62
|
+
<div id="content"><h1>Top Level Namespace
|
63
|
+
|
64
|
+
|
65
|
+
|
66
|
+
</h1>
|
67
|
+
<div class="box_info">
|
68
|
+
|
69
|
+
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
</div>
|
80
|
+
|
81
|
+
<h2>Defined Under Namespace</h2>
|
82
|
+
<p class="children">
|
83
|
+
|
84
|
+
|
85
|
+
<strong class="modules">Modules:</strong> <span class='object_link'><a href="AeEasy.html" title="AeEasy (module)">AeEasy</a></span>
|
86
|
+
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
</p>
|
91
|
+
|
92
|
+
|
93
|
+
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
|
98
|
+
|
99
|
+
|
100
|
+
</div>
|
101
|
+
|
102
|
+
<div id="footer">
|
103
|
+
Generated on Fri Sep 27 02:01:30 2019 by
|
104
|
+
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
|
105
|
+
0.9.20 (ruby-2.5.3).
|
106
|
+
</div>
|
107
|
+
|
108
|
+
</div>
|
109
|
+
</body>
|
110
110
|
</html>
|
data/lib/ae_easy/core/mock.rb
CHANGED
@@ -152,6 +152,16 @@ module AeEasy
|
|
152
152
|
build_job job, opts
|
153
153
|
end
|
154
154
|
|
155
|
+
# Return a timestamp
|
156
|
+
#
|
157
|
+
# @param [Time] time (nil) Time from which to get time stamp.
|
158
|
+
#
|
159
|
+
# @return [String]
|
160
|
+
def self.time_stamp time = nil
|
161
|
+
time = Time.new if time.nil?
|
162
|
+
time.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
|
163
|
+
end
|
164
|
+
|
155
165
|
# Get current job or create new one from values.
|
156
166
|
#
|
157
167
|
# @param [Integer] target_job_id (nil) Job id to ensure existence.
|
@@ -345,6 +355,7 @@ module AeEasy
|
|
345
355
|
def page_defaults
|
346
356
|
@page_defaults ||= {
|
347
357
|
'url' => nil,
|
358
|
+
'status' => 'to_fetch',
|
348
359
|
'job_id' => lambda{|page| job_id},
|
349
360
|
'method' => 'GET',
|
350
361
|
'headers' => {},
|
@@ -353,6 +364,8 @@ module AeEasy
|
|
353
364
|
'no_redirect' => false,
|
354
365
|
'body' => nil,
|
355
366
|
'ua_type' => 'desktop',
|
367
|
+
'no_url_encode' => false,
|
368
|
+
'http2' => false,
|
356
369
|
'vars' => {}
|
357
370
|
}
|
358
371
|
end
|
@@ -404,7 +417,7 @@ module AeEasy
|
|
404
417
|
@output_defaults ||= {
|
405
418
|
'_collection' => DEFAULT_COLLECTION,
|
406
419
|
'_job_id' => lambda{|output| job_id},
|
407
|
-
'_created_at' => lambda{|output|
|
420
|
+
'_created_at' => lambda{|output| self.class.time_stamp},
|
408
421
|
'_gid' => lambda{|output| page_gid}
|
409
422
|
}
|
410
423
|
end
|
@@ -489,6 +502,59 @@ module AeEasy
|
|
489
502
|
end
|
490
503
|
matches
|
491
504
|
end
|
505
|
+
|
506
|
+
# Refetch a page.
|
507
|
+
#
|
508
|
+
# @param [Integer] job_id Page's job_id to refetch.
|
509
|
+
# @param [String] gid Page's gid to refetch.
|
510
|
+
def refetch job_id, gid
|
511
|
+
page = pages.find_match('gid' => gid, 'job_id' => job_id)
|
512
|
+
raise Exception.new("Page not found with job_id \"#{job_id}\" gid \"#{gid}\"") if page.nil?
|
513
|
+
page['status'] = 'to_fetch'
|
514
|
+
page['freshness'] = self.class.time_stamp
|
515
|
+
page['to_fetch'] = self.class.time_stamp
|
516
|
+
page['fetched_from'] = nil
|
517
|
+
page['fetching_at'] = '2001-01-01T00:00:00Z'
|
518
|
+
page['fetched_at'] = nil
|
519
|
+
page['fetching_try_count'] = 0
|
520
|
+
page['effective_url'] = nil
|
521
|
+
page['parsing_at'] = nil
|
522
|
+
page['parsing_failed_at'] = nil
|
523
|
+
page['parsed_at'] = nil
|
524
|
+
page['parsing_try_count'] = 0
|
525
|
+
page['parsing_fail_count'] = 0
|
526
|
+
page['parsing_updated_at'] = '2001-01-01T00:00:00Z'
|
527
|
+
page['response_checksum'] = nil
|
528
|
+
page['response_status'] = nil
|
529
|
+
page['response_status_code'] = nil
|
530
|
+
page['response_headers'] = nil
|
531
|
+
page['response_cookie'] = nil
|
532
|
+
page['response_proto'] = nil
|
533
|
+
page['content_type'] = nil
|
534
|
+
page['content_size'] = 0
|
535
|
+
page['failed_response_status_code'] = nil
|
536
|
+
page['failed_response_headers'] = nil
|
537
|
+
page['failed_response_cookie'] = nil
|
538
|
+
page['failed_effective_url'] = nil
|
539
|
+
page['failed_at'] = nil
|
540
|
+
page['failed_content_type'] = nil
|
541
|
+
end
|
542
|
+
|
543
|
+
# Reparse a page.
|
544
|
+
#
|
545
|
+
# @param [Integer] job_id Page's job_id to reparse.
|
546
|
+
# @param [String] gid Page's gid to reparse.
|
547
|
+
def reparse job_id, gid
|
548
|
+
page = pages.find_match('gid' => gid, 'job_id' => job_id)
|
549
|
+
raise Exception.new("Page not found with job_id \"#{job_id}\" gid \"#{gid}\"") if page.nil?
|
550
|
+
page['status'] = 'to_parse'
|
551
|
+
page['parsing_at'] = nil
|
552
|
+
page['parsing_failed_at'] = nil
|
553
|
+
page['parsing_updated_at'] = '2001-01-01T00:00:00Z'
|
554
|
+
page['parsed_at'] = nil
|
555
|
+
page['parsing_try_count'] = 0
|
556
|
+
page['parsing_fail_count'] = 0
|
557
|
+
end
|
492
558
|
end
|
493
559
|
end
|
494
560
|
end
|
@@ -139,6 +139,29 @@ module AeEasy
|
|
139
139
|
@page = value
|
140
140
|
end
|
141
141
|
|
142
|
+
# Refetch self page flag.
|
143
|
+
# @return [Boolean]
|
144
|
+
# @note It is stronger than #reparse_self flag.
|
145
|
+
def refetch_self
|
146
|
+
@refetch_self ||= false
|
147
|
+
end
|
148
|
+
|
149
|
+
# Set refetch self page flag.
|
150
|
+
def refetch_self= value
|
151
|
+
@refetch_self = value
|
152
|
+
end
|
153
|
+
|
154
|
+
# Reparse self page flag.
|
155
|
+
# @return [Boolean]
|
156
|
+
def reparse_self
|
157
|
+
@reparse_self ||= false
|
158
|
+
end
|
159
|
+
|
160
|
+
# Set reparse self page flag.
|
161
|
+
def reparse_self= value
|
162
|
+
@reparse_self = value
|
163
|
+
end
|
164
|
+
|
142
165
|
# Retrieve a list of saved jobs.
|
143
166
|
def saved_jobs
|
144
167
|
db.jobs
|
@@ -179,6 +202,19 @@ module AeEasy
|
|
179
202
|
list.clear
|
180
203
|
end
|
181
204
|
|
205
|
+
# Execute any action applied to current page
|
206
|
+
def flush_self_actions
|
207
|
+
# Save current page before refetch/reparse
|
208
|
+
if refetch_self || reparse_self
|
209
|
+
temp_page_gid_override = !db.allow_page_gid_override?
|
210
|
+
db.enable_page_gid_override if temp_page_gid_override
|
211
|
+
save_pages [page]
|
212
|
+
db.disable_page_gid_override if temp_page_gid_override
|
213
|
+
end
|
214
|
+
db.refetch(page['job_id'], page['gid']) if refetch_self
|
215
|
+
db.reparse(page['job_id'], page['gid']) if reparse_self
|
216
|
+
end
|
217
|
+
|
182
218
|
# Save draft pages into db and clear draft queue.
|
183
219
|
def flush_pages
|
184
220
|
save_pages pages
|
@@ -195,6 +231,7 @@ module AeEasy
|
|
195
231
|
def flush
|
196
232
|
flush_pages
|
197
233
|
flush_outputs
|
234
|
+
flush_self_actions
|
198
235
|
end
|
199
236
|
|
200
237
|
# Get latest job by scraper_name.
|
@@ -306,6 +343,30 @@ module AeEasy
|
|
306
343
|
eval(File.read(file_path), isolated_binding(vars), file_path)
|
307
344
|
flush
|
308
345
|
end
|
346
|
+
|
347
|
+
# Refetch a page by gid.
|
348
|
+
#
|
349
|
+
# @param [String] gid Page's gid to refetch.
|
350
|
+
def refetch gid
|
351
|
+
raise ArgumentError.new("gid needs to be a String.") unless gid.is_a?(String)
|
352
|
+
if page['gid'] == gid
|
353
|
+
self.refetch_self = true
|
354
|
+
return
|
355
|
+
end
|
356
|
+
db.refetch(job_id, gid)
|
357
|
+
end
|
358
|
+
|
359
|
+
# Reparse a page by gid.
|
360
|
+
#
|
361
|
+
# @param [String] page_gid Page's gid to reparse.
|
362
|
+
def reparse page_gid
|
363
|
+
raise ArgumentError.new("page_gid needs to be a String.") unless page_gid.is_a?(String)
|
364
|
+
if page['gid'] == page_gid
|
365
|
+
self.reparse_self = true
|
366
|
+
return
|
367
|
+
end
|
368
|
+
db.reparse(job_id, page_gid)
|
369
|
+
end
|
309
370
|
end
|
310
371
|
end
|
311
372
|
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module AeEasy
|
2
|
+
module Core
|
3
|
+
module Mock
|
4
|
+
# Fake finisher that emulates `AnswersEngine` finisher executor.
|
5
|
+
class FakeFinisher
|
6
|
+
include AeEasy::Core::Mock::FakeExecutor
|
7
|
+
|
8
|
+
# Fake finisher exposed methods to isolated context.
|
9
|
+
# @private
|
10
|
+
#
|
11
|
+
# @return [Array]
|
12
|
+
def self.exposed_methods
|
13
|
+
real_methods = AnswersEngine::Scraper::RubyFinisherExecutor.exposed_methods.uniq
|
14
|
+
mock_methods = [
|
15
|
+
:outputs,
|
16
|
+
:save_outputs,
|
17
|
+
:find_output,
|
18
|
+
:find_outputs
|
19
|
+
]
|
20
|
+
AeEasy::Core::Mock::FakeExecutor.check_compatibility real_methods, mock_methods
|
21
|
+
mock_methods << :job_id
|
22
|
+
mock_methods.freeze
|
23
|
+
mock_methods
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|