ae_easy-core 0.1.9 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ae_easy-core.gemspec +1 -1
  4. data/doc/AeEasy.html +116 -116
  5. data/doc/AeEasy/Core.html +1217 -1217
  6. data/doc/AeEasy/Core/Config.html +284 -284
  7. data/doc/AeEasy/Core/Exception.html +116 -116
  8. data/doc/AeEasy/Core/Exception/OutdatedError.html +132 -132
  9. data/doc/AeEasy/Core/Helper.html +116 -116
  10. data/doc/AeEasy/Core/Helper/Cookie.html +797 -797
  11. data/doc/AeEasy/Core/Mock.html +231 -231
  12. data/doc/AeEasy/Core/Mock/FakeDb.html +3133 -2729
  13. data/doc/AeEasy/Core/Mock/FakeExecutor.html +2852 -2259
  14. data/doc/AeEasy/Core/Mock/FakeFinisher.html +160 -0
  15. data/doc/AeEasy/Core/Mock/FakeParser.html +157 -157
  16. data/doc/AeEasy/Core/Mock/FakeSeeder.html +157 -157
  17. data/doc/AeEasy/Core/Plugin.html +116 -116
  18. data/doc/AeEasy/Core/Plugin/CollectionVault.html +262 -262
  19. data/doc/AeEasy/Core/Plugin/ConfigBehavior.html +471 -471
  20. data/doc/AeEasy/Core/Plugin/ContextIntegrator.html +365 -365
  21. data/doc/AeEasy/Core/Plugin/Executor.html +243 -243
  22. data/doc/AeEasy/Core/Plugin/ExecutorBehavior.html +299 -299
  23. data/doc/AeEasy/Core/Plugin/Finisher.html +265 -0
  24. data/doc/AeEasy/Core/Plugin/FinisherBehavior.html +142 -0
  25. data/doc/AeEasy/Core/Plugin/InitializeHook.html +198 -198
  26. data/doc/AeEasy/Core/Plugin/Parser.html +254 -254
  27. data/doc/AeEasy/Core/Plugin/ParserBehavior.html +216 -216
  28. data/doc/AeEasy/Core/Plugin/Seeder.html +594 -594
  29. data/doc/AeEasy/Core/Plugin/SeederBehavior.html +141 -141
  30. data/doc/AeEasy/Core/SmartCollection.html +804 -804
  31. data/doc/_index.html +363 -342
  32. data/doc/class_list.html +51 -51
  33. data/doc/css/full_list.css +58 -58
  34. data/doc/css/style.css +496 -496
  35. data/doc/file.README.html +70 -70
  36. data/doc/file_list.html +56 -56
  37. data/doc/frames.html +17 -17
  38. data/doc/index.html +70 -70
  39. data/doc/js/app.js +303 -292
  40. data/doc/js/full_list.js +216 -216
  41. data/doc/js/jquery.js +3 -3
  42. data/doc/method_list.html +939 -851
  43. data/doc/top-level-namespace.html +109 -109
  44. data/lib/ae_easy/core/mock.rb +1 -0
  45. data/lib/ae_easy/core/mock/fake_db.rb +67 -1
  46. data/lib/ae_easy/core/mock/fake_executor.rb +61 -0
  47. data/lib/ae_easy/core/mock/fake_finisher.rb +28 -0
  48. data/lib/ae_easy/core/mock/fake_parser.rb +3 -1
  49. data/lib/ae_easy/core/plugin.rb +2 -0
  50. data/lib/ae_easy/core/plugin/finisher.rb +19 -0
  51. data/lib/ae_easy/core/plugin/finisher_behavior.rb +9 -0
  52. data/lib/ae_easy/core/version.rb +1 -1
  53. metadata +10 -4
@@ -1,110 +1,110 @@
1
- <!DOCTYPE html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8">
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>
7
- Top Level Namespace
8
-
9
- &mdash; Documentation by YARD 0.9.18
10
-
11
- </title>
12
-
13
- <link rel="stylesheet" href="css/style.css" type="text/css" charset="utf-8" />
14
-
15
- <link rel="stylesheet" href="css/common.css" type="text/css" charset="utf-8" />
16
-
17
- <script type="text/javascript" charset="utf-8">
18
- pathId = "";
19
- relpath = '';
20
- </script>
21
-
22
-
23
- <script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
24
-
25
- <script type="text/javascript" charset="utf-8" src="js/app.js"></script>
26
-
27
-
28
- </head>
29
- <body>
30
- <div class="nav_wrap">
31
- <iframe id="nav" src="class_list.html?1"></iframe>
32
- <div id="resizer"></div>
33
- </div>
34
-
35
- <div id="main" tabindex="-1">
36
- <div id="header">
37
- <div id="menu">
38
-
39
- <a href="_index.html">Index</a> &raquo;
40
-
41
-
42
- <span class="title">Top Level Namespace</span>
43
-
44
- </div>
45
-
46
- <div id="search">
47
-
48
- <a class="full_list_link" id="class_list_link"
49
- href="class_list.html">
50
-
51
- <svg width="24" height="24">
52
- <rect x="0" y="4" width="24" height="4" rx="1" ry="1"></rect>
53
- <rect x="0" y="12" width="24" height="4" rx="1" ry="1"></rect>
54
- <rect x="0" y="20" width="24" height="4" rx="1" ry="1"></rect>
55
- </svg>
56
- </a>
57
-
58
- </div>
59
- <div class="clear"></div>
60
- </div>
61
-
62
- <div id="content"><h1>Top Level Namespace
63
-
64
-
65
-
66
- </h1>
67
- <div class="box_info">
68
-
69
-
70
-
71
-
72
-
73
-
74
-
75
-
76
-
77
-
78
-
79
- </div>
80
-
81
- <h2>Defined Under Namespace</h2>
82
- <p class="children">
83
-
84
-
85
- <strong class="modules">Modules:</strong> <span class='object_link'><a href="AeEasy.html" title="AeEasy (module)">AeEasy</a></span>
86
-
87
-
88
-
89
-
90
- </p>
91
-
92
-
93
-
94
-
95
-
96
-
97
-
98
-
99
-
100
- </div>
101
-
102
- <div id="footer">
103
- Generated on Mon Mar 11 20:05:16 2019 by
104
- <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
105
- 0.9.18 (ruby-2.5.3).
106
- </div>
107
-
108
- </div>
109
- </body>
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>
7
+ Top Level Namespace
8
+
9
+ &mdash; Documentation by YARD 0.9.20
10
+
11
+ </title>
12
+
13
+ <link rel="stylesheet" href="css/style.css" type="text/css" charset="utf-8" />
14
+
15
+ <link rel="stylesheet" href="css/common.css" type="text/css" charset="utf-8" />
16
+
17
+ <script type="text/javascript" charset="utf-8">
18
+ pathId = "";
19
+ relpath = '';
20
+ </script>
21
+
22
+
23
+ <script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
24
+
25
+ <script type="text/javascript" charset="utf-8" src="js/app.js"></script>
26
+
27
+
28
+ </head>
29
+ <body>
30
+ <div class="nav_wrap">
31
+ <iframe id="nav" src="class_list.html?1"></iframe>
32
+ <div id="resizer"></div>
33
+ </div>
34
+
35
+ <div id="main" tabindex="-1">
36
+ <div id="header">
37
+ <div id="menu">
38
+
39
+ <a href="_index.html">Index</a> &raquo;
40
+
41
+
42
+ <span class="title">Top Level Namespace</span>
43
+
44
+ </div>
45
+
46
+ <div id="search">
47
+
48
+ <a class="full_list_link" id="class_list_link"
49
+ href="class_list.html">
50
+
51
+ <svg width="24" height="24">
52
+ <rect x="0" y="4" width="24" height="4" rx="1" ry="1"></rect>
53
+ <rect x="0" y="12" width="24" height="4" rx="1" ry="1"></rect>
54
+ <rect x="0" y="20" width="24" height="4" rx="1" ry="1"></rect>
55
+ </svg>
56
+ </a>
57
+
58
+ </div>
59
+ <div class="clear"></div>
60
+ </div>
61
+
62
+ <div id="content"><h1>Top Level Namespace
63
+
64
+
65
+
66
+ </h1>
67
+ <div class="box_info">
68
+
69
+
70
+
71
+
72
+
73
+
74
+
75
+
76
+
77
+
78
+
79
+ </div>
80
+
81
+ <h2>Defined Under Namespace</h2>
82
+ <p class="children">
83
+
84
+
85
+ <strong class="modules">Modules:</strong> <span class='object_link'><a href="AeEasy.html" title="AeEasy (module)">AeEasy</a></span>
86
+
87
+
88
+
89
+
90
+ </p>
91
+
92
+
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
+ </div>
101
+
102
+ <div id="footer">
103
+ Generated on Fri Sep 27 02:01:30 2019 by
104
+ <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
105
+ 0.9.20 (ruby-2.5.3).
106
+ </div>
107
+
108
+ </div>
109
+ </body>
110
110
  </html>
@@ -2,6 +2,7 @@ require 'ae_easy/core/mock/fake_db'
2
2
  require 'ae_easy/core/mock/fake_executor'
3
3
  require 'ae_easy/core/mock/fake_parser'
4
4
  require 'ae_easy/core/mock/fake_seeder'
5
+ require 'ae_easy/core/mock/fake_finisher'
5
6
 
6
7
  module AeEasy
7
8
  module Core
@@ -152,6 +152,16 @@ module AeEasy
152
152
  build_job job, opts
153
153
  end
154
154
 
155
+ # Return a timestamp
156
+ #
157
+ # @param [Time] time (nil) Time from which to get time stamp.
158
+ #
159
+ # @return [String]
160
+ def self.time_stamp time = nil
161
+ time = Time.new if time.nil?
162
+ time.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
163
+ end
164
+
155
165
  # Get current job or create new one from values.
156
166
  #
157
167
  # @param [Integer] target_job_id (nil) Job id to ensure existence.
@@ -345,6 +355,7 @@ module AeEasy
345
355
  def page_defaults
346
356
  @page_defaults ||= {
347
357
  'url' => nil,
358
+ 'status' => 'to_fetch',
348
359
  'job_id' => lambda{|page| job_id},
349
360
  'method' => 'GET',
350
361
  'headers' => {},
@@ -353,6 +364,8 @@ module AeEasy
353
364
  'no_redirect' => false,
354
365
  'body' => nil,
355
366
  'ua_type' => 'desktop',
367
+ 'no_url_encode' => false,
368
+ 'http2' => false,
356
369
  'vars' => {}
357
370
  }
358
371
  end
@@ -404,7 +417,7 @@ module AeEasy
404
417
  @output_defaults ||= {
405
418
  '_collection' => DEFAULT_COLLECTION,
406
419
  '_job_id' => lambda{|output| job_id},
407
- '_created_at' => lambda{|output| Time.new.strftime('%Y-%m-%dT%H:%M:%SZ')},
420
+ '_created_at' => lambda{|output| self.class.time_stamp},
408
421
  '_gid' => lambda{|output| page_gid}
409
422
  }
410
423
  end
@@ -489,6 +502,59 @@ module AeEasy
489
502
  end
490
503
  matches
491
504
  end
505
+
506
+ # Refetch a page.
507
+ #
508
+ # @param [Integer] job_id Page's job_id to refetch.
509
+ # @param [String] gid Page's gid to refetch.
510
+ def refetch job_id, gid
511
+ page = pages.find_match('gid' => gid, 'job_id' => job_id)
512
+ raise Exception.new("Page not found with job_id \"#{job_id}\" gid \"#{gid}\"") if page.nil?
513
+ page['status'] = 'to_fetch'
514
+ page['freshness'] = self.class.time_stamp
515
+ page['to_fetch'] = self.class.time_stamp
516
+ page['fetched_from'] = nil
517
+ page['fetching_at'] = '2001-01-01T00:00:00Z'
518
+ page['fetched_at'] = nil
519
+ page['fetching_try_count'] = 0
520
+ page['effective_url'] = nil
521
+ page['parsing_at'] = nil
522
+ page['parsing_failed_at'] = nil
523
+ page['parsed_at'] = nil
524
+ page['parsing_try_count'] = 0
525
+ page['parsing_fail_count'] = 0
526
+ page['parsing_updated_at'] = '2001-01-01T00:00:00Z'
527
+ page['response_checksum'] = nil
528
+ page['response_status'] = nil
529
+ page['response_status_code'] = nil
530
+ page['response_headers'] = nil
531
+ page['response_cookie'] = nil
532
+ page['response_proto'] = nil
533
+ page['content_type'] = nil
534
+ page['content_size'] = 0
535
+ page['failed_response_status_code'] = nil
536
+ page['failed_response_headers'] = nil
537
+ page['failed_response_cookie'] = nil
538
+ page['failed_effective_url'] = nil
539
+ page['failed_at'] = nil
540
+ page['failed_content_type'] = nil
541
+ end
542
+
543
+ # Reparse a page.
544
+ #
545
+ # @param [Integer] job_id Page's job_id to reparse.
546
+ # @param [String] gid Page's gid to reparse.
547
+ def reparse job_id, gid
548
+ page = pages.find_match('gid' => gid, 'job_id' => job_id)
549
+ raise Exception.new("Page not found with job_id \"#{job_id}\" gid \"#{gid}\"") if page.nil?
550
+ page['status'] = 'to_parse'
551
+ page['parsing_at'] = nil
552
+ page['parsing_failed_at'] = nil
553
+ page['parsing_updated_at'] = '2001-01-01T00:00:00Z'
554
+ page['parsed_at'] = nil
555
+ page['parsing_try_count'] = 0
556
+ page['parsing_fail_count'] = 0
557
+ end
492
558
  end
493
559
  end
494
560
  end
@@ -139,6 +139,29 @@ module AeEasy
139
139
  @page = value
140
140
  end
141
141
 
142
+ # Refetch self page flag.
143
+ # @return [Boolean]
144
+ # @note It is stronger than #reparse_self flag.
145
+ def refetch_self
146
+ @refetch_self ||= false
147
+ end
148
+
149
+ # Set refetch self page flag.
150
+ def refetch_self= value
151
+ @refetch_self = value
152
+ end
153
+
154
+ # Reparse self page flag.
155
+ # @return [Boolean]
156
+ def reparse_self
157
+ @reparse_self ||= false
158
+ end
159
+
160
+ # Set reparse self page flag.
161
+ def reparse_self= value
162
+ @reparse_self = value
163
+ end
164
+
142
165
  # Retrieve a list of saved jobs.
143
166
  def saved_jobs
144
167
  db.jobs
@@ -179,6 +202,19 @@ module AeEasy
179
202
  list.clear
180
203
  end
181
204
 
205
+ # Execute any action applied to current page
206
+ def flush_self_actions
207
+ # Save current page before refetch/reparse
208
+ if refetch_self || reparse_self
209
+ temp_page_gid_override = !db.allow_page_gid_override?
210
+ db.enable_page_gid_override if temp_page_gid_override
211
+ save_pages [page]
212
+ db.disable_page_gid_override if temp_page_gid_override
213
+ end
214
+ db.refetch(page['job_id'], page['gid']) if refetch_self
215
+ db.reparse(page['job_id'], page['gid']) if reparse_self
216
+ end
217
+
182
218
  # Save draft pages into db and clear draft queue.
183
219
  def flush_pages
184
220
  save_pages pages
@@ -195,6 +231,7 @@ module AeEasy
195
231
  def flush
196
232
  flush_pages
197
233
  flush_outputs
234
+ flush_self_actions
198
235
  end
199
236
 
200
237
  # Get latest job by scraper_name.
@@ -306,6 +343,30 @@ module AeEasy
306
343
  eval(File.read(file_path), isolated_binding(vars), file_path)
307
344
  flush
308
345
  end
346
+
347
+ # Refetch a page by gid.
348
+ #
349
+ # @param [String] gid Page's gid to refetch.
350
+ def refetch gid
351
+ raise ArgumentError.new("gid needs to be a String.") unless gid.is_a?(String)
352
+ if page['gid'] == gid
353
+ self.refetch_self = true
354
+ return
355
+ end
356
+ db.refetch(job_id, gid)
357
+ end
358
+
359
+ # Reparse a page by gid.
360
+ #
361
+ # @param [String] page_gid Page's gid to reparse.
362
+ def reparse page_gid
363
+ raise ArgumentError.new("page_gid needs to be a String.") unless page_gid.is_a?(String)
364
+ if page['gid'] == page_gid
365
+ self.reparse_self = true
366
+ return
367
+ end
368
+ db.reparse(job_id, page_gid)
369
+ end
309
370
  end
310
371
  end
311
372
  end
@@ -0,0 +1,28 @@
1
+ module AeEasy
2
+ module Core
3
+ module Mock
4
+ # Fake finisher that emulates `AnswersEngine` finisher executor.
5
+ class FakeFinisher
6
+ include AeEasy::Core::Mock::FakeExecutor
7
+
8
+ # Fake finisher exposed methods to isolated context.
9
+ # @private
10
+ #
11
+ # @return [Array]
12
+ def self.exposed_methods
13
+ real_methods = AnswersEngine::Scraper::RubyFinisherExecutor.exposed_methods.uniq
14
+ mock_methods = [
15
+ :outputs,
16
+ :save_outputs,
17
+ :find_output,
18
+ :find_outputs
19
+ ]
20
+ AeEasy::Core::Mock::FakeExecutor.check_compatibility real_methods, mock_methods
21
+ mock_methods << :job_id
22
+ mock_methods.freeze
23
+ mock_methods
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -20,7 +20,9 @@ module AeEasy
20
20
  :save_pages,
21
21
  :save_outputs,
22
22
  :find_output,
23
- :find_outputs
23
+ :find_outputs,
24
+ :refetch,
25
+ :reparse
24
26
  ].freeze
25
27
  AeEasy::Core::Mock::FakeExecutor.check_compatibility real_methods, mock_methods
26
28
  mock_methods