ae_easy-core 0.1.9 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ae_easy-core.gemspec +1 -1
  4. data/doc/AeEasy.html +116 -116
  5. data/doc/AeEasy/Core.html +1217 -1217
  6. data/doc/AeEasy/Core/Config.html +284 -284
  7. data/doc/AeEasy/Core/Exception.html +116 -116
  8. data/doc/AeEasy/Core/Exception/OutdatedError.html +132 -132
  9. data/doc/AeEasy/Core/Helper.html +116 -116
  10. data/doc/AeEasy/Core/Helper/Cookie.html +797 -797
  11. data/doc/AeEasy/Core/Mock.html +231 -231
  12. data/doc/AeEasy/Core/Mock/FakeDb.html +3133 -2729
  13. data/doc/AeEasy/Core/Mock/FakeExecutor.html +2852 -2259
  14. data/doc/AeEasy/Core/Mock/FakeFinisher.html +160 -0
  15. data/doc/AeEasy/Core/Mock/FakeParser.html +157 -157
  16. data/doc/AeEasy/Core/Mock/FakeSeeder.html +157 -157
  17. data/doc/AeEasy/Core/Plugin.html +116 -116
  18. data/doc/AeEasy/Core/Plugin/CollectionVault.html +262 -262
  19. data/doc/AeEasy/Core/Plugin/ConfigBehavior.html +471 -471
  20. data/doc/AeEasy/Core/Plugin/ContextIntegrator.html +365 -365
  21. data/doc/AeEasy/Core/Plugin/Executor.html +243 -243
  22. data/doc/AeEasy/Core/Plugin/ExecutorBehavior.html +299 -299
  23. data/doc/AeEasy/Core/Plugin/Finisher.html +265 -0
  24. data/doc/AeEasy/Core/Plugin/FinisherBehavior.html +142 -0
  25. data/doc/AeEasy/Core/Plugin/InitializeHook.html +198 -198
  26. data/doc/AeEasy/Core/Plugin/Parser.html +254 -254
  27. data/doc/AeEasy/Core/Plugin/ParserBehavior.html +216 -216
  28. data/doc/AeEasy/Core/Plugin/Seeder.html +594 -594
  29. data/doc/AeEasy/Core/Plugin/SeederBehavior.html +141 -141
  30. data/doc/AeEasy/Core/SmartCollection.html +804 -804
  31. data/doc/_index.html +363 -342
  32. data/doc/class_list.html +51 -51
  33. data/doc/css/full_list.css +58 -58
  34. data/doc/css/style.css +496 -496
  35. data/doc/file.README.html +70 -70
  36. data/doc/file_list.html +56 -56
  37. data/doc/frames.html +17 -17
  38. data/doc/index.html +70 -70
  39. data/doc/js/app.js +303 -292
  40. data/doc/js/full_list.js +216 -216
  41. data/doc/js/jquery.js +3 -3
  42. data/doc/method_list.html +939 -851
  43. data/doc/top-level-namespace.html +109 -109
  44. data/lib/ae_easy/core/mock.rb +1 -0
  45. data/lib/ae_easy/core/mock/fake_db.rb +67 -1
  46. data/lib/ae_easy/core/mock/fake_executor.rb +61 -0
  47. data/lib/ae_easy/core/mock/fake_finisher.rb +28 -0
  48. data/lib/ae_easy/core/mock/fake_parser.rb +3 -1
  49. data/lib/ae_easy/core/plugin.rb +2 -0
  50. data/lib/ae_easy/core/plugin/finisher.rb +19 -0
  51. data/lib/ae_easy/core/plugin/finisher_behavior.rb +9 -0
  52. data/lib/ae_easy/core/version.rb +1 -1
  53. metadata +10 -4
@@ -1,110 +1,110 @@
1
- <!DOCTYPE html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8">
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>
7
- Top Level Namespace
8
-
9
- &mdash; Documentation by YARD 0.9.18
10
-
11
- </title>
12
-
13
- <link rel="stylesheet" href="css/style.css" type="text/css" charset="utf-8" />
14
-
15
- <link rel="stylesheet" href="css/common.css" type="text/css" charset="utf-8" />
16
-
17
- <script type="text/javascript" charset="utf-8">
18
- pathId = "";
19
- relpath = '';
20
- </script>
21
-
22
-
23
- <script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
24
-
25
- <script type="text/javascript" charset="utf-8" src="js/app.js"></script>
26
-
27
-
28
- </head>
29
- <body>
30
- <div class="nav_wrap">
31
- <iframe id="nav" src="class_list.html?1"></iframe>
32
- <div id="resizer"></div>
33
- </div>
34
-
35
- <div id="main" tabindex="-1">
36
- <div id="header">
37
- <div id="menu">
38
-
39
- <a href="_index.html">Index</a> &raquo;
40
-
41
-
42
- <span class="title">Top Level Namespace</span>
43
-
44
- </div>
45
-
46
- <div id="search">
47
-
48
- <a class="full_list_link" id="class_list_link"
49
- href="class_list.html">
50
-
51
- <svg width="24" height="24">
52
- <rect x="0" y="4" width="24" height="4" rx="1" ry="1"></rect>
53
- <rect x="0" y="12" width="24" height="4" rx="1" ry="1"></rect>
54
- <rect x="0" y="20" width="24" height="4" rx="1" ry="1"></rect>
55
- </svg>
56
- </a>
57
-
58
- </div>
59
- <div class="clear"></div>
60
- </div>
61
-
62
- <div id="content"><h1>Top Level Namespace
63
-
64
-
65
-
66
- </h1>
67
- <div class="box_info">
68
-
69
-
70
-
71
-
72
-
73
-
74
-
75
-
76
-
77
-
78
-
79
- </div>
80
-
81
- <h2>Defined Under Namespace</h2>
82
- <p class="children">
83
-
84
-
85
- <strong class="modules">Modules:</strong> <span class='object_link'><a href="AeEasy.html" title="AeEasy (module)">AeEasy</a></span>
86
-
87
-
88
-
89
-
90
- </p>
91
-
92
-
93
-
94
-
95
-
96
-
97
-
98
-
99
-
100
- </div>
101
-
102
- <div id="footer">
103
- Generated on Mon Mar 11 20:05:16 2019 by
104
- <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
105
- 0.9.18 (ruby-2.5.3).
106
- </div>
107
-
108
- </div>
109
- </body>
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>
7
+ Top Level Namespace
8
+
9
+ &mdash; Documentation by YARD 0.9.20
10
+
11
+ </title>
12
+
13
+ <link rel="stylesheet" href="css/style.css" type="text/css" charset="utf-8" />
14
+
15
+ <link rel="stylesheet" href="css/common.css" type="text/css" charset="utf-8" />
16
+
17
+ <script type="text/javascript" charset="utf-8">
18
+ pathId = "";
19
+ relpath = '';
20
+ </script>
21
+
22
+
23
+ <script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
24
+
25
+ <script type="text/javascript" charset="utf-8" src="js/app.js"></script>
26
+
27
+
28
+ </head>
29
+ <body>
30
+ <div class="nav_wrap">
31
+ <iframe id="nav" src="class_list.html?1"></iframe>
32
+ <div id="resizer"></div>
33
+ </div>
34
+
35
+ <div id="main" tabindex="-1">
36
+ <div id="header">
37
+ <div id="menu">
38
+
39
+ <a href="_index.html">Index</a> &raquo;
40
+
41
+
42
+ <span class="title">Top Level Namespace</span>
43
+
44
+ </div>
45
+
46
+ <div id="search">
47
+
48
+ <a class="full_list_link" id="class_list_link"
49
+ href="class_list.html">
50
+
51
+ <svg width="24" height="24">
52
+ <rect x="0" y="4" width="24" height="4" rx="1" ry="1"></rect>
53
+ <rect x="0" y="12" width="24" height="4" rx="1" ry="1"></rect>
54
+ <rect x="0" y="20" width="24" height="4" rx="1" ry="1"></rect>
55
+ </svg>
56
+ </a>
57
+
58
+ </div>
59
+ <div class="clear"></div>
60
+ </div>
61
+
62
+ <div id="content"><h1>Top Level Namespace
63
+
64
+
65
+
66
+ </h1>
67
+ <div class="box_info">
68
+
69
+
70
+
71
+
72
+
73
+
74
+
75
+
76
+
77
+
78
+
79
+ </div>
80
+
81
+ <h2>Defined Under Namespace</h2>
82
+ <p class="children">
83
+
84
+
85
+ <strong class="modules">Modules:</strong> <span class='object_link'><a href="AeEasy.html" title="AeEasy (module)">AeEasy</a></span>
86
+
87
+
88
+
89
+
90
+ </p>
91
+
92
+
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
+ </div>
101
+
102
+ <div id="footer">
103
+ Generated on Fri Sep 27 02:01:30 2019 by
104
+ <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
105
+ 0.9.20 (ruby-2.5.3).
106
+ </div>
107
+
108
+ </div>
109
+ </body>
110
110
  </html>
@@ -2,6 +2,7 @@ require 'ae_easy/core/mock/fake_db'
2
2
  require 'ae_easy/core/mock/fake_executor'
3
3
  require 'ae_easy/core/mock/fake_parser'
4
4
  require 'ae_easy/core/mock/fake_seeder'
5
+ require 'ae_easy/core/mock/fake_finisher'
5
6
 
6
7
  module AeEasy
7
8
  module Core
@@ -152,6 +152,16 @@ module AeEasy
152
152
  build_job job, opts
153
153
  end
154
154
 
155
+ # Return a timestamp
156
+ #
157
+ # @param [Time] time (nil) Time from which to get time stamp.
158
+ #
159
+ # @return [String]
160
+ def self.time_stamp time = nil
161
+ time = Time.new if time.nil?
162
+ time.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
163
+ end
164
+
155
165
  # Get current job or create new one from values.
156
166
  #
157
167
  # @param [Integer] target_job_id (nil) Job id to ensure existence.
@@ -345,6 +355,7 @@ module AeEasy
345
355
  def page_defaults
346
356
  @page_defaults ||= {
347
357
  'url' => nil,
358
+ 'status' => 'to_fetch',
348
359
  'job_id' => lambda{|page| job_id},
349
360
  'method' => 'GET',
350
361
  'headers' => {},
@@ -353,6 +364,8 @@ module AeEasy
353
364
  'no_redirect' => false,
354
365
  'body' => nil,
355
366
  'ua_type' => 'desktop',
367
+ 'no_url_encode' => false,
368
+ 'http2' => false,
356
369
  'vars' => {}
357
370
  }
358
371
  end
@@ -404,7 +417,7 @@ module AeEasy
404
417
  @output_defaults ||= {
405
418
  '_collection' => DEFAULT_COLLECTION,
406
419
  '_job_id' => lambda{|output| job_id},
407
- '_created_at' => lambda{|output| Time.new.strftime('%Y-%m-%dT%H:%M:%SZ')},
420
+ '_created_at' => lambda{|output| self.class.time_stamp},
408
421
  '_gid' => lambda{|output| page_gid}
409
422
  }
410
423
  end
@@ -489,6 +502,59 @@ module AeEasy
489
502
  end
490
503
  matches
491
504
  end
505
+
506
+ # Refetch a page.
507
+ #
508
+ # @param [Integer] job_id Page's job_id to refetch.
509
+ # @param [String] gid Page's gid to refetch.
510
+ def refetch job_id, gid
511
+ page = pages.find_match('gid' => gid, 'job_id' => job_id)
512
+ raise Exception.new("Page not found with job_id \"#{job_id}\" gid \"#{gid}\"") if page.nil?
513
+ page['status'] = 'to_fetch'
514
+ page['freshness'] = self.class.time_stamp
515
+ page['to_fetch'] = self.class.time_stamp
516
+ page['fetched_from'] = nil
517
+ page['fetching_at'] = '2001-01-01T00:00:00Z'
518
+ page['fetched_at'] = nil
519
+ page['fetching_try_count'] = 0
520
+ page['effective_url'] = nil
521
+ page['parsing_at'] = nil
522
+ page['parsing_failed_at'] = nil
523
+ page['parsed_at'] = nil
524
+ page['parsing_try_count'] = 0
525
+ page['parsing_fail_count'] = 0
526
+ page['parsing_updated_at'] = '2001-01-01T00:00:00Z'
527
+ page['response_checksum'] = nil
528
+ page['response_status'] = nil
529
+ page['response_status_code'] = nil
530
+ page['response_headers'] = nil
531
+ page['response_cookie'] = nil
532
+ page['response_proto'] = nil
533
+ page['content_type'] = nil
534
+ page['content_size'] = 0
535
+ page['failed_response_status_code'] = nil
536
+ page['failed_response_headers'] = nil
537
+ page['failed_response_cookie'] = nil
538
+ page['failed_effective_url'] = nil
539
+ page['failed_at'] = nil
540
+ page['failed_content_type'] = nil
541
+ end
542
+
543
+ # Reparse a page.
544
+ #
545
+ # @param [Integer] job_id Page's job_id to reparse.
546
+ # @param [String] gid Page's gid to reparse.
547
+ def reparse job_id, gid
548
+ page = pages.find_match('gid' => gid, 'job_id' => job_id)
549
+ raise Exception.new("Page not found with job_id \"#{job_id}\" gid \"#{gid}\"") if page.nil?
550
+ page['status'] = 'to_parse'
551
+ page['parsing_at'] = nil
552
+ page['parsing_failed_at'] = nil
553
+ page['parsing_updated_at'] = '2001-01-01T00:00:00Z'
554
+ page['parsed_at'] = nil
555
+ page['parsing_try_count'] = 0
556
+ page['parsing_fail_count'] = 0
557
+ end
492
558
  end
493
559
  end
494
560
  end
@@ -139,6 +139,29 @@ module AeEasy
139
139
  @page = value
140
140
  end
141
141
 
142
+ # Refetch self page flag.
143
+ # @return [Boolean]
144
+ # @note It is stronger than #reparse_self flag.
145
+ def refetch_self
146
+ @refetch_self ||= false
147
+ end
148
+
149
+ # Set refetch self page flag.
150
+ def refetch_self= value
151
+ @refetch_self = value
152
+ end
153
+
154
+ # Reparse self page flag.
155
+ # @return [Boolean]
156
+ def reparse_self
157
+ @reparse_self ||= false
158
+ end
159
+
160
+ # Set reparse self page flag.
161
+ def reparse_self= value
162
+ @reparse_self = value
163
+ end
164
+
142
165
  # Retrieve a list of saved jobs.
143
166
  def saved_jobs
144
167
  db.jobs
@@ -179,6 +202,19 @@ module AeEasy
179
202
  list.clear
180
203
  end
181
204
 
205
+ # Execute any action applied to current page
206
+ def flush_self_actions
207
+ # Save current page before refetch/reparse
208
+ if refetch_self || reparse_self
209
+ temp_page_gid_override = !db.allow_page_gid_override?
210
+ db.enable_page_gid_override if temp_page_gid_override
211
+ save_pages [page]
212
+ db.disable_page_gid_override if temp_page_gid_override
213
+ end
214
+ db.refetch(page['job_id'], page['gid']) if refetch_self
215
+ db.reparse(page['job_id'], page['gid']) if reparse_self
216
+ end
217
+
182
218
  # Save draft pages into db and clear draft queue.
183
219
  def flush_pages
184
220
  save_pages pages
@@ -195,6 +231,7 @@ module AeEasy
195
231
  def flush
196
232
  flush_pages
197
233
  flush_outputs
234
+ flush_self_actions
198
235
  end
199
236
 
200
237
  # Get latest job by scraper_name.
@@ -306,6 +343,30 @@ module AeEasy
306
343
  eval(File.read(file_path), isolated_binding(vars), file_path)
307
344
  flush
308
345
  end
346
+
347
+ # Refetch a page by gid.
348
+ #
349
+ # @param [String] gid Page's gid to refetch.
350
+ def refetch gid
351
+ raise ArgumentError.new("gid needs to be a String.") unless gid.is_a?(String)
352
+ if page['gid'] == gid
353
+ self.refetch_self = true
354
+ return
355
+ end
356
+ db.refetch(job_id, gid)
357
+ end
358
+
359
+ # Reparse a page by gid.
360
+ #
361
+ # @param [String] page_gid Page's gid to reparse.
362
+ def reparse page_gid
363
+ raise ArgumentError.new("page_gid needs to be a String.") unless page_gid.is_a?(String)
364
+ if page['gid'] == page_gid
365
+ self.reparse_self = true
366
+ return
367
+ end
368
+ db.reparse(job_id, page_gid)
369
+ end
309
370
  end
310
371
  end
311
372
  end
@@ -0,0 +1,28 @@
1
+ module AeEasy
2
+ module Core
3
+ module Mock
4
+ # Fake finisher that emulates `AnswersEngine` finisher executor.
5
+ class FakeFinisher
6
+ include AeEasy::Core::Mock::FakeExecutor
7
+
8
+ # Fake finisher exposed methods to isolated context.
9
+ # @private
10
+ #
11
+ # @return [Array]
12
+ def self.exposed_methods
13
+ real_methods = AnswersEngine::Scraper::RubyFinisherExecutor.exposed_methods.uniq
14
+ mock_methods = [
15
+ :outputs,
16
+ :save_outputs,
17
+ :find_output,
18
+ :find_outputs
19
+ ]
20
+ AeEasy::Core::Mock::FakeExecutor.check_compatibility real_methods, mock_methods
21
+ mock_methods << :job_id
22
+ mock_methods.freeze
23
+ mock_methods
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -20,7 +20,9 @@ module AeEasy
20
20
  :save_pages,
21
21
  :save_outputs,
22
22
  :find_output,
23
- :find_outputs
23
+ :find_outputs,
24
+ :refetch,
25
+ :reparse
24
26
  ].freeze
25
27
  AeEasy::Core::Mock::FakeExecutor.check_compatibility real_methods, mock_methods
26
28
  mock_methods