klepto 0.4.9 → 0.5.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
data/lib/klepto/bot.rb CHANGED
@@ -53,41 +53,33 @@ EOS
53
53
 
54
54
  begin
55
55
  browser.fetch! url
56
+
57
+ # Fire callbacks on GET
58
+ config.after_handlers[:get].each do |ah|
59
+ ah.call(browser.page, browser, url)
60
+ end
61
+
62
+ # Dispatch all the handlers for HTTP Status Codes.
63
+ browser.statuses.each do |status|
64
+ config.dispatch_status_handlers(status, browser.page)
65
+ end
66
+
67
+ # If the page was not a failure or if not aborting, structure that bad boy.
68
+ if (browser.failure? && config.abort_on_failure?) || (config.abort_on_redirect? && browser.was_redirected?)
69
+ config.after_handlers[:abort].each do |ah|
70
+ ah.call(browser.page,{
71
+ browser_failure: browser.failure?,
72
+ abort_on_failure: config.abort_on_failure?,
73
+ abort_on_redirect: config.abort_on_redirect?,
74
+ redirect: browser.was_redirected?
75
+ })
76
+ end
77
+ else
78
+ resources << __structure(browser.page)
79
+ end
56
80
  rescue Capybara::Poltergeist::TimeoutError => ex
57
81
  config.dispatch_timeout_handler(ex, url)
58
82
  end
59
-
60
- # Fire callbacks on GET
61
- config.after_handlers[:get].each do |ah|
62
- ah.call(browser.page, browser, url)
63
- end
64
-
65
- # Capybara automatically follows redirects... Checking the page here
66
- # to see if it has changed, and if so add it on to the stack of statuses.
67
- # statuses is an array because it holds the actual HTTP response code and an
68
- # approximate code (2xx for example). :redirect will be pushed onto the stack if a
69
- # redirect happened.
70
- statuses = [browser.status, browser.statusx]
71
- statuses.push :redirect if url != browser.page.current_url
72
-
73
- # Dispatch all the handlers for HTTP Status Codes.
74
- statuses.each do |status|
75
- config.dispatch_status_handlers(status, browser.page)
76
- end
77
-
78
- # If the page was not a failure or if not aborting, structure that bad boy.
79
- if (browser.failure? && config.abort_on_failure?) || (config.abort_on_redirect? && statuses.include?(:redirect))
80
- config.after_handlers[:abort].each do |ah|
81
- ah.call(browser.page,{
82
- browser_failure: browser.failure?,
83
- abort_on_failure: config.abort_on_failure?,
84
- abort_on_redirect: config.abort_on_redirect?,
85
- redirect: statuses.include?(:redirect)
86
- })
87
- end
88
- else
89
- resources << __structure(browser.page)
90
- end
91
83
  end
92
84
 
93
85
  @resources
@@ -2,6 +2,7 @@ module Klepto
2
2
  class Browser
3
3
  include Capybara::DSL
4
4
 
5
+ attr_reader :last_url
5
6
  def initialize(*args)
6
7
  Klepto.logger.debug("===== Initializing new browser. =====")
7
8
  super
@@ -31,11 +32,29 @@ module Klepto
31
32
  !success?
32
33
  end
33
34
 
35
+ def was_redirected?
36
+ @last_url != page.current_url
37
+ end
38
+
39
+ # Capybara automatically follows redirects... Checking the page here
40
+ # to see if it has changed, and if so add it on to the stack of statuses.
41
+ # statuses is an array because it holds the actual HTTP response code and an
42
+ # approximate code (2xx for example). :redirect will be pushed onto the stack if a
43
+ # redirect happened.
44
+ def statuses
45
+ if !was_redirected?
46
+ [status, statusx]
47
+ else
48
+ [status, statusx, :redirect]
49
+ end
50
+ end
51
+
34
52
  def statusx
35
53
  page.status_code.to_s[0..-3] + "xx"
36
54
  end
37
55
 
38
56
  def fetch!(url)
57
+ @last_url = url
39
58
  Klepto.logger.debug("Fetching #{url}")
40
59
  #Capybara.using_driver use_driver do
41
60
  visit url
@@ -1,3 +1,3 @@
1
1
  module Klepto
2
- VERSION = "0.4.9"
2
+ VERSION = "0.5.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: klepto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.9
4
+ version: 0.5.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-05-30 00:00:00.000000000 Z
12
+ date: 2013-05-31 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: poltergeist
16
- requirement: &70325624163420 !ruby/object:Gem::Requirement
16
+ requirement: &70221809709220 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - =
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.1.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70325624163420
24
+ version_requirements: *70221809709220
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: capybara
27
- requirement: &70325624162920 !ruby/object:Gem::Requirement
27
+ requirement: &70221809708720 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - =
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 2.0.2
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70325624162920
35
+ version_requirements: *70221809708720
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: nokogiri
38
- requirement: &70325624162460 !ruby/object:Gem::Requirement
38
+ requirement: &70221809708260 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 1.5.6
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70325624162460
46
+ version_requirements: *70221809708260
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: activesupport
49
- requirement: &70325624162080 !ruby/object:Gem::Requirement
49
+ requirement: &70221809707880 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70325624162080
57
+ version_requirements: *70221809707880
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: multi_json
60
- requirement: &70325624161540 !ruby/object:Gem::Requirement
60
+ requirement: &70221797327360 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,7 +65,7 @@ dependencies:
65
65
  version: '1.0'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *70325624161540
68
+ version_requirements: *70221797327360
69
69
  description: Tearing up web pages into ActiveRecord resources
70
70
  email:
71
71
  - github@coryodaniel.com