klepto 0.4.9 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/klepto/bot.rb +24 -32
- data/lib/klepto/browser.rb +19 -0
- data/lib/klepto/version.rb +1 -1
- metadata +12 -12
data/lib/klepto/bot.rb
CHANGED
@@ -53,41 +53,33 @@ EOS
|
|
53
53
|
|
54
54
|
begin
|
55
55
|
browser.fetch! url
|
56
|
+
|
57
|
+
# Fire callbacks on GET
|
58
|
+
config.after_handlers[:get].each do |ah|
|
59
|
+
ah.call(browser.page, browser, url)
|
60
|
+
end
|
61
|
+
|
62
|
+
# Dispatch all the handlers for HTTP Status Codes.
|
63
|
+
browser.statuses.each do |status|
|
64
|
+
config.dispatch_status_handlers(status, browser.page)
|
65
|
+
end
|
66
|
+
|
67
|
+
# If the page was not a failure or if not aborting, structure that bad boy.
|
68
|
+
if (browser.failure? && config.abort_on_failure?) || (config.abort_on_redirect? && browser.was_redirected?)
|
69
|
+
config.after_handlers[:abort].each do |ah|
|
70
|
+
ah.call(browser.page,{
|
71
|
+
browser_failure: browser.failure?,
|
72
|
+
abort_on_failure: config.abort_on_failure?,
|
73
|
+
abort_on_redirect: config.abort_on_redirect?,
|
74
|
+
redirect: browser.was_redirected?
|
75
|
+
})
|
76
|
+
end
|
77
|
+
else
|
78
|
+
resources << __structure(browser.page)
|
79
|
+
end
|
56
80
|
rescue Capybara::Poltergeist::TimeoutError => ex
|
57
81
|
config.dispatch_timeout_handler(ex, url)
|
58
82
|
end
|
59
|
-
|
60
|
-
# Fire callbacks on GET
|
61
|
-
config.after_handlers[:get].each do |ah|
|
62
|
-
ah.call(browser.page, browser, url)
|
63
|
-
end
|
64
|
-
|
65
|
-
# Capybara automatically follows redirects... Checking the page here
|
66
|
-
# to see if it has changed, and if so add it on to the stack of statuses.
|
67
|
-
# statuses is an array because it holds the actual HTTP response code and an
|
68
|
-
# approximate code (2xx for example). :redirect will be pushed onto the stack if a
|
69
|
-
# redirect happened.
|
70
|
-
statuses = [browser.status, browser.statusx]
|
71
|
-
statuses.push :redirect if url != browser.page.current_url
|
72
|
-
|
73
|
-
# Dispatch all the handlers for HTTP Status Codes.
|
74
|
-
statuses.each do |status|
|
75
|
-
config.dispatch_status_handlers(status, browser.page)
|
76
|
-
end
|
77
|
-
|
78
|
-
# If the page was not a failure or if not aborting, structure that bad boy.
|
79
|
-
if (browser.failure? && config.abort_on_failure?) || (config.abort_on_redirect? && statuses.include?(:redirect))
|
80
|
-
config.after_handlers[:abort].each do |ah|
|
81
|
-
ah.call(browser.page,{
|
82
|
-
browser_failure: browser.failure?,
|
83
|
-
abort_on_failure: config.abort_on_failure?,
|
84
|
-
abort_on_redirect: config.abort_on_redirect?,
|
85
|
-
redirect: statuses.include?(:redirect)
|
86
|
-
})
|
87
|
-
end
|
88
|
-
else
|
89
|
-
resources << __structure(browser.page)
|
90
|
-
end
|
91
83
|
end
|
92
84
|
|
93
85
|
@resources
|
data/lib/klepto/browser.rb
CHANGED
@@ -2,6 +2,7 @@ module Klepto
|
|
2
2
|
class Browser
|
3
3
|
include Capybara::DSL
|
4
4
|
|
5
|
+
attr_reader :last_url
|
5
6
|
def initialize(*args)
|
6
7
|
Klepto.logger.debug("===== Initializing new browser. =====")
|
7
8
|
super
|
@@ -31,11 +32,29 @@ module Klepto
|
|
31
32
|
!success?
|
32
33
|
end
|
33
34
|
|
35
|
+
def was_redirected?
|
36
|
+
@last_url != page.current_url
|
37
|
+
end
|
38
|
+
|
39
|
+
# Capybara automatically follows redirects... Checking the page here
|
40
|
+
# to see if it has changed, and if so add it on to the stack of statuses.
|
41
|
+
# statuses is an array because it holds the actual HTTP response code and an
|
42
|
+
# approximate code (2xx for example). :redirect will be pushed onto the stack if a
|
43
|
+
# redirect happened.
|
44
|
+
def statuses
|
45
|
+
if !was_redirected?
|
46
|
+
[status, statusx]
|
47
|
+
else
|
48
|
+
[status, statusx, :redirect]
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
34
52
|
def statusx
|
35
53
|
page.status_code.to_s[0..-3] + "xx"
|
36
54
|
end
|
37
55
|
|
38
56
|
def fetch!(url)
|
57
|
+
@last_url = url
|
39
58
|
Klepto.logger.debug("Fetching #{url}")
|
40
59
|
#Capybara.using_driver use_driver do
|
41
60
|
visit url
|
data/lib/klepto/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: klepto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.9
|
4
|
+
version: 0.5.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-05-
|
12
|
+
date: 2013-05-31 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: poltergeist
|
16
|
-
requirement: &
|
16
|
+
requirement: &70221809709220 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - =
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.1.0
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70221809709220
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: capybara
|
27
|
-
requirement: &
|
27
|
+
requirement: &70221809708720 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - =
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 2.0.2
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70221809708720
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: nokogiri
|
38
|
-
requirement: &
|
38
|
+
requirement: &70221809708260 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 1.5.6
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70221809708260
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: activesupport
|
49
|
-
requirement: &
|
49
|
+
requirement: &70221809707880 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70221809707880
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: multi_json
|
60
|
-
requirement: &
|
60
|
+
requirement: &70221797327360 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,7 +65,7 @@ dependencies:
|
|
65
65
|
version: '1.0'
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70221797327360
|
69
69
|
description: Tearing up web pages into ActiveRecord resources
|
70
70
|
email:
|
71
71
|
- github@coryodaniel.com
|