klepto 0.2.9 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/klepto/bot.rb +6 -5
- data/lib/klepto/config.rb +9 -0
- data/lib/klepto/version.rb +1 -1
- data/spec/lib/klepto/bot_spec.rb +45 -19
- metadata +11 -11
data/lib/klepto/bot.rb
CHANGED
@@ -46,18 +46,19 @@ EOS
|
|
46
46
|
# redirect happened.
|
47
47
|
statuses = [browser.status, browser.statusx]
|
48
48
|
statuses.push :redirect if url != browser.page.current_url
|
49
|
+
|
49
50
|
# Dispatch all the handlers for HTTP Status Codes.
|
50
51
|
statuses.each do |status|
|
51
52
|
config.dispatch_status_handlers(status, browser.page)
|
52
53
|
end
|
53
|
-
|
54
|
+
|
54
55
|
# If the page was not a failure or if not aborting, structure that bad boy.
|
55
|
-
if browser.
|
56
|
-
resources << __structure(browser.page)
|
57
|
-
else
|
56
|
+
if (browser.failure? && config.abort_on_failure?) || (config.abort_on_redirect? && statuses.include?(:redirect))
|
58
57
|
config.after_handlers[:abort].each do |ah|
|
59
58
|
ah.call(browser.page)
|
60
|
-
end
|
59
|
+
end
|
60
|
+
else
|
61
|
+
resources << __structure(browser.page)
|
61
62
|
end
|
62
63
|
end
|
63
64
|
|
data/lib/klepto/config.rb
CHANGED
@@ -5,6 +5,7 @@ module Klepto
|
|
5
5
|
def initialize
|
6
6
|
@headers = {}
|
7
7
|
@abort_on_failure = true
|
8
|
+
@abort_on_redirect = false
|
8
9
|
@urls = []
|
9
10
|
@after_handlers = {
|
10
11
|
:each => [],
|
@@ -32,11 +33,19 @@ module Klepto
|
|
32
33
|
!!@abort_on_failure
|
33
34
|
end
|
34
35
|
|
36
|
+
def abort_on_redirect?
|
37
|
+
!!@abort_on_redirect
|
38
|
+
end
|
39
|
+
|
35
40
|
# 4xx, 5xx
|
36
41
|
def abort_on_failure(aof)
|
37
42
|
@abort_on_failure = aof
|
38
43
|
end
|
39
44
|
|
45
|
+
def abort_on_redirect(aor)
|
46
|
+
@abort_on_redirect = aor
|
47
|
+
end
|
48
|
+
|
40
49
|
def on_http_status(*statuses,&block)
|
41
50
|
statuses.each do |status|
|
42
51
|
@status_handlers[status] ||= []
|
data/lib/klepto/version.rb
CHANGED
data/spec/lib/klepto/bot_spec.rb
CHANGED
@@ -3,28 +3,54 @@ require 'spec_helper'
|
|
3
3
|
describe Klepto::Bot do
|
4
4
|
describe 'Klepto::Bot.new' do
|
5
5
|
describe 'create a bot with a redirect' do
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
6
|
+
describe 'that aborts on redirect' do
|
7
|
+
before(:each) do
|
8
|
+
@bot = Klepto::Bot.new("https://www.twitter.com/justinbieber"){
|
9
|
+
name 'h1.fullname'
|
10
|
+
config.abort_on_redirect true
|
11
|
+
|
12
|
+
config.after(:abort){
|
13
|
+
StatusLog.create message: 'Abort!'
|
14
|
+
}
|
15
|
+
}
|
16
|
+
@structure = @bot.resources
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'should structure not have structured the data' do
|
20
|
+
@structure.should be_empty
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'should have dispatched abort handlers' do
|
24
|
+
statuses = StatusLog.all.map(&:message)
|
25
|
+
statuses.should include 'Abort!'
|
26
|
+
end
|
17
27
|
end
|
18
28
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
29
|
+
describe 'that follows a redirect' do
|
30
|
+
before(:each) do
|
31
|
+
@bot = Klepto::Bot.new("https://www.twitter.com/justinbieber"){
|
32
|
+
name 'h1.fullname'
|
33
|
+
config.on_http_status(:redirect){
|
34
|
+
StatusLog.create message: 'redirect'
|
35
|
+
}
|
36
|
+
config.on_http_status(200){
|
37
|
+
StatusLog.create message: '200'
|
38
|
+
}
|
39
|
+
}
|
40
|
+
@structure = @bot.resources
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'should structure the data' do
|
44
|
+
@structure.first[:name].should match(/Justin/i)
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'should have dispatched status handlers' do
|
48
|
+
statuses = StatusLog.all.map(&:message)
|
49
|
+
statuses.should include 'redirect'
|
50
|
+
statuses.should include '200'
|
51
|
+
end
|
27
52
|
end
|
53
|
+
|
28
54
|
end
|
29
55
|
|
30
56
|
describe 'aborting after a failure' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: klepto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.9
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2013-04-30 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: poltergeist
|
16
|
-
requirement: &
|
16
|
+
requirement: &70212079013140 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - =
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.1.0
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70212079013140
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: capybara
|
27
|
-
requirement: &
|
27
|
+
requirement: &70212079012420 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - =
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 2.0.2
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70212079012420
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: nokogiri
|
38
|
-
requirement: &
|
38
|
+
requirement: &70212079011740 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 1.5.6
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70212079011740
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: activesupport
|
49
|
-
requirement: &
|
49
|
+
requirement: &70212079011320 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70212079011320
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: multi_json
|
60
|
-
requirement: &
|
60
|
+
requirement: &70212079010680 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,7 +65,7 @@ dependencies:
|
|
65
65
|
version: '1.0'
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70212079010680
|
69
69
|
description: Tearing up web pages into ActiveRecord resources
|
70
70
|
email:
|
71
71
|
- github@coryodaniel.com
|