klepto 0.5.1 → 0.5.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/klepto/bot.rb +6 -2
- data/lib/klepto/config.rb +7 -0
- data/lib/klepto/version.rb +2 -2
- data/spec/lib/klepto/bot_spec.rb +6 -0
- metadata +11 -11
data/lib/klepto/bot.rb
CHANGED
@@ -17,6 +17,7 @@ module Klepto
|
|
17
17
|
@config = Klepto::Config.new
|
18
18
|
@config.urls urls
|
19
19
|
@queue = []
|
20
|
+
@pages = {}
|
20
21
|
|
21
22
|
# Evaluate the block as DSL, proxy off anything that isn't on #config
|
22
23
|
# to a queue, then apply that queue to the top-level Klepto::Structure
|
@@ -26,6 +27,7 @@ module Klepto
|
|
26
27
|
# and restore method_missing (for sanity sake)
|
27
28
|
instance_eval <<-EOS
|
28
29
|
def queue; @queue; end;
|
30
|
+
def pages; @pages; end;
|
29
31
|
def parse!(*_urls); __process!(*_urls); end;
|
30
32
|
def resources; @resources; end;
|
31
33
|
def method_missing(meth, *args, &block)
|
@@ -53,7 +55,9 @@ EOS
|
|
53
55
|
|
54
56
|
begin
|
55
57
|
browser.fetch! url
|
56
|
-
|
58
|
+
|
59
|
+
@pages[url] = browser.page if config.keep_pages
|
60
|
+
|
57
61
|
# Fire callbacks on GET
|
58
62
|
config.after_handlers[:get].each do |ah|
|
59
63
|
ah.call(browser.page, browser, url)
|
@@ -75,7 +79,7 @@ EOS
|
|
75
79
|
})
|
76
80
|
end
|
77
81
|
else
|
78
|
-
resources << __structure(browser.page)
|
82
|
+
@resources << __structure(browser.page)
|
79
83
|
end
|
80
84
|
rescue Capybara::Poltergeist::TimeoutError => ex
|
81
85
|
config.dispatch_timeout_handler(ex, url)
|
data/lib/klepto/config.rb
CHANGED
@@ -2,9 +2,11 @@ module Klepto
|
|
2
2
|
class Config
|
3
3
|
attr_reader :after_handlers
|
4
4
|
attr_reader :before_handlers
|
5
|
+
attr_reader :keep_pages
|
5
6
|
|
6
7
|
def initialize
|
7
8
|
@headers = {}
|
9
|
+
@keep_pages = false
|
8
10
|
@abort_on_failure = true
|
9
11
|
@abort_on_redirect = false
|
10
12
|
@urls = []
|
@@ -30,6 +32,11 @@ module Klepto
|
|
30
32
|
# @default_driver
|
31
33
|
# end
|
32
34
|
|
35
|
+
def keep_pages(_keep = nil)
|
36
|
+
@keep_pages = _keep if _keep != nil
|
37
|
+
@keep_pages
|
38
|
+
end
|
39
|
+
|
33
40
|
def headers(_headers=nil)
|
34
41
|
@headers = _headers if _headers
|
35
42
|
@headers
|
data/lib/klepto/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module Klepto
|
2
|
-
VERSION = "0.5.1"
|
3
|
-
end
|
2
|
+
VERSION = "0.5.3"
|
3
|
+
end
|
data/spec/lib/klepto/bot_spec.rb
CHANGED
@@ -114,6 +114,8 @@ describe Klepto::Bot do
|
|
114
114
|
'X-Sup-Dawg' => "Yo, What's up?"
|
115
115
|
})
|
116
116
|
|
117
|
+
config.keep_pages true
|
118
|
+
|
117
119
|
# Structure that stuff
|
118
120
|
name 'h1.fullname'
|
119
121
|
username "//span[contains(concat(' ',normalize-space(@class),' '),' screen-name ')]", :syntax => :xpath
|
@@ -178,6 +180,10 @@ describe Klepto::Bot do
|
|
178
180
|
@structure.first[:last_tweet][:twitter_id].should == @structure.first[:tweets].first[:twitter_id]
|
179
181
|
end
|
180
182
|
|
183
|
+
it 'should have the pages stored' do
|
184
|
+
@bot.pages["https://twitter.com/justinbieber"].should_not be_nil
|
185
|
+
end
|
186
|
+
|
181
187
|
it 'should be able to #parse! a url' do
|
182
188
|
@new_structure = @bot.parse!("https://twitter.com/justinbieber")
|
183
189
|
@new_structure.first[:name].should match(/Justin/i)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: klepto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.1
|
4
|
+
version: 0.5.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2013-05-31 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: poltergeist
|
16
|
-
requirement: &
|
16
|
+
requirement: &70314273924640 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - =
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.1.0
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70314273924640
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: capybara
|
27
|
-
requirement: &
|
27
|
+
requirement: &70314260798000 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - =
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 2.0.2
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70314260798000
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: nokogiri
|
38
|
-
requirement: &
|
38
|
+
requirement: &70314260796320 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 1.5.6
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70314260796320
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: activesupport
|
49
|
-
requirement: &
|
49
|
+
requirement: &70314260795500 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70314260795500
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: multi_json
|
60
|
-
requirement: &
|
60
|
+
requirement: &70314260792000 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,7 +65,7 @@ dependencies:
|
|
65
65
|
version: '1.0'
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70314260792000
|
69
69
|
description: Tearing up web pages into ActiveRecord resources
|
70
70
|
email:
|
71
71
|
- github@coryodaniel.com
|