klepto 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/klepto/bot.rb +6 -2
- data/lib/klepto/config.rb +7 -0
- data/lib/klepto/version.rb +2 -2
- data/spec/lib/klepto/bot_spec.rb +6 -0
- metadata +11 -11
data/lib/klepto/bot.rb
CHANGED
@@ -17,6 +17,7 @@ module Klepto
|
|
17
17
|
@config = Klepto::Config.new
|
18
18
|
@config.urls urls
|
19
19
|
@queue = []
|
20
|
+
@pages = {}
|
20
21
|
|
21
22
|
# Evaluate the block as DSL, proxy off anything that isn't on #config
|
22
23
|
# to a queue, then apply that queue to the top-level Klepto::Structure
|
@@ -26,6 +27,7 @@ module Klepto
|
|
26
27
|
# and restore method_missing (for sanity sake)
|
27
28
|
instance_eval <<-EOS
|
28
29
|
def queue; @queue; end;
|
30
|
+
def pages; @pages; end;
|
29
31
|
def parse!(*_urls); __process!(*_urls); end;
|
30
32
|
def resources; @resources; end;
|
31
33
|
def method_missing(meth, *args, &block)
|
@@ -53,7 +55,9 @@ EOS
|
|
53
55
|
|
54
56
|
begin
|
55
57
|
browser.fetch! url
|
56
|
-
|
58
|
+
|
59
|
+
@pages[url] = browser.page if config.keep_pages
|
60
|
+
|
57
61
|
# Fire callbacks on GET
|
58
62
|
config.after_handlers[:get].each do |ah|
|
59
63
|
ah.call(browser.page, browser, url)
|
@@ -75,7 +79,7 @@ EOS
|
|
75
79
|
})
|
76
80
|
end
|
77
81
|
else
|
78
|
-
resources << __structure(browser.page)
|
82
|
+
@resources << __structure(browser.page)
|
79
83
|
end
|
80
84
|
rescue Capybara::Poltergeist::TimeoutError => ex
|
81
85
|
config.dispatch_timeout_handler(ex, url)
|
data/lib/klepto/config.rb
CHANGED
@@ -2,9 +2,11 @@ module Klepto
|
|
2
2
|
class Config
|
3
3
|
attr_reader :after_handlers
|
4
4
|
attr_reader :before_handlers
|
5
|
+
attr_reader :keep_pages
|
5
6
|
|
6
7
|
def initialize
|
7
8
|
@headers = {}
|
9
|
+
@keep_pages = false
|
8
10
|
@abort_on_failure = true
|
9
11
|
@abort_on_redirect = false
|
10
12
|
@urls = []
|
@@ -30,6 +32,11 @@ module Klepto
|
|
30
32
|
# @default_driver
|
31
33
|
# end
|
32
34
|
|
35
|
+
def keep_pages(_keep = nil)
|
36
|
+
@keep_pages = _keep if _keep != nil
|
37
|
+
@keep_pages
|
38
|
+
end
|
39
|
+
|
33
40
|
def headers(_headers=nil)
|
34
41
|
@headers = _headers if _headers
|
35
42
|
@headers
|
data/lib/klepto/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module Klepto
|
2
|
-
VERSION = "0.5.1"
|
3
|
-
end
|
2
|
+
VERSION = "0.5.3"
|
3
|
+
end
|
data/spec/lib/klepto/bot_spec.rb
CHANGED
@@ -114,6 +114,8 @@ describe Klepto::Bot do
|
|
114
114
|
'X-Sup-Dawg' => "Yo, What's up?"
|
115
115
|
})
|
116
116
|
|
117
|
+
config.keep_pages true
|
118
|
+
|
117
119
|
# Structure that stuff
|
118
120
|
name 'h1.fullname'
|
119
121
|
username "//span[contains(concat(' ',normalize-space(@class),' '),' screen-name ')]", :syntax => :xpath
|
@@ -178,6 +180,10 @@ describe Klepto::Bot do
|
|
178
180
|
@structure.first[:last_tweet][:twitter_id].should == @structure.first[:tweets].first[:twitter_id]
|
179
181
|
end
|
180
182
|
|
183
|
+
it 'should have the pages stored' do
|
184
|
+
@bot.pages["https://twitter.com/justinbieber"].should_not be_nil
|
185
|
+
end
|
186
|
+
|
181
187
|
it 'should be able to #parse! a url' do
|
182
188
|
@new_structure = @bot.parse!("https://twitter.com/justinbieber")
|
183
189
|
@new_structure.first[:name].should match(/Justin/i)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: klepto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.1
|
4
|
+
version: 0.5.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2013-05-31 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: poltergeist
|
16
|
-
requirement: &
|
16
|
+
requirement: &70314273924640 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - =
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.1.0
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70314273924640
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: capybara
|
27
|
-
requirement: &
|
27
|
+
requirement: &70314260798000 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - =
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 2.0.2
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70314260798000
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: nokogiri
|
38
|
-
requirement: &
|
38
|
+
requirement: &70314260796320 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 1.5.6
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70314260796320
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: activesupport
|
49
|
-
requirement: &
|
49
|
+
requirement: &70314260795500 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70314260795500
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: multi_json
|
60
|
-
requirement: &
|
60
|
+
requirement: &70314260792000 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,7 +65,7 @@ dependencies:
|
|
65
65
|
version: '1.0'
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70314260792000
|
69
69
|
description: Tearing up web pages into ActiveRecord resources
|
70
70
|
email:
|
71
71
|
- github@coryodaniel.com
|