klepto 0.5.1 → 0.5.3

Sign up to get free protection for your applications and to get access to all the features.
data/lib/klepto/bot.rb CHANGED
@@ -17,6 +17,7 @@ module Klepto
17
17
  @config = Klepto::Config.new
18
18
  @config.urls urls
19
19
  @queue = []
20
+ @pages = {}
20
21
 
21
22
  # Evaluate the block as DSL, proxy off anything that isn't on #config
22
23
  # to a queue, then apply that queue to the top-level Klepto::Structure
@@ -26,6 +27,7 @@ module Klepto
26
27
  # and restore method_missing (for sanity sake)
27
28
  instance_eval <<-EOS
28
29
  def queue; @queue; end;
30
+ def pages; @pages; end;
29
31
  def parse!(*_urls); __process!(*_urls); end;
30
32
  def resources; @resources; end;
31
33
  def method_missing(meth, *args, &block)
@@ -53,7 +55,9 @@ EOS
53
55
 
54
56
  begin
55
57
  browser.fetch! url
56
-
58
+
59
+ @pages[url] = browser.page if config.keep_pages
60
+
57
61
  # Fire callbacks on GET
58
62
  config.after_handlers[:get].each do |ah|
59
63
  ah.call(browser.page, browser, url)
@@ -75,7 +79,7 @@ EOS
75
79
  })
76
80
  end
77
81
  else
78
- resources << __structure(browser.page)
82
+ @resources << __structure(browser.page)
79
83
  end
80
84
  rescue Capybara::Poltergeist::TimeoutError => ex
81
85
  config.dispatch_timeout_handler(ex, url)
data/lib/klepto/config.rb CHANGED
@@ -2,9 +2,11 @@ module Klepto
2
2
  class Config
3
3
  attr_reader :after_handlers
4
4
  attr_reader :before_handlers
5
+ attr_reader :keep_pages
5
6
 
6
7
  def initialize
7
8
  @headers = {}
9
+ @keep_pages = false
8
10
  @abort_on_failure = true
9
11
  @abort_on_redirect = false
10
12
  @urls = []
@@ -30,6 +32,11 @@ module Klepto
30
32
  # @default_driver
31
33
  # end
32
34
 
35
+ def keep_pages(_keep = nil)
36
+ @keep_pages = _keep if _keep != nil
37
+ @keep_pages
38
+ end
39
+
33
40
  def headers(_headers=nil)
34
41
  @headers = _headers if _headers
35
42
  @headers
@@ -1,3 +1,3 @@
1
1
  module Klepto
2
- VERSION = "0.5.1"
3
- end
2
+ VERSION = "0.5.3"
3
+ end
@@ -114,6 +114,8 @@ describe Klepto::Bot do
114
114
  'X-Sup-Dawg' => "Yo, What's up?"
115
115
  })
116
116
 
117
+ config.keep_pages true
118
+
117
119
  # Structure that stuff
118
120
  name 'h1.fullname'
119
121
  username "//span[contains(concat(' ',normalize-space(@class),' '),' screen-name ')]", :syntax => :xpath
@@ -178,6 +180,10 @@ describe Klepto::Bot do
178
180
  @structure.first[:last_tweet][:twitter_id].should == @structure.first[:tweets].first[:twitter_id]
179
181
  end
180
182
 
183
+ it 'should have the pages stored' do
184
+ @bot.pages["https://twitter.com/justinbieber"].should_not be_nil
185
+ end
186
+
181
187
  it 'should be able to #parse! a url' do
182
188
  @new_structure = @bot.parse!("https://twitter.com/justinbieber")
183
189
  @new_structure.first[:name].should match(/Justin/i)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: klepto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2013-05-31 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: poltergeist
16
- requirement: &70221809709220 !ruby/object:Gem::Requirement
16
+ requirement: &70314273924640 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - =
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.1.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70221809709220
24
+ version_requirements: *70314273924640
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: capybara
27
- requirement: &70221809708720 !ruby/object:Gem::Requirement
27
+ requirement: &70314260798000 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - =
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 2.0.2
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70221809708720
35
+ version_requirements: *70314260798000
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: nokogiri
38
- requirement: &70221809708260 !ruby/object:Gem::Requirement
38
+ requirement: &70314260796320 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 1.5.6
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70221809708260
46
+ version_requirements: *70314260796320
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: activesupport
49
- requirement: &70221809707880 !ruby/object:Gem::Requirement
49
+ requirement: &70314260795500 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70221809707880
57
+ version_requirements: *70314260795500
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: multi_json
60
- requirement: &70221797327360 !ruby/object:Gem::Requirement
60
+ requirement: &70314260792000 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,7 +65,7 @@ dependencies:
65
65
  version: '1.0'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *70221797327360
68
+ version_requirements: *70314260792000
69
69
  description: Tearing up web pages into ActiveRecord resources
70
70
  email:
71
71
  - github@coryodaniel.com