klepto 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/klepto/bot.rb CHANGED
@@ -17,6 +17,7 @@ module Klepto
17
17
  @config = Klepto::Config.new
18
18
  @config.urls urls
19
19
  @queue = []
20
+ @pages = {}
20
21
 
21
22
  # Evaluate the block as DSL, proxy off anything that isn't on #config
22
23
  # to a queue, then apply that queue to the top-level Klepto::Structure
@@ -26,6 +27,7 @@ module Klepto
26
27
  # and restore method_missing (for sanity sake)
27
28
  instance_eval <<-EOS
28
29
  def queue; @queue; end;
30
+ def pages; @pages; end;
29
31
  def parse!(*_urls); __process!(*_urls); end;
30
32
  def resources; @resources; end;
31
33
  def method_missing(meth, *args, &block)
@@ -53,7 +55,9 @@ EOS
53
55
 
54
56
  begin
55
57
  browser.fetch! url
56
-
58
+
59
+ @pages[url] = browser.page if config.keep_pages
60
+
57
61
  # Fire callbacks on GET
58
62
  config.after_handlers[:get].each do |ah|
59
63
  ah.call(browser.page, browser, url)
@@ -75,7 +79,7 @@ EOS
75
79
  })
76
80
  end
77
81
  else
78
- resources << __structure(browser.page)
82
+ @resources << __structure(browser.page)
79
83
  end
80
84
  rescue Capybara::Poltergeist::TimeoutError => ex
81
85
  config.dispatch_timeout_handler(ex, url)
data/lib/klepto/config.rb CHANGED
@@ -2,9 +2,11 @@ module Klepto
2
2
  class Config
3
3
  attr_reader :after_handlers
4
4
  attr_reader :before_handlers
5
+ attr_reader :keep_pages
5
6
 
6
7
  def initialize
7
8
  @headers = {}
9
+ @keep_pages = false
8
10
  @abort_on_failure = true
9
11
  @abort_on_redirect = false
10
12
  @urls = []
@@ -30,6 +32,11 @@ module Klepto
30
32
  # @default_driver
31
33
  # end
32
34
 
35
+ def keep_pages(_keep = nil)
36
+ @keep_pages = _keep if _keep != nil
37
+ @keep_pages
38
+ end
39
+
33
40
  def headers(_headers=nil)
34
41
  @headers = _headers if _headers
35
42
  @headers
@@ -1,3 +1,3 @@
1
1
  module Klepto
2
- VERSION = "0.5.1"
3
- end
2
+ VERSION = "0.5.3"
3
+ end
@@ -114,6 +114,8 @@ describe Klepto::Bot do
114
114
  'X-Sup-Dawg' => "Yo, What's up?"
115
115
  })
116
116
 
117
+ config.keep_pages true
118
+
117
119
  # Structure that stuff
118
120
  name 'h1.fullname'
119
121
  username "//span[contains(concat(' ',normalize-space(@class),' '),' screen-name ')]", :syntax => :xpath
@@ -178,6 +180,10 @@ describe Klepto::Bot do
178
180
  @structure.first[:last_tweet][:twitter_id].should == @structure.first[:tweets].first[:twitter_id]
179
181
  end
180
182
 
183
+ it 'should have the pages stored' do
184
+ @bot.pages["https://twitter.com/justinbieber"].should_not be_nil
185
+ end
186
+
181
187
  it 'should be able to #parse! a url' do
182
188
  @new_structure = @bot.parse!("https://twitter.com/justinbieber")
183
189
  @new_structure.first[:name].should match(/Justin/i)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: klepto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2013-05-31 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: poltergeist
16
- requirement: &70221809709220 !ruby/object:Gem::Requirement
16
+ requirement: &70314273924640 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - =
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.1.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70221809709220
24
+ version_requirements: *70314273924640
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: capybara
27
- requirement: &70221809708720 !ruby/object:Gem::Requirement
27
+ requirement: &70314260798000 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - =
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 2.0.2
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70221809708720
35
+ version_requirements: *70314260798000
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: nokogiri
38
- requirement: &70221809708260 !ruby/object:Gem::Requirement
38
+ requirement: &70314260796320 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 1.5.6
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70221809708260
46
+ version_requirements: *70314260796320
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: activesupport
49
- requirement: &70221809707880 !ruby/object:Gem::Requirement
49
+ requirement: &70314260795500 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70221809707880
57
+ version_requirements: *70314260795500
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: multi_json
60
- requirement: &70221797327360 !ruby/object:Gem::Requirement
60
+ requirement: &70314260792000 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,7 +65,7 @@ dependencies:
65
65
  version: '1.0'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *70221797327360
68
+ version_requirements: *70314260792000
69
69
  description: Tearing up web pages into ActiveRecord resources
70
70
  email:
71
71
  - github@coryodaniel.com