klepto 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -84,7 +84,8 @@ Say you want a bunch of Bieb tweets! How is there not profit in that?
84
84
  end
85
85
 
86
86
  # Multiple Nested structures? Let klepto know this is a collection of resources
87
- tweets 'li.stream-item', :as => :collection do
87
+ # Does Bieber tweet too much? Maybe. Let's only get the new stuff kids crave.
88
+ tweets 'li.stream-item', :as => :collection, :limit => 10 do
88
89
  twitter_id do |node|
89
90
  node['data-item-id']
90
91
  end
@@ -190,16 +191,6 @@ cookies({
190
191
  })
191
192
  ```
192
193
 
193
- event handlers...
194
- --------------------
195
- ```ruby
196
- on_http_status(500,404) do |response, bot|
197
- email('admin@example.com', bot.status, bot.summary)
198
- end
199
- on_http_status('3xx') do |response, bot|
200
- end
201
- ```
202
-
203
194
  Pre-req Steps
204
195
  --------------------
205
196
  ```ruby
data/lib/klepto/bot.rb CHANGED
@@ -7,8 +7,12 @@ module Klepto
7
7
  @config.urls urls
8
8
  @queue = []
9
9
 
10
+ # Evaluate the block as DSL, proxy off anything that isn't on #config
11
+ # to a queue, then apply that queue to the top-level Klepto::Structure
10
12
  instance_eval &block
11
13
 
14
+ # After DSL evaluation is queued up, put some methods onto this instance
15
+ # and restore method_missing (for sanity's sake)
12
16
  instance_eval <<-EOS
13
17
  def queue; @queue; end;
14
18
  def resources; @resources; end;
@@ -20,6 +24,7 @@ EOS
20
24
  __process!
21
25
  end
22
26
 
27
+ # Structure all the pages
23
28
  def __process!
24
29
  @resources = []
25
30
 
@@ -29,16 +34,24 @@ EOS
29
34
  browser.set_headers config.headers
30
35
  browser.fetch! url
31
36
 
37
+ # Fire callbacks on GET
32
38
  config.after_handlers[:get].each do |ah|
33
39
  ah.call(browser.page)
34
40
  end
35
41
 
42
+ # Capybara automatically follows redirects... Checking the page here
43
+ # to see if it has changed, and if so add it on to the stack of statuses.
44
+ # statuses is an array because it holds the actual HTTP response code and an
45
+ # approximate code (2xx for example). :redirect will be pushed onto the stack if a
46
+ # redirect happened.
36
47
  statuses = [browser.status, browser.statusx]
37
48
  statuses.push :redirect if url != browser.page.current_url
49
+ # Dispatch all the handlers for HTTP Status Codes.
38
50
  statuses.each do |status|
39
51
  config.dispatch_status_handlers(status, browser.page)
40
52
  end
41
53
 
54
+ # If the page was not a failure or if not aborting, structure that bad boy.
42
55
  if !browser.failure? || (browser.failure? && !config.abort_on_failure?)
43
56
  resources << __structure(browser.page)
44
57
  else
@@ -54,6 +67,7 @@ EOS
54
67
  def __structure(context)
55
68
  structure = Structure.new(context)
56
69
 
70
+ # A queue of DSL instructions
57
71
  queue.each do |instruction|
58
72
  if instruction[2]
59
73
  structure.send instruction[0], *instruction[1], &instruction[2]
@@ -62,10 +76,9 @@ EOS
62
76
  end
63
77
  end
64
78
 
65
- config.after_handlers[:each].each do |ah|
66
- ah.call(structure._hash)
67
- end
68
-
79
+ # Call after(:each) handlers...
80
+ config.after_handlers[:each].each { |ah| ah.call(structure._hash) }
81
+
69
82
  structure._hash
70
83
  end
71
84
 
data/lib/klepto/config.rb CHANGED
@@ -13,8 +13,16 @@ module Klepto
13
13
  }
14
14
  @before_handlers = {:each => []}
15
15
  @status_handlers = {}
16
+ @handlers = {}
16
17
  end
17
18
 
19
+ # TODO: DRY up handlers...
20
+ # def dispatch(group, handler, *args)
21
+ # @handlers[group] ||= {}
22
+ # @handlers[group][handler] ||= []
23
+ # @handlers[group][handler].each{|handler| handler.call(*args)}
24
+ # end
25
+
18
26
  def headers(_headers=nil)
19
27
  @headers = _headers if _headers
20
28
  @headers
@@ -20,17 +20,20 @@ module Klepto
20
20
  #options[:as] :collection, :resource
21
21
  #options[:match] :first, :all
22
22
  #options[:syntax] :xpath, :css
23
+ #options[:limit] Integer, maximum number of elements to structure when :match => :all or :as => :collection
23
24
  def method_missing(meth, *args, &block)
24
25
  options = args.last.is_a?(Hash) ? args.pop : {}
25
26
  options[:syntax] ||= :css
26
27
  options[:match] ||= :first
27
28
  options[:attr] ||= nil
29
+ options[:limit] ||= nil
28
30
  selector = args.shift
29
31
 
30
32
  if options[:as] == :collection
31
33
  @_hash[meth] = []
32
34
  result = _context.all( options[:syntax], selector )
33
- result.each do |ele|
35
+ options[:limit] ||= result.length
36
+ result[0, options[:limit]].each do |ele|
34
37
  @_hash[meth].push Structure.build(ele, self, &block)
35
38
  end
36
39
  elsif options[:as] == :resource
@@ -45,7 +48,8 @@ module Klepto
45
48
 
46
49
  if options[:match] == :all
47
50
  @_hash[meth] = []
48
- result.each do |node|
51
+ options[:limit] ||= result.length
52
+ result[0, options[:limit]].each do |node|
49
53
  @_hash[meth] << block.call( node )
50
54
  end
51
55
  else
@@ -55,7 +59,8 @@ module Klepto
55
59
  result = _context.send( options[:match], options[:syntax], selector )
56
60
  if options[:match] == :all
57
61
  @_hash[meth] = []
58
- result.each do |node|
62
+ options[:limit] ||= result.length
63
+ result[0, options[:limit]].each do |node|
59
64
  @_hash[meth] << (node[options[:attr]] || node.try(:text))
60
65
  end
61
66
  else
@@ -1,3 +1,3 @@
1
1
  module Klepto
2
- VERSION = "0.2.5"
2
+ VERSION = "0.2.6"
3
3
  end
@@ -172,5 +172,49 @@ describe Klepto::Bot do
172
172
  statuses.should include 'got a page'
173
173
  end
174
174
  end
175
+
176
+
177
+ describe 'creating a bot with a node limit' do
178
+ before(:each) do
179
+ @bot = Klepto::Bot.new("https://twitter.com/justinbieber"){
180
+ config.headers({
181
+ 'Referer' => 'http://www.twitter.com',
182
+ 'X-Sup-Dawg' => "Yo, What's up?"
183
+ })
184
+
185
+ # Structure that stuff
186
+ name 'h1.fullname'
187
+ username "span.screen-name"
188
+
189
+ tweets 'li.stream-item', :as => :collection, :limit => 5 do
190
+ twitter_id do |node|
191
+ node['data-item-id']
192
+ end
193
+ tweet '.content p', :css
194
+ timestamp '._timestamp', :attr => 'data-time'
195
+ permalink '.time a', :css, :attr => :href
196
+ end
197
+
198
+ config.after(:each) do |resource|
199
+ @user = User.new
200
+ @user.name = resource[:name]
201
+ @user.username = resource[:username]
202
+ @user.save
203
+
204
+ resource[:tweets].each do |tweet|
205
+ Tweet.create(tweet)
206
+ end
207
+ end
208
+ }
209
+ @structure = @bot.resources
210
+ end
211
+
212
+ it 'should limit the nodes structured' do
213
+ User.count.should be(1)
214
+ Tweet.count.should be(5)
215
+ end
216
+ end
217
+
218
+
175
219
  end
176
220
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: klepto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.2.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2013-04-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: poltergeist
16
- requirement: &70341397514440 !ruby/object:Gem::Requirement
16
+ requirement: &70115769000140 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - =
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.1.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70341397514440
24
+ version_requirements: *70115769000140
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: capybara
27
- requirement: &70341397512360 !ruby/object:Gem::Requirement
27
+ requirement: &70115768999120 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - =
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 2.0.2
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70341397512360
35
+ version_requirements: *70115768999120
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: nokogiri
38
- requirement: &70341397509940 !ruby/object:Gem::Requirement
38
+ requirement: &70115768997540 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 1.5.6
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70341397509940
46
+ version_requirements: *70115768997540
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: activesupport
49
- requirement: &70341397506060 !ruby/object:Gem::Requirement
49
+ requirement: &70115768995980 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70341397506060
57
+ version_requirements: *70115768995980
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: multi_json
60
- requirement: &70341397504900 !ruby/object:Gem::Requirement
60
+ requirement: &70115768994800 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,7 +65,7 @@ dependencies:
65
65
  version: '1.0'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *70341397504900
68
+ version_requirements: *70115768994800
69
69
  description: Tearing up web pages into ActiveRecord resources
70
70
  email:
71
71
  - github@coryodaniel.com