klepto 0.2.5 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -84,7 +84,8 @@ Say you want a bunch of Bieb tweets! How is there not profit in that?
84
84
  end
85
85
 
86
86
  # Multiple Nested structures? Let klepto know this is a collection of resources
87
- tweets 'li.stream-item', :as => :collection do
87
+ # Does Bieber tweet too much? Maybe. Let's only get the new stuff kids crave.
88
+ tweets 'li.stream-item', :as => :collection, :limit => 10 do
88
89
  twitter_id do |node|
89
90
  node['data-item-id']
90
91
  end
@@ -190,16 +191,6 @@ cookies({
190
191
  })
191
192
  ```
192
193
 
193
- event handlers...
194
- --------------------
195
- ```ruby
196
- on_http_status(500,404) do |response, bot|
197
- email('admin@example.com', bot.status, bot.summary)
198
- end
199
- on_http_status('3xx') do |response, bot|
200
- end
201
- ```
202
-
203
194
  Pre-req Steps
204
195
  --------------------
205
196
  ```ruby
data/lib/klepto/bot.rb CHANGED
@@ -7,8 +7,12 @@ module Klepto
7
7
  @config.urls urls
8
8
  @queue = []
9
9
 
10
+ # Evaluate the block as DSL, proxy off anything that isn't on #config
11
+ # to a queue, then apply that queue to the top-level Klepto::Structure
10
12
  instance_eval &block
11
13
 
14
+ # After DSL evaluation is queued up, put some methods onto this instance
15
+ # and restore method_missing (for sanity's sake)
12
16
  instance_eval <<-EOS
13
17
  def queue; @queue; end;
14
18
  def resources; @resources; end;
@@ -20,6 +24,7 @@ EOS
20
24
  __process!
21
25
  end
22
26
 
27
+ # Structure all the pages
23
28
  def __process!
24
29
  @resources = []
25
30
 
@@ -29,16 +34,24 @@ EOS
29
34
  browser.set_headers config.headers
30
35
  browser.fetch! url
31
36
 
37
+ # Fire callbacks on GET
32
38
  config.after_handlers[:get].each do |ah|
33
39
  ah.call(browser.page)
34
40
  end
35
41
 
42
+ # Capybara automatically follows redirects... Checking the page here
43
+ # to see if it has changed, and if so add it on to the stack of statuses.
44
+ # statuses is an array because it holds the actual HTTP response code and an
45
+ # approximate code (2xx for example). :redirect will be pushed onto the stack if a
46
+ # redirect happened.
36
47
  statuses = [browser.status, browser.statusx]
37
48
  statuses.push :redirect if url != browser.page.current_url
49
+ # Dispatch all the handlers for HTTP Status Codes.
38
50
  statuses.each do |status|
39
51
  config.dispatch_status_handlers(status, browser.page)
40
52
  end
41
53
 
54
+ # If the page was not a failure or if not aborting, structure that bad boy.
42
55
  if !browser.failure? || (browser.failure? && !config.abort_on_failure?)
43
56
  resources << __structure(browser.page)
44
57
  else
@@ -54,6 +67,7 @@ EOS
54
67
  def __structure(context)
55
68
  structure = Structure.new(context)
56
69
 
70
+ # A queue of DSL instructions
57
71
  queue.each do |instruction|
58
72
  if instruction[2]
59
73
  structure.send instruction[0], *instruction[1], &instruction[2]
@@ -62,10 +76,9 @@ EOS
62
76
  end
63
77
  end
64
78
 
65
- config.after_handlers[:each].each do |ah|
66
- ah.call(structure._hash)
67
- end
68
-
79
+ # Call after(:each) handlers...
80
+ config.after_handlers[:each].each { |ah| ah.call(structure._hash) }
81
+
69
82
  structure._hash
70
83
  end
71
84
 
data/lib/klepto/config.rb CHANGED
@@ -13,8 +13,16 @@ module Klepto
13
13
  }
14
14
  @before_handlers = {:each => []}
15
15
  @status_handlers = {}
16
+ @handlers = {}
16
17
  end
17
18
 
19
+ # TODO: DRY up handlers...
20
+ # def dispatch(group, handler, *args)
21
+ # @handlers[group] ||= {}
22
+ # @handlers[group][handler] ||= []
23
+ # @handlers[group][handler].each{|handler| handler.call(*args)}
24
+ # end
25
+
18
26
  def headers(_headers=nil)
19
27
  @headers = _headers if _headers
20
28
  @headers
@@ -20,17 +20,20 @@ module Klepto
20
20
  #options[:as] :collection, :resource
21
21
  #options[:match] :first, :all
22
22
  #options[:syntax] :xpath, :css
23
+ #options[:limit] Integer, maximum number of elements to structure when :match => :all or :as => :collection
23
24
  def method_missing(meth, *args, &block)
24
25
  options = args.last.is_a?(Hash) ? args.pop : {}
25
26
  options[:syntax] ||= :css
26
27
  options[:match] ||= :first
27
28
  options[:attr] ||= nil
29
+ options[:limit] ||= nil
28
30
  selector = args.shift
29
31
 
30
32
  if options[:as] == :collection
31
33
  @_hash[meth] = []
32
34
  result = _context.all( options[:syntax], selector )
33
- result.each do |ele|
35
+ options[:limit] ||= result.length
36
+ result[0, options[:limit]].each do |ele|
34
37
  @_hash[meth].push Structure.build(ele, self, &block)
35
38
  end
36
39
  elsif options[:as] == :resource
@@ -45,7 +48,8 @@ module Klepto
45
48
 
46
49
  if options[:match] == :all
47
50
  @_hash[meth] = []
48
- result.each do |node|
51
+ options[:limit] ||= result.length
52
+ result[0, options[:limit]].each do |node|
49
53
  @_hash[meth] << block.call( node )
50
54
  end
51
55
  else
@@ -55,7 +59,8 @@ module Klepto
55
59
  result = _context.send( options[:match], options[:syntax], selector )
56
60
  if options[:match] == :all
57
61
  @_hash[meth] = []
58
- result.each do |node|
62
+ options[:limit] ||= result.length
63
+ result[0, options[:limit]].each do |node|
59
64
  @_hash[meth] << (node[options[:attr]] || node.try(:text))
60
65
  end
61
66
  else
@@ -1,3 +1,3 @@
1
1
  module Klepto
2
- VERSION = "0.2.5"
2
+ VERSION = "0.2.6"
3
3
  end
@@ -172,5 +172,49 @@ describe Klepto::Bot do
172
172
  statuses.should include 'got a page'
173
173
  end
174
174
  end
175
+
176
+
177
+ describe 'creating a bot with a node limit' do
178
+ before(:each) do
179
+ @bot = Klepto::Bot.new("https://twitter.com/justinbieber"){
180
+ config.headers({
181
+ 'Referer' => 'http://www.twitter.com',
182
+ 'X-Sup-Dawg' => "Yo, What's up?"
183
+ })
184
+
185
+ # Structure that stuff
186
+ name 'h1.fullname'
187
+ username "span.screen-name"
188
+
189
+ tweets 'li.stream-item', :as => :collection, :limit => 5 do
190
+ twitter_id do |node|
191
+ node['data-item-id']
192
+ end
193
+ tweet '.content p', :css
194
+ timestamp '._timestamp', :attr => 'data-time'
195
+ permalink '.time a', :css, :attr => :href
196
+ end
197
+
198
+ config.after(:each) do |resource|
199
+ @user = User.new
200
+ @user.name = resource[:name]
201
+ @user.username = resource[:username]
202
+ @user.save
203
+
204
+ resource[:tweets].each do |tweet|
205
+ Tweet.create(tweet)
206
+ end
207
+ end
208
+ }
209
+ @structure = @bot.resources
210
+ end
211
+
212
+ it 'should limit the nodes structured' do
213
+ User.count.should be(1)
214
+ Tweet.count.should be(5)
215
+ end
216
+ end
217
+
218
+
175
219
  end
176
220
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: klepto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.2.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2013-04-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: poltergeist
16
- requirement: &70341397514440 !ruby/object:Gem::Requirement
16
+ requirement: &70115769000140 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - =
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.1.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70341397514440
24
+ version_requirements: *70115769000140
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: capybara
27
- requirement: &70341397512360 !ruby/object:Gem::Requirement
27
+ requirement: &70115768999120 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - =
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 2.0.2
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70341397512360
35
+ version_requirements: *70115768999120
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: nokogiri
38
- requirement: &70341397509940 !ruby/object:Gem::Requirement
38
+ requirement: &70115768997540 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 1.5.6
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70341397509940
46
+ version_requirements: *70115768997540
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: activesupport
49
- requirement: &70341397506060 !ruby/object:Gem::Requirement
49
+ requirement: &70115768995980 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70341397506060
57
+ version_requirements: *70115768995980
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: multi_json
60
- requirement: &70341397504900 !ruby/object:Gem::Requirement
60
+ requirement: &70115768994800 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,7 +65,7 @@ dependencies:
65
65
  version: '1.0'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *70341397504900
68
+ version_requirements: *70115768994800
69
69
  description: Tearing up web pages into ActiveRecord resources
70
70
  email:
71
71
  - github@coryodaniel.com