klepto 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +2 -11
- data/lib/klepto/bot.rb +17 -4
- data/lib/klepto/config.rb +8 -0
- data/lib/klepto/structure.rb +8 -3
- data/lib/klepto/version.rb +1 -1
- data/spec/lib/klepto/bot_spec.rb +44 -0
- metadata +11 -11
data/README.md
CHANGED
@@ -84,7 +84,8 @@ Say you want a bunch of Bieb tweets! How is there not profit in that?
|
|
84
84
|
end
|
85
85
|
|
86
86
|
# Multiple Nested structures? Let klepto know this is a collection of resources
|
87
|
-
|
87
|
+
# Does Bieber tweet too much? Maybe. Let's only get the new stuff kids crave.
|
88
|
+
tweets 'li.stream-item', :as => :collection, :limit => 10 do
|
88
89
|
twitter_id do |node|
|
89
90
|
node['data-item-id']
|
90
91
|
end
|
@@ -190,16 +191,6 @@ cookies({
|
|
190
191
|
})
|
191
192
|
```
|
192
193
|
|
193
|
-
event handlers...
|
194
|
-
--------------------
|
195
|
-
```ruby
|
196
|
-
on_http_status(500,404) do |response, bot|
|
197
|
-
email('admin@example.com', bot.status, bot.summary)
|
198
|
-
end
|
199
|
-
on_http_status('3xx') do |response, bot|
|
200
|
-
end
|
201
|
-
```
|
202
|
-
|
203
194
|
Pre-req Steps
|
204
195
|
--------------------
|
205
196
|
```ruby
|
data/lib/klepto/bot.rb
CHANGED
@@ -7,8 +7,12 @@ module Klepto
|
|
7
7
|
@config.urls urls
|
8
8
|
@queue = []
|
9
9
|
|
10
|
+
# Evaluate the block as DSL, proxy off anything that isn't on #config
|
11
|
+
# to a queue, then apply that queue to the top-level Klepto::Structure
|
10
12
|
instance_eval &block
|
11
13
|
|
14
|
+
# After DSL evaluation is queued up, put some methods onto this instance
|
15
|
+
# and restore method_missing (for sanity sake)
|
12
16
|
instance_eval <<-EOS
|
13
17
|
def queue; @queue; end;
|
14
18
|
def resources; @resources; end;
|
@@ -20,6 +24,7 @@ EOS
|
|
20
24
|
__process!
|
21
25
|
end
|
22
26
|
|
27
|
+
# Structure all the pages
|
23
28
|
def __process!
|
24
29
|
@resources = []
|
25
30
|
|
@@ -29,16 +34,24 @@ EOS
|
|
29
34
|
browser.set_headers config.headers
|
30
35
|
browser.fetch! url
|
31
36
|
|
37
|
+
# Fire callbacks on GET
|
32
38
|
config.after_handlers[:get].each do |ah|
|
33
39
|
ah.call(browser.page)
|
34
40
|
end
|
35
41
|
|
42
|
+
# Capybara automatically follows redirects... Checking the page here
|
43
|
+
# to see if it has changed, and if so add it on to the stack of statuses.
|
44
|
+
# statuses is an array because it holds the actual HTTP response code and an
|
45
|
+
# approximate code (2xx for example). :redirect will be pushed onto the stack if a
|
46
|
+
# redirect happened.
|
36
47
|
statuses = [browser.status, browser.statusx]
|
37
48
|
statuses.push :redirect if url != browser.page.current_url
|
49
|
+
# Dispatch all the handlers for HTTP Status Codes.
|
38
50
|
statuses.each do |status|
|
39
51
|
config.dispatch_status_handlers(status, browser.page)
|
40
52
|
end
|
41
53
|
|
54
|
+
# If the page was not a failure or if not aborting, structure that bad boy.
|
42
55
|
if !browser.failure? || (browser.failure? && !config.abort_on_failure?)
|
43
56
|
resources << __structure(browser.page)
|
44
57
|
else
|
@@ -54,6 +67,7 @@ EOS
|
|
54
67
|
def __structure(context)
|
55
68
|
structure = Structure.new(context)
|
56
69
|
|
70
|
+
# A queue of DSL instructions
|
57
71
|
queue.each do |instruction|
|
58
72
|
if instruction[2]
|
59
73
|
structure.send instruction[0], *instruction[1], &instruction[2]
|
@@ -62,10 +76,9 @@ EOS
|
|
62
76
|
end
|
63
77
|
end
|
64
78
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
79
|
+
# Call after(:each) handlers...
|
80
|
+
config.after_handlers[:each].each { |ah| ah.call(structure._hash) }
|
81
|
+
|
69
82
|
structure._hash
|
70
83
|
end
|
71
84
|
|
data/lib/klepto/config.rb
CHANGED
@@ -13,8 +13,16 @@ module Klepto
|
|
13
13
|
}
|
14
14
|
@before_handlers = {:each => []}
|
15
15
|
@status_handlers = {}
|
16
|
+
@handlers = {}
|
16
17
|
end
|
17
18
|
|
19
|
+
# TODO: DRY up handlers...
|
20
|
+
# def dispatch(group, handler, *args)
|
21
|
+
# @handlers[group] ||= {}
|
22
|
+
# @handlers[group][handler] ||= []
|
23
|
+
# @handlers[group][handler].each{|handler| handler.call(*args)}
|
24
|
+
# end
|
25
|
+
|
18
26
|
def headers(_headers=nil)
|
19
27
|
@headers = _headers if _headers
|
20
28
|
@headers
|
data/lib/klepto/structure.rb
CHANGED
@@ -20,17 +20,20 @@ module Klepto
|
|
20
20
|
#options[:as] :collection, :resource
|
21
21
|
#options[:match] :first, :all
|
22
22
|
#options[:syntax] :xpath, :css
|
23
|
+
#options[:limit] Integer elements to structure when :match => :all or :as => :collection
|
23
24
|
def method_missing(meth, *args, &block)
|
24
25
|
options = args.last.is_a?(Hash) ? args.pop : {}
|
25
26
|
options[:syntax] ||= :css
|
26
27
|
options[:match] ||= :first
|
27
28
|
options[:attr] ||= nil
|
29
|
+
options[:limit] ||= nil
|
28
30
|
selector = args.shift
|
29
31
|
|
30
32
|
if options[:as] == :collection
|
31
33
|
@_hash[meth] = []
|
32
34
|
result = _context.all( options[:syntax], selector )
|
33
|
-
result.each do |ele|
|
35
|
+
options[:limit] ||= result.length
|
36
|
+
result[0, options[:limit]].each do |ele|
|
34
37
|
@_hash[meth].push Structure.build(ele, self, &block)
|
35
38
|
end
|
36
39
|
elsif options[:as] == :resource
|
@@ -45,7 +48,8 @@ module Klepto
|
|
45
48
|
|
46
49
|
if options[:match] == :all
|
47
50
|
@_hash[meth] = []
|
48
|
-
result.each do |node|
|
51
|
+
options[:limit] ||= result.length
|
52
|
+
result[0, options[:limit]].each do |node|
|
49
53
|
@_hash[meth] << block.call( node )
|
50
54
|
end
|
51
55
|
else
|
@@ -55,7 +59,8 @@ module Klepto
|
|
55
59
|
result = _context.send( options[:match], options[:syntax], selector )
|
56
60
|
if options[:match] == :all
|
57
61
|
@_hash[meth] = []
|
58
|
-
result.each do |node|
|
62
|
+
options[:limit] ||= result.length
|
63
|
+
result[0, options[:limit]].each do |node|
|
59
64
|
@_hash[meth] << (node[options[:attr]] || node.try(:text))
|
60
65
|
end
|
61
66
|
else
|
data/lib/klepto/version.rb
CHANGED
data/spec/lib/klepto/bot_spec.rb
CHANGED
@@ -172,5 +172,49 @@ describe Klepto::Bot do
|
|
172
172
|
statuses.should include 'got a page'
|
173
173
|
end
|
174
174
|
end
|
175
|
+
|
176
|
+
|
177
|
+
describe 'creating a bot with a node limit' do
|
178
|
+
before(:each) do
|
179
|
+
@bot = Klepto::Bot.new("https://twitter.com/justinbieber"){
|
180
|
+
config.headers({
|
181
|
+
'Referer' => 'http://www.twitter.com',
|
182
|
+
'X-Sup-Dawg' => "Yo, What's up?"
|
183
|
+
})
|
184
|
+
|
185
|
+
# Structure that stuff
|
186
|
+
name 'h1.fullname'
|
187
|
+
username "span.screen-name"
|
188
|
+
|
189
|
+
tweets 'li.stream-item', :as => :collection, :limit => 5 do
|
190
|
+
twitter_id do |node|
|
191
|
+
node['data-item-id']
|
192
|
+
end
|
193
|
+
tweet '.content p', :css
|
194
|
+
timestamp '._timestamp', :attr => 'data-time'
|
195
|
+
permalink '.time a', :css, :attr => :href
|
196
|
+
end
|
197
|
+
|
198
|
+
config.after(:each) do |resource|
|
199
|
+
@user = User.new
|
200
|
+
@user.name = resource[:name]
|
201
|
+
@user.username = resource[:username]
|
202
|
+
@user.save
|
203
|
+
|
204
|
+
resource[:tweets].each do |tweet|
|
205
|
+
Tweet.create(tweet)
|
206
|
+
end
|
207
|
+
end
|
208
|
+
}
|
209
|
+
@structure = @bot.resources
|
210
|
+
end
|
211
|
+
|
212
|
+
it 'should limit the nodes structured' do
|
213
|
+
User.count.should be(1)
|
214
|
+
Tweet.count.should be(5)
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
|
175
219
|
end
|
176
220
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: klepto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.5
|
4
|
+
version: 0.2.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2013-04-19 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: poltergeist
|
16
|
-
requirement: &
|
16
|
+
requirement: &70115769000140 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - =
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.1.0
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70115769000140
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: capybara
|
27
|
-
requirement: &
|
27
|
+
requirement: &70115768999120 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - =
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 2.0.2
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70115768999120
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: nokogiri
|
38
|
-
requirement: &
|
38
|
+
requirement: &70115768997540 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 1.5.6
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70115768997540
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: activesupport
|
49
|
-
requirement: &
|
49
|
+
requirement: &70115768995980 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70115768995980
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: multi_json
|
60
|
-
requirement: &
|
60
|
+
requirement: &70115768994800 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,7 +65,7 @@ dependencies:
|
|
65
65
|
version: '1.0'
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70115768994800
|
69
69
|
description: Tearing up web pages into ActiveRecord resources
|
70
70
|
email:
|
71
71
|
- github@coryodaniel.com
|