klepto 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +23 -17
- data/lib/klepto/bot.rb +66 -0
- data/lib/klepto/browser.rb +8 -0
- data/lib/klepto/config.rb +29 -0
- data/lib/klepto/structure.rb +3 -24
- data/lib/klepto/version.rb +1 -1
- data/lib/klepto.rb +2 -1
- data/samples/concept.rb +30 -24
- data/spec/lib/klepto/bot_spec.rb +126 -0
- data/spec/lib/klepto/config_spec.rb +23 -8
- data/spec/lib/klepto/structure_spec.rb +0 -61
- data/spec/orm/active_record.rb +10 -0
- data/spec/spec_helper.rb +1 -0
- metadata +15 -12
data/README.md
CHANGED
@@ -17,7 +17,7 @@ Say you want a bunch of Bieb tweets! How is there not profit in that?
|
|
17
17
|
|
18
18
|
```ruby
|
19
19
|
# Crawl a web site or multiple. Structure#crawl takes a *splat!
|
20
|
-
@structures = Klepto::Structure.crawl("https://twitter.com/justinbieber"){
|
20
|
+
@structures = Klepto::Bot.new("https://twitter.com/justinbieber"){
|
21
21
|
# By default, it uses CSS selectors
|
22
22
|
name 'h1.fullname'
|
23
23
|
|
@@ -53,9 +53,29 @@ Say you want a bunch of Bieb tweets! How is there not profit in that?
|
|
53
53
|
permalink '.time a', :css, :attr => :href
|
54
54
|
end
|
55
55
|
|
56
|
+
# Set some headers, why not.
|
57
|
+
config.headers({
|
58
|
+
'Referer' => 'http://www.twitter.com'
|
59
|
+
})
|
60
|
+
|
61
|
+
# on_http_status can take a splat of statuses or ~statuses(4xx,5xx)
|
62
|
+
# you can also have multiple handlers on a status
|
63
|
+
# Note: Capybara automatically follows redirects, so the statuses 3xx
|
64
|
+
# are never present. If you want to watch for a redirect pass see below
|
65
|
+
config.on_http_status(:redirect){
|
66
|
+
puts "Something redirected..."
|
67
|
+
}
|
68
|
+
config.on_http_status(200){
|
69
|
+
puts "Expected this, NBD."
|
70
|
+
}
|
71
|
+
|
72
|
+
config.on_http_status('5xx','4xx'){
|
73
|
+
puts "HOLY CRAP!"
|
74
|
+
}
|
75
|
+
|
56
76
|
# If you want to do something with each resource, like stick it in AR
|
57
77
|
# go for it here...
|
58
|
-
|
78
|
+
config.after do |resource|
|
59
79
|
@user = User.new
|
60
80
|
@user.name = resource[:name]
|
61
81
|
@user.username = resource[:username]
|
@@ -79,7 +99,7 @@ end
|
|
79
99
|
```ruby
|
80
100
|
@html = Capybara::Node::Simple.new(@html_string)
|
81
101
|
@structure = Klepto::Structure.build(@html){
|
82
|
-
# inside the build method, everything works the same as Structure.crawl
|
102
|
+
# inside the build method, everything works the same as Bot.new
|
83
103
|
name 'h1.fullname'
|
84
104
|
username 'span.screen-name'
|
85
105
|
|
@@ -98,20 +118,6 @@ end
|
|
98
118
|
}
|
99
119
|
```
|
100
120
|
|
101
|
-
## Extra Configuration
|
102
|
-
```ruby
|
103
|
-
config = {
|
104
|
-
:headers => {
|
105
|
-
'Referer' => 'http://www.twitter.com',
|
106
|
-
'X-Sup-Dawg' => "Yo, What's up?"
|
107
|
-
}
|
108
|
-
}
|
109
|
-
@structures = Klepto::Structure.crawl("https://twitter.com/justinbieber",config){
|
110
|
-
#... yada, yada
|
111
|
-
}
|
112
|
-
```
|
113
|
-
|
114
|
-
|
115
121
|
|
116
122
|
## Stuff I'm going to add.
|
117
123
|
Cookie Stuffing
|
data/lib/klepto/bot.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
module Klepto
|
2
|
+
class Bot
|
3
|
+
attr_reader :config
|
4
|
+
|
5
|
+
def initialize(*urls, &block)
|
6
|
+
@config = Klepto::Config.new
|
7
|
+
@config.urls urls
|
8
|
+
@queue = []
|
9
|
+
|
10
|
+
instance_eval &block
|
11
|
+
|
12
|
+
instance_eval <<-EOS
|
13
|
+
def queue; @queue; end;
|
14
|
+
def resources; @resources; end;
|
15
|
+
EOS
|
16
|
+
|
17
|
+
__process!
|
18
|
+
end
|
19
|
+
|
20
|
+
def __dispatch_handlers_for(status_code)
|
21
|
+
if status_code.is_a?(Fixnum)
|
22
|
+
elsif status_code.is_a?(Symbol)
|
23
|
+
elsif status_code.is_a?(String)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def __process!
|
28
|
+
@resources = []
|
29
|
+
|
30
|
+
config.urls.each do |url|
|
31
|
+
browser = Klepto::Browser.new
|
32
|
+
|
33
|
+
browser.set_headers config.headers
|
34
|
+
browser.fetch! url
|
35
|
+
|
36
|
+
statuses = [browser.status, browser.statusx]
|
37
|
+
statuses.push :redirect if url != browser.page.current_url
|
38
|
+
statuses.each do |status|
|
39
|
+
config.dispatch_status_handlers(status, browser.page)
|
40
|
+
end
|
41
|
+
|
42
|
+
structure = Structure.new(browser.page)
|
43
|
+
|
44
|
+
queue.each do |instruction|
|
45
|
+
if instruction[2]
|
46
|
+
structure.send instruction[0], *instruction[1], &instruction[2]
|
47
|
+
else
|
48
|
+
structure.send instruction[0], *instruction[1]
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
config.after_handlers[:each].each do |ah|
|
53
|
+
ah.call(structure._hash)
|
54
|
+
end
|
55
|
+
|
56
|
+
resources << structure._hash
|
57
|
+
end
|
58
|
+
|
59
|
+
@resources
|
60
|
+
end
|
61
|
+
|
62
|
+
def method_missing(meth, *args, &block)
|
63
|
+
@queue.push([meth, args, block])
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
data/lib/klepto/browser.rb
CHANGED
data/lib/klepto/config.rb
CHANGED
@@ -1,8 +1,13 @@
|
|
1
1
|
module Klepto
|
2
2
|
class Config
|
3
|
+
attr_reader :after_handlers
|
4
|
+
|
3
5
|
def initialize
|
4
6
|
@headers = {}
|
5
7
|
@urls = []
|
8
|
+
@after_handlers = {:each => []}
|
9
|
+
@before_handlers = {:each => []}
|
10
|
+
@status_handlers = {}
|
6
11
|
end
|
7
12
|
|
8
13
|
def headers(_headers=nil)
|
@@ -10,8 +15,32 @@ module Klepto
|
|
10
15
|
@headers
|
11
16
|
end
|
12
17
|
|
18
|
+
def on_http_status(*statuses,&block)
|
19
|
+
statuses.each do |status|
|
20
|
+
@status_handlers[status] ||= []
|
21
|
+
@status_handlers[status].push block
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def dispatch_status_handlers(status, page)
|
26
|
+
handlers = @status_handlers[status]
|
27
|
+
if handlers.present?
|
28
|
+
@status_handlers[status].each do |handler|
|
29
|
+
handler.call(page)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def after(which = :each, &block)
|
35
|
+
@after_handlers[which] ||= []
|
36
|
+
@after_handlers[which].push block
|
37
|
+
end
|
38
|
+
|
13
39
|
def url(*args)
|
14
40
|
@urls += args
|
41
|
+
@urls.flatten!
|
42
|
+
@urls.uniq!
|
43
|
+
@urls
|
15
44
|
end
|
16
45
|
alias :urls :url
|
17
46
|
end
|
data/lib/klepto/structure.rb
CHANGED
@@ -1,31 +1,14 @@
|
|
1
1
|
module Klepto
|
2
2
|
class Structure
|
3
3
|
def self.build(_context=nil, _parent=nil, &block)
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
hb._after_handler.call(hb._hash)
|
8
|
-
end
|
9
|
-
|
10
|
-
hb._hash
|
11
|
-
end
|
12
|
-
|
13
|
-
def self.crawl(*urls, &block)
|
14
|
-
config = urls.last.is_a?(Hash) ? urls.pop : {}
|
15
|
-
resources = []
|
16
|
-
urls.each do |url|
|
17
|
-
browser = Klepto::Browser.new
|
18
|
-
browser.set_headers config[:headers]
|
19
|
-
browser.fetch! url
|
20
|
-
resources << Structure.build(browser.page, &block)
|
21
|
-
end
|
22
|
-
resources
|
4
|
+
structure = Structure.new(_context, _parent)
|
5
|
+
structure.instance_eval &block
|
6
|
+
structure._hash
|
23
7
|
end
|
24
8
|
|
25
9
|
attr_reader :_parent
|
26
10
|
attr_reader :_hash
|
27
11
|
attr_reader :_context
|
28
|
-
attr_reader :_after_handler
|
29
12
|
|
30
13
|
def initialize(_context=nil, _parent=nil)
|
31
14
|
@_context = _context
|
@@ -34,10 +17,6 @@ module Klepto
|
|
34
17
|
@_after_handler = nil
|
35
18
|
end
|
36
19
|
|
37
|
-
def after_crawl(&block)
|
38
|
-
@_after_handler = block
|
39
|
-
end
|
40
|
-
|
41
20
|
#options[:as] :collection, :resource
|
42
21
|
#options[:match] :first, :all
|
43
22
|
#options[:syntax] :xpath, :css
|
data/lib/klepto/version.rb
CHANGED
data/lib/klepto.rb
CHANGED
data/samples/concept.rb
CHANGED
@@ -2,30 +2,41 @@
|
|
2
2
|
require 'bundler/setup'
|
3
3
|
require 'klepto'
|
4
4
|
|
5
|
-
|
5
|
+
Klepto::Bot.new do
|
6
6
|
config.headers 'Referer' => 'http://www.twitter.com'
|
7
|
+
config.on_http_status('5xx','4xx'){
|
8
|
+
puts "HOLY CRAP!"
|
9
|
+
}
|
10
|
+
|
11
|
+
# If you want to do something with each resource, like stick it in AR
|
12
|
+
# go for it here...
|
13
|
+
config.after do |resource|
|
14
|
+
@user = User.new
|
15
|
+
@user.name = resource[:name]
|
16
|
+
@user.username = resource[:username]
|
17
|
+
@user.save
|
18
|
+
|
19
|
+
resource[:tweets].each do |tweet|
|
20
|
+
Tweet.create(tweet)
|
21
|
+
end
|
22
|
+
end
|
7
23
|
|
8
|
-
config.steps [
|
9
|
-
[:GET, 'https://twitter.com/login'],
|
10
|
-
[:POST,'https://twitter.com/sessions',
|
11
|
-
{
|
12
|
-
session: {
|
13
|
-
username_or_email: 'example',
|
14
|
-
password:'123456'
|
15
|
-
}
|
16
|
-
}
|
17
|
-
]
|
18
|
-
]
|
19
24
|
config.urls 'https://twitter.com/justinbieber',
|
20
25
|
'https://twitter.com/ladygaga'
|
21
|
-
# config.cookies 'jsession' => 'abcdefg1234567890'
|
22
|
-
# config.on_http_status(500,404){}
|
23
|
-
# assertions do
|
24
|
-
# end
|
25
|
-
# config.on_failed_assertion(){}
|
26
26
|
|
27
|
+
# config.steps [
|
28
|
+
# [:GET, 'https://twitter.com/login'],
|
29
|
+
# [:POST,'https://twitter.com/sessions',
|
30
|
+
# {
|
31
|
+
# session: {
|
32
|
+
# username_or_email: 'example',
|
33
|
+
# password:'123456'
|
34
|
+
# }
|
35
|
+
# }
|
36
|
+
# ]
|
37
|
+
# ]
|
27
38
|
|
28
|
-
#
|
39
|
+
# Structure the content
|
29
40
|
name 'h1.fullname'
|
30
41
|
username '.username span.screen-name'
|
31
42
|
links 'span.url a', :list, :attr => 'href'
|
@@ -42,9 +53,4 @@ require 'klepto'
|
|
42
53
|
|
43
54
|
permalink '.time a', :css, :attr => :href
|
44
55
|
end
|
45
|
-
end
|
46
|
-
|
47
|
-
# @resources = @structure.parse! #=> Array[Hash]
|
48
|
-
# @resources.each do |resource|
|
49
|
-
# User.create(resource)
|
50
|
-
# end
|
56
|
+
end
|
data/spec/lib/klepto/bot_spec.rb
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Klepto::Bot do
|
4
|
+
describe 'Klepto::Bot.new' do
|
5
|
+
describe 'create a bot with a redirect' do
|
6
|
+
before(:each) do
|
7
|
+
@bot = Klepto::Bot.new("https://www.twitter.com/justinbieber"){
|
8
|
+
name 'h1.fullname'
|
9
|
+
config.on_http_status(:redirect){
|
10
|
+
StatusLog.create message: 'redirect'
|
11
|
+
}
|
12
|
+
config.on_http_status(200){
|
13
|
+
StatusLog.create message: '200'
|
14
|
+
}
|
15
|
+
}
|
16
|
+
@structure = @bot.resources
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'should structure the data' do
|
20
|
+
@structure.first[:name].should match(/Justin/i)
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'should have dispatched status handlers' do
|
24
|
+
statuses = StatusLog.all.map(&:message)
|
25
|
+
statuses.should include 'redirect'
|
26
|
+
statuses.should include '200'
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
describe 'crawling multiple pages' do
|
31
|
+
before(:each) do
|
32
|
+
@bot = Klepto::Bot.new("https://twitter.com/justinbieber"){
|
33
|
+
config.urls "https://twitter.com/ladygaga"
|
34
|
+
name 'h1.fullname'
|
35
|
+
}
|
36
|
+
@structure = @bot.resources
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'should have both pages data' do
|
40
|
+
@structure.first[:name].should match(/Justin/i)
|
41
|
+
@structure.last[:name].should match(/Lady/i)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
describe 'creating a bot' do
|
46
|
+
before(:each) do
|
47
|
+
@bot = Klepto::Bot.new("https://twitter.com/justinbieber"){
|
48
|
+
config.headers({
|
49
|
+
'Referer' => 'http://www.twitter.com',
|
50
|
+
'X-Sup-Dawg' => "Yo, What's up?"
|
51
|
+
})
|
52
|
+
|
53
|
+
# Structure that stuff
|
54
|
+
name 'h1.fullname'
|
55
|
+
username "//span[contains(concat(' ',normalize-space(@class),' '),' screen-name ')]", :syntax => :xpath
|
56
|
+
tweet_ids 'li.stream-item', :match => :all, :attr => 'data-item-id'
|
57
|
+
links 'span.url a', :match => :all do |node|
|
58
|
+
node[:href]
|
59
|
+
end
|
60
|
+
|
61
|
+
last_tweet 'li.stream-item', :as => :resource do
|
62
|
+
twitter_id do |node|
|
63
|
+
node['data-item-id']
|
64
|
+
end
|
65
|
+
content '.content p'
|
66
|
+
timestamp '._timestamp', :attr => 'data-time'
|
67
|
+
permalink '.time a', :attr => :href
|
68
|
+
end
|
69
|
+
|
70
|
+
tweets 'li.stream-item', :as => :collection do
|
71
|
+
twitter_id do |node|
|
72
|
+
node['data-item-id']
|
73
|
+
end
|
74
|
+
tweet '.content p', :css
|
75
|
+
timestamp '._timestamp', :attr => 'data-time'
|
76
|
+
permalink '.time a', :css, :attr => :href
|
77
|
+
end
|
78
|
+
|
79
|
+
config.on_http_status('2xx'){
|
80
|
+
StatusLog.create message: '2xx'
|
81
|
+
}
|
82
|
+
|
83
|
+
config.on_http_status(:redirect){
|
84
|
+
StatusLog.create message: 'redirect'
|
85
|
+
}
|
86
|
+
|
87
|
+
config.on_http_status(200){
|
88
|
+
StatusLog.create message: '200'
|
89
|
+
}
|
90
|
+
|
91
|
+
config.after(:each) do |resource|
|
92
|
+
@user = User.new
|
93
|
+
@user.name = resource[:name]
|
94
|
+
@user.username = resource[:username]
|
95
|
+
@user.save
|
96
|
+
|
97
|
+
resource[:tweets].each do |tweet|
|
98
|
+
Tweet.create(tweet)
|
99
|
+
end
|
100
|
+
end
|
101
|
+
}
|
102
|
+
@structure = @bot.resources
|
103
|
+
end
|
104
|
+
|
105
|
+
it 'should structure the data' do
|
106
|
+
@structure.first[:name].should match(/Justin/i)
|
107
|
+
@structure.first[:links].should == ["http://www.youtube.com/justinbieber"]
|
108
|
+
@structure.first[:username].should eq '@justinbieber'
|
109
|
+
@structure.first[:last_tweet][:twitter_id].should == @structure.first[:tweets].first[:twitter_id]
|
110
|
+
end
|
111
|
+
|
112
|
+
it 'should store the data' do
|
113
|
+
User.count.should be(1)
|
114
|
+
Tweet.count.should_not be(0)
|
115
|
+
end
|
116
|
+
|
117
|
+
it 'should have dispatched status handlers' do
|
118
|
+
statuses = StatusLog.all.map(&:message)
|
119
|
+
|
120
|
+
statuses.should_not include 'redirect'
|
121
|
+
statuses.should include '200'
|
122
|
+
statuses.should include '2xx'
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
data/spec/lib/klepto/config_spec.rb
CHANGED
@@ -5,26 +5,41 @@ describe Klepto::Config do
|
|
5
5
|
@config = Klepto::Config.new
|
6
6
|
@config.headers({'Referer' => 'http://example.com'})
|
7
7
|
@config.urls 'http://example.com', 'http://www.iana.org'
|
8
|
+
@config.on_http_status(200){
|
9
|
+
"Its 200"
|
10
|
+
}
|
11
|
+
@config.on_http_status('2xx'){
|
12
|
+
"Its 2xx"
|
13
|
+
}
|
14
|
+
@config.on_http_status('5xx','4xx'){
|
15
|
+
"Its crazy."
|
16
|
+
}
|
8
17
|
end
|
9
18
|
|
10
19
|
it 'should be able to set headers' do
|
11
20
|
@config.headers['Referer'].should eq('http://example.com')
|
12
21
|
end
|
13
22
|
|
23
|
+
it 'should have a 2xx status handler' do
|
24
|
+
@config.instance_variable_get("@status_handlers")['2xx'].first.call.should eq ('Its 2xx')
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'should have a 200 status handler' do
|
28
|
+
@config.instance_variable_get("@status_handlers")[200].first.call.should eq ('Its 200')
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'should have a 4xx and 5xx status handler' do
|
32
|
+
@config.instance_variable_get("@status_handlers")['5xx'].first.call.should eq ('Its crazy.')
|
33
|
+
@config.instance_variable_get("@status_handlers")['4xx'].first.call.should eq ('Its crazy.')
|
34
|
+
end
|
35
|
+
|
14
36
|
it 'should be able to set URLs' do
|
15
37
|
@config.urls.should == ['http://example.com', 'http://www.iana.org']
|
16
38
|
end
|
17
39
|
|
40
|
+
pending 'should be able to set before handlers'
|
18
41
|
pending 'should be able to set cookies'
|
19
42
|
pending 'should be able to set steps'
|
20
43
|
pending 'should be able to set assertions'
|
21
|
-
pending 'should be able to set on_http_status handler'
|
22
44
|
pending 'should be able to set on_failed_assertion handler'
|
23
|
-
pending 'should be a sexier config' do
|
24
|
-
# Klepto::Structure.crawl("https://twitter.com/justinbieber"){
|
25
|
-
# config.headers({
|
26
|
-
# "Referer" => "http://example.com"
|
27
|
-
# })
|
28
|
-
# }
|
29
|
-
end
|
30
45
|
end
|
data/spec/lib/klepto/structure_spec.rb
CHANGED
@@ -41,65 +41,4 @@ describe Klepto::Structure do
|
|
41
41
|
@structure[:last_tweet][:twitter_id].should == @structure[:tweets].first[:twitter_id]
|
42
42
|
end
|
43
43
|
end
|
44
|
-
|
45
|
-
describe 'Klepto::Structure.crawl' do
|
46
|
-
before(:each) do
|
47
|
-
config = {
|
48
|
-
:headers => {
|
49
|
-
'Referer' => 'http://www.twitter.com',
|
50
|
-
'X-Sup-Dawg' => "Yo, What's up?"
|
51
|
-
}
|
52
|
-
}
|
53
|
-
@structure = Klepto::Structure.crawl("https://twitter.com/justinbieber", config){
|
54
|
-
# Structure that stuff
|
55
|
-
name 'h1.fullname'
|
56
|
-
username "//span[contains(concat(' ',normalize-space(@class),' '),' screen-name ')]", :syntax => :xpath
|
57
|
-
tweet_ids 'li.stream-item', :match => :all, :attr => 'data-item-id'
|
58
|
-
links 'span.url a', :match => :all do |node|
|
59
|
-
node[:href]
|
60
|
-
end
|
61
|
-
|
62
|
-
last_tweet 'li.stream-item', :as => :resource do
|
63
|
-
twitter_id do |node|
|
64
|
-
node['data-item-id']
|
65
|
-
end
|
66
|
-
content '.content p'
|
67
|
-
timestamp '._timestamp', :attr => 'data-time'
|
68
|
-
permalink '.time a', :attr => :href
|
69
|
-
end
|
70
|
-
|
71
|
-
tweets 'li.stream-item', :as => :collection do
|
72
|
-
twitter_id do |node|
|
73
|
-
node['data-item-id']
|
74
|
-
end
|
75
|
-
tweet '.content p', :css
|
76
|
-
timestamp '._timestamp', :attr => 'data-time'
|
77
|
-
permalink '.time a', :css, :attr => :href
|
78
|
-
end
|
79
|
-
|
80
|
-
after_crawl do |resource|
|
81
|
-
@user = User.new
|
82
|
-
@user.name = resource[:name]
|
83
|
-
@user.username = resource[:username]
|
84
|
-
@user.save
|
85
|
-
|
86
|
-
resource[:tweets].each do |tweet|
|
87
|
-
Tweet.create(tweet)
|
88
|
-
end
|
89
|
-
end
|
90
|
-
}
|
91
|
-
end
|
92
|
-
|
93
|
-
it 'should structure the data' do
|
94
|
-
@structure.first[:name].should match(/Justin/i)
|
95
|
-
@structure.first[:links].should == ["http://www.youtube.com/justinbieber"]
|
96
|
-
@structure.first[:username].should eq '@justinbieber'
|
97
|
-
@structure.first[:last_tweet][:twitter_id].should == @structure.first[:tweets].first[:twitter_id]
|
98
|
-
end
|
99
|
-
|
100
|
-
it 'should store the data' do
|
101
|
-
User.count.should be(1)
|
102
|
-
Tweet.count.should_not be(0)
|
103
|
-
end
|
104
|
-
end
|
105
44
|
end
|
data/spec/orm/active_record.rb
CHANGED
@@ -19,14 +19,24 @@ class TestMigration < ActiveRecord::Migration
|
|
19
19
|
t.string :name
|
20
20
|
t.string :username
|
21
21
|
end
|
22
|
+
|
23
|
+
create_table :status_logs, :force => true do |t|
|
24
|
+
t.string :message
|
25
|
+
end
|
22
26
|
end
|
23
27
|
|
24
28
|
def self.down
|
25
29
|
drop_table :tweets
|
30
|
+
drop_table :status_logs
|
26
31
|
drop_table :users
|
27
32
|
end
|
28
33
|
end
|
29
34
|
|
35
|
+
|
36
|
+
class StatusLog < ActiveRecord::Base
|
37
|
+
|
38
|
+
end
|
39
|
+
|
30
40
|
class Tweet < ActiveRecord::Base
|
31
41
|
validates_presence_of :timestamp, :twitter_id, :permalink, :tweet
|
32
42
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: klepto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-04-
|
12
|
+
date: 2013-04-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: poltergeist
|
16
|
-
requirement: &
|
16
|
+
requirement: &70235243869100 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - =
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.1.0
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70235243869100
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: capybara
|
27
|
-
requirement: &
|
27
|
+
requirement: &70235243868100 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - =
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 2.0.2
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70235243868100
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: nokogiri
|
38
|
-
requirement: &
|
38
|
+
requirement: &70235243867320 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 1.5.6
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70235243867320
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: activesupport
|
49
|
-
requirement: &
|
49
|
+
requirement: &70235243866680 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70235243866680
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: multi_json
|
60
|
-
requirement: &
|
60
|
+
requirement: &70235243865240 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,7 +65,7 @@ dependencies:
|
|
65
65
|
version: '1.0'
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70235243865240
|
69
69
|
description: Tearing up web pages into ActiveRecord resources
|
70
70
|
email:
|
71
71
|
- github@coryodaniel.com
|
@@ -83,6 +83,7 @@ files:
|
|
83
83
|
- Rakefile
|
84
84
|
- klepto.gemspec
|
85
85
|
- lib/klepto.rb
|
86
|
+
- lib/klepto/bot.rb
|
86
87
|
- lib/klepto/browser.rb
|
87
88
|
- lib/klepto/config.rb
|
88
89
|
- lib/klepto/structure.rb
|
@@ -97,6 +98,7 @@ files:
|
|
97
98
|
- spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_scrape_the_node_that_the_crawler_is_scoped_to.yml
|
98
99
|
- spec/cassettes/Klepto_Crawler/standard_interaction/should_have_a_CSS_scope.yml
|
99
100
|
- spec/cassettes/Klepto_Crawler/standard_interaction/should_have_a_desired_syntax.yml
|
101
|
+
- spec/lib/klepto/bot_spec.rb
|
100
102
|
- spec/lib/klepto/browser_spec.rb
|
101
103
|
- spec/lib/klepto/config_spec.rb
|
102
104
|
- spec/lib/klepto/structure_spec.rb
|
@@ -136,6 +138,7 @@ test_files:
|
|
136
138
|
- spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_scrape_the_node_that_the_crawler_is_scoped_to.yml
|
137
139
|
- spec/cassettes/Klepto_Crawler/standard_interaction/should_have_a_CSS_scope.yml
|
138
140
|
- spec/cassettes/Klepto_Crawler/standard_interaction/should_have_a_desired_syntax.yml
|
141
|
+
- spec/lib/klepto/bot_spec.rb
|
139
142
|
- spec/lib/klepto/browser_spec.rb
|
140
143
|
- spec/lib/klepto/config_spec.rb
|
141
144
|
- spec/lib/klepto/structure_spec.rb
|