graboid 0.3.5 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +0 -1
- data/VERSION +1 -1
- data/graboid.gemspec +31 -38
- data/lib/graboid.rb +12 -2
- data/lib/graboid/entity.rb +63 -44
- data/lib/graboid/scraper.rb +28 -12
- metadata +10 -29
- data/.gitignore +0 -21
- data/examples/reddit_post.rb +0 -35
- data/examples/tumblr_post.rb +0 -33
data/Rakefile
CHANGED
@@ -12,7 +12,6 @@ begin
|
|
12
12
|
gem.authors = ["Christopher Burnett"]
|
13
13
|
gem.add_development_dependency "rspec", ">= 1.2.9"
|
14
14
|
gem.add_dependency "nokogiri"
|
15
|
-
gem.add_dependency "activesupport"
|
16
15
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
17
16
|
end
|
18
17
|
Jeweler::GemcutterTasks.new
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.3.5
|
1
|
+
0.4.0
|
data/graboid.gemspec
CHANGED
@@ -1,61 +1,57 @@
|
|
1
1
|
# Generated by jeweler
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
-
# Instead, edit Jeweler::Tasks in Rakefile, and run
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{graboid}
|
8
|
-
s.version = "0.3.5"
|
8
|
+
s.version = "0.4.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Christopher Burnett"]
|
12
|
-
s.date = %q{
|
12
|
+
s.date = %q{2011-02-22}
|
13
13
|
s.description = %q{web scraping made easier}
|
14
14
|
s.email = %q{signalstatic@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
16
16
|
"LICENSE",
|
17
|
-
|
17
|
+
"README.mdown"
|
18
18
|
]
|
19
19
|
s.files = [
|
20
20
|
".document",
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
"spec/spec_helper.rb"
|
21
|
+
"LICENSE",
|
22
|
+
"README.mdown",
|
23
|
+
"Rakefile",
|
24
|
+
"VERSION",
|
25
|
+
"examples/active_rain_post.rb",
|
26
|
+
"examples/live_journal_post.rb",
|
27
|
+
"examples/ning_post.rb",
|
28
|
+
"graboid.gemspec",
|
29
|
+
"lib/graboid.rb",
|
30
|
+
"lib/graboid/entity.rb",
|
31
|
+
"lib/graboid/scraper.rb",
|
32
|
+
"spec/fixtures/graboid.jpg",
|
33
|
+
"spec/fixtures/posts.html",
|
34
|
+
"spec/fixtures/server.rb",
|
35
|
+
"spec/fixtures/views/posts.erb",
|
36
|
+
"spec/graboid/entity_spec.rb",
|
37
|
+
"spec/graboid/scraper_spec.rb",
|
38
|
+
"spec/graboid_spec.rb",
|
39
|
+
"spec/spec.opts",
|
40
|
+
"spec/spec_helper.rb"
|
42
41
|
]
|
43
42
|
s.homepage = %q{http://github.com/twoism/graboid}
|
44
|
-
s.rdoc_options = ["--charset=UTF-8"]
|
45
43
|
s.require_paths = ["lib"]
|
46
44
|
s.rubygems_version = %q{1.3.7}
|
47
45
|
s.summary = %q{web scraping made easy}
|
48
46
|
s.test_files = [
|
47
|
+
"examples/active_rain_post.rb",
|
48
|
+
"examples/live_journal_post.rb",
|
49
|
+
"examples/ning_post.rb",
|
49
50
|
"spec/fixtures/server.rb",
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
"examples/active_rain_post.rb",
|
55
|
-
"examples/live_journal_post.rb",
|
56
|
-
"examples/ning_post.rb",
|
57
|
-
"examples/reddit_post.rb",
|
58
|
-
"examples/tumblr_post.rb"
|
51
|
+
"spec/graboid/entity_spec.rb",
|
52
|
+
"spec/graboid/scraper_spec.rb",
|
53
|
+
"spec/graboid_spec.rb",
|
54
|
+
"spec/spec_helper.rb"
|
59
55
|
]
|
60
56
|
|
61
57
|
if s.respond_to? :specification_version then
|
@@ -65,16 +61,13 @@ Gem::Specification.new do |s|
|
|
65
61
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
66
62
|
s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
|
67
63
|
s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
|
68
|
-
s.add_runtime_dependency(%q<activesupport>, [">= 0"])
|
69
64
|
else
|
70
65
|
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
71
66
|
s.add_dependency(%q<nokogiri>, [">= 0"])
|
72
|
-
s.add_dependency(%q<activesupport>, [">= 0"])
|
73
67
|
end
|
74
68
|
else
|
75
69
|
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
76
70
|
s.add_dependency(%q<nokogiri>, [">= 0"])
|
77
|
-
s.add_dependency(%q<activesupport>, [">= 0"])
|
78
71
|
end
|
79
72
|
end
|
80
73
|
|
data/lib/graboid.rb
CHANGED
@@ -1,10 +1,20 @@
|
|
1
|
-
%w{rubygems nokogiri open-uri
|
1
|
+
%w{rubygems nokogiri open-uri ostruct}.each { |f| require f }
|
2
2
|
|
3
3
|
dir = Pathname(__FILE__).dirname.expand_path
|
4
4
|
|
5
5
|
require dir + 'graboid/entity'
|
6
6
|
require dir + 'graboid/scraper'
|
7
7
|
|
8
|
+
class String
|
9
|
+
def underscore
|
10
|
+
self.to_s.gsub(/::/, '/').
|
11
|
+
gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
|
12
|
+
gsub(/([a-z\d])([A-Z])/,'\1_\2').
|
13
|
+
tr("-", "_").
|
14
|
+
downcase
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
8
18
|
module Graboid
|
9
19
|
extend self
|
10
20
|
|
@@ -15,4 +25,4 @@ module Graboid
|
|
15
25
|
def user_agent=(agent)
|
16
26
|
@user_agent = agent
|
17
27
|
end
|
18
|
-
end
|
28
|
+
end
|
data/lib/graboid/entity.rb
CHANGED
@@ -1,68 +1,87 @@
|
|
1
1
|
module Graboid
|
2
2
|
module Entity
|
3
|
-
|
3
|
+
|
4
4
|
def self.included klass
|
5
5
|
klass.class_eval do
|
6
6
|
extend ClassMethods
|
7
7
|
include InstanceMethods
|
8
|
-
|
9
|
-
|
8
|
+
|
9
|
+
inherited_attributes :attribute_map
|
10
|
+
@attribute_map = {}
|
10
11
|
end
|
11
12
|
end
|
12
|
-
|
13
|
+
|
13
14
|
module ClassMethods
|
14
|
-
|
15
|
+
|
16
|
+
def inherited_attributes(*args)
|
17
|
+
@inherited_attributes ||= [:inherited_attributes]
|
18
|
+
@inherited_attributes += args
|
19
|
+
args.each do |arg|
|
20
|
+
class_eval %(
|
21
|
+
class << self; attr_accessor :#{arg} end
|
22
|
+
)
|
23
|
+
end
|
24
|
+
@inherited_attributes
|
25
|
+
end
|
26
|
+
|
27
|
+
def inherited(subclass)
|
28
|
+
@inherited_attributes.each do |inheritable_attribute|
|
29
|
+
instance_var = "@#{inheritable_attribute}"
|
30
|
+
subclass.instance_variable_set(instance_var, instance_variable_get(instance_var))
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
15
34
|
def source
|
16
35
|
@source
|
17
36
|
end
|
18
|
-
|
37
|
+
|
19
38
|
def source=(src)
|
20
39
|
@source = src
|
21
40
|
end
|
22
|
-
|
41
|
+
|
23
42
|
def set name, opts={}, &block
|
24
|
-
opts.merge!(:selector => ".#{name}")
|
43
|
+
opts.merge!(:selector => ".#{name}") if opts[:selector].nil?
|
25
44
|
opts.merge!(:processor => block) if block_given?
|
26
|
-
|
45
|
+
|
27
46
|
attribute_map[name] = opts
|
28
47
|
end
|
29
|
-
|
48
|
+
|
30
49
|
alias_method :field, :set
|
31
|
-
|
50
|
+
|
32
51
|
def selector selector
|
33
52
|
@root_selector = selector
|
34
53
|
end
|
35
|
-
|
54
|
+
|
36
55
|
alias_method :root, :selector
|
37
56
|
|
38
57
|
def root_selector
|
39
58
|
@root_selector || inferred_selector
|
40
59
|
end
|
41
|
-
|
60
|
+
|
42
61
|
def inferred_selector
|
43
62
|
@inferred_selector ||= ".#{self.to_s.underscore}"
|
44
63
|
end
|
45
|
-
|
64
|
+
|
46
65
|
def doc
|
47
66
|
eval "Nokogiri::#{self.mode.to_s.upcase}(read_source)"
|
48
67
|
end
|
49
|
-
|
68
|
+
|
50
69
|
def collection
|
51
70
|
@collection ||= []
|
52
71
|
end
|
53
|
-
|
72
|
+
|
54
73
|
def collection=(col)
|
55
74
|
@collection = col
|
56
75
|
end
|
57
|
-
|
76
|
+
|
58
77
|
def attribute_map
|
59
78
|
read_inheritable_attribute :attribute_map
|
60
79
|
end
|
61
|
-
|
80
|
+
|
62
81
|
def extract_instance fragment
|
63
82
|
new(hash_map(fragment))
|
64
83
|
end
|
65
|
-
|
84
|
+
|
66
85
|
def hash_map fragment
|
67
86
|
attribute_map.inject({}) do |extracted_hash, at|
|
68
87
|
selector, processor = at.last[:selector], at.last[:processor]
|
@@ -72,7 +91,7 @@ module Graboid
|
|
72
91
|
extracted_hash
|
73
92
|
end
|
74
93
|
end
|
75
|
-
|
94
|
+
|
76
95
|
def all_fragments
|
77
96
|
return page_fragments if @pager.nil?
|
78
97
|
old_source = self.source
|
@@ -85,13 +104,13 @@ module Graboid
|
|
85
104
|
self.source = old_source
|
86
105
|
self.collection
|
87
106
|
end
|
88
|
-
|
107
|
+
|
89
108
|
def paginate
|
90
109
|
next_page_url = @pager.call(doc) rescue nil
|
91
110
|
self.source = next_page_url
|
92
111
|
self.current_page += 1
|
93
112
|
end
|
94
|
-
|
113
|
+
|
95
114
|
def next_page?
|
96
115
|
if max_pages.zero?
|
97
116
|
return true unless @pager.call(doc).nil?
|
@@ -99,23 +118,23 @@ module Graboid
|
|
99
118
|
current_page <= max_pages-1
|
100
119
|
end
|
101
120
|
end
|
102
|
-
|
121
|
+
|
103
122
|
def page_fragments
|
104
123
|
doc.css(root_selector)
|
105
124
|
end
|
106
|
-
|
125
|
+
|
107
126
|
def all opts={}
|
108
127
|
reset_context
|
109
|
-
self.max_pages = opts[:max_pages]
|
128
|
+
self.max_pages = opts[:max_pages] unless opts[:max_pages].nil?
|
110
129
|
all_fragments.collect{ |frag| extract_instance(frag) }
|
111
130
|
end
|
112
|
-
|
131
|
+
|
113
132
|
def reset_context
|
114
133
|
self.collection = []
|
115
134
|
self.current_page = 0
|
116
135
|
self.max_pages = 0
|
117
136
|
end
|
118
|
-
|
137
|
+
|
119
138
|
def read_source
|
120
139
|
case self.source
|
121
140
|
when /^http[s]?:\/\//
|
@@ -124,36 +143,36 @@ module Graboid
|
|
124
143
|
self.source
|
125
144
|
end
|
126
145
|
end
|
127
|
-
|
146
|
+
|
128
147
|
def pager &block
|
129
148
|
@pager = block
|
130
149
|
end
|
131
|
-
|
150
|
+
|
132
151
|
def mode
|
133
152
|
@mode ||= :html
|
134
153
|
end
|
135
|
-
|
154
|
+
|
136
155
|
def mode=(m)
|
137
156
|
raise ArgumentError unless [:html, :xml].include?(m)
|
138
157
|
@mode = m
|
139
158
|
end
|
140
|
-
|
159
|
+
|
141
160
|
def max_pages
|
142
161
|
@max_pages ||= 0
|
143
162
|
end
|
144
|
-
|
163
|
+
|
145
164
|
def max_pages=num
|
146
165
|
@max_pages = num
|
147
166
|
end
|
148
|
-
|
167
|
+
|
149
168
|
def current_page
|
150
169
|
@current_page ||= 0
|
151
170
|
end
|
152
|
-
|
171
|
+
|
153
172
|
def current_page=num
|
154
173
|
@current_page = num
|
155
174
|
end
|
156
|
-
|
175
|
+
|
157
176
|
instance_eval do
|
158
177
|
[:before, :after].each do |prefix|
|
159
178
|
[:paginate, :extract].each do |suffix|
|
@@ -163,29 +182,29 @@ module Graboid
|
|
163
182
|
end
|
164
183
|
define_method "run_#{method_name}_callbacks" do
|
165
184
|
ivar = instance_variable_get("@#{method_name}")
|
166
|
-
|
185
|
+
class_eval { ivar.call } unless ivar.nil?
|
167
186
|
end
|
168
187
|
end
|
169
|
-
end
|
170
|
-
end
|
171
|
-
|
188
|
+
end
|
189
|
+
end
|
190
|
+
|
172
191
|
end # ClassMethods
|
173
|
-
|
192
|
+
|
174
193
|
module InstanceMethods
|
175
|
-
|
194
|
+
|
176
195
|
def initialize opts={}
|
177
196
|
opts.each do |k,v|
|
178
|
-
self.class_eval do
|
197
|
+
self.class.class_eval do
|
179
198
|
define_method k do
|
180
199
|
v
|
181
200
|
end
|
182
201
|
end
|
183
202
|
end
|
184
203
|
end
|
185
|
-
|
204
|
+
|
186
205
|
def attribute_map
|
187
206
|
self.class.attribute_map
|
188
207
|
end
|
189
208
|
end # InstanceMethods
|
190
209
|
end
|
191
|
-
end
|
210
|
+
end
|
data/lib/graboid/scraper.rb
CHANGED
@@ -1,19 +1,35 @@
|
|
1
1
|
module Graboid
|
2
|
+
|
2
3
|
module Scraper
|
3
4
|
def self.included klass
|
4
5
|
klass.class_eval do
|
5
6
|
extend ClassMethods
|
6
7
|
include InstanceMethods
|
7
|
-
|
8
|
-
|
9
|
-
|
8
|
+
|
9
|
+
inherited_attributes :attribute_map, :callbacks
|
10
|
+
@attribute_map = {}
|
11
|
+
@callbacks = {}
|
10
12
|
end
|
11
13
|
end
|
12
|
-
|
14
|
+
|
13
15
|
module ClassMethods
|
14
|
-
|
15
|
-
def
|
16
|
-
|
16
|
+
|
17
|
+
def inherited_attributes(*args)
|
18
|
+
@inherited_attributes ||= [:inherited_attributes]
|
19
|
+
@inherited_attributes += args
|
20
|
+
args.each do |arg|
|
21
|
+
class_eval %(
|
22
|
+
class << self; attr_accessor :#{arg} end
|
23
|
+
)
|
24
|
+
end
|
25
|
+
@inherited_attributes
|
26
|
+
end
|
27
|
+
|
28
|
+
def inherited(subclass)
|
29
|
+
@inherited_attributes.each do |inheritable_attribute|
|
30
|
+
instance_var = "@#{inheritable_attribute}"
|
31
|
+
subclass.instance_variable_set(instance_var, instance_variable_get(instance_var))
|
32
|
+
end
|
17
33
|
end
|
18
34
|
|
19
35
|
def callbacks
|
@@ -41,7 +57,7 @@ module Graboid
|
|
41
57
|
alias_method :root, :selector
|
42
58
|
|
43
59
|
def set name, opts={}, &block
|
44
|
-
opts.merge!(:selector => ".#{name}")
|
60
|
+
opts.merge!(:selector => ".#{name}") if opts[:selector].nil?
|
45
61
|
opts.merge!(:processor => block) if block_given?
|
46
62
|
|
47
63
|
attribute_map[name] = opts
|
@@ -60,14 +76,14 @@ module Graboid
|
|
60
76
|
|
61
77
|
module InstanceMethods
|
62
78
|
def initialize opts={}, &block
|
63
|
-
raise ArgumentError
|
79
|
+
raise ArgumentError if opts[:source].nil?
|
64
80
|
self.source = opts[:source]
|
65
81
|
end
|
66
82
|
|
67
83
|
def all opts={}, reload=false
|
68
84
|
return self.collection if reload and !self.collection.empty?
|
69
85
|
reset_context
|
70
|
-
self.max_pages = opts[:max_pages]
|
86
|
+
self.max_pages = opts[:max_pages] unless opts[:max_pages].nil?
|
71
87
|
all_fragments.collect{ |frag| extract_instance(frag) }
|
72
88
|
end
|
73
89
|
|
@@ -202,11 +218,11 @@ module Graboid
|
|
202
218
|
[:paginate, :extract].each do |suffix|
|
203
219
|
method_name = "#{prefix}_#{suffix}"
|
204
220
|
define_method "run_#{method_name}_callbacks" do
|
205
|
-
self.instance_eval &callbacks[method_name.to_sym]
|
221
|
+
self.instance_eval &callbacks[method_name.to_sym] unless callbacks[method_name.to_sym].nil?
|
206
222
|
end
|
207
223
|
end
|
208
224
|
end
|
209
225
|
|
210
226
|
end
|
211
227
|
end
|
212
|
-
end
|
228
|
+
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: graboid
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 15
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 3
|
9
|
-
- 5
|
10
|
-
version: 0.3.5
|
8
|
+
- 4
|
9
|
+
- 0
|
10
|
+
version: 0.4.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Christopher Burnett
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
18
|
+
date: 2011-02-22 00:00:00 -08:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -48,20 +48,6 @@ dependencies:
|
|
48
48
|
version: "0"
|
49
49
|
type: :runtime
|
50
50
|
version_requirements: *id002
|
51
|
-
- !ruby/object:Gem::Dependency
|
52
|
-
name: activesupport
|
53
|
-
prerelease: false
|
54
|
-
requirement: &id003 !ruby/object:Gem::Requirement
|
55
|
-
none: false
|
56
|
-
requirements:
|
57
|
-
- - ">="
|
58
|
-
- !ruby/object:Gem::Version
|
59
|
-
hash: 3
|
60
|
-
segments:
|
61
|
-
- 0
|
62
|
-
version: "0"
|
63
|
-
type: :runtime
|
64
|
-
version_requirements: *id003
|
65
51
|
description: web scraping made easier
|
66
52
|
email: signalstatic@gmail.com
|
67
53
|
executables: []
|
@@ -73,7 +59,6 @@ extra_rdoc_files:
|
|
73
59
|
- README.mdown
|
74
60
|
files:
|
75
61
|
- .document
|
76
|
-
- .gitignore
|
77
62
|
- LICENSE
|
78
63
|
- README.mdown
|
79
64
|
- Rakefile
|
@@ -94,15 +79,13 @@ files:
|
|
94
79
|
- spec/graboid_spec.rb
|
95
80
|
- spec/spec.opts
|
96
81
|
- spec/spec_helper.rb
|
97
|
-
- examples/reddit_post.rb
|
98
|
-
- examples/tumblr_post.rb
|
99
82
|
has_rdoc: true
|
100
83
|
homepage: http://github.com/twoism/graboid
|
101
84
|
licenses: []
|
102
85
|
|
103
86
|
post_install_message:
|
104
|
-
rdoc_options:
|
105
|
-
|
87
|
+
rdoc_options: []
|
88
|
+
|
106
89
|
require_paths:
|
107
90
|
- lib
|
108
91
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -131,13 +114,11 @@ signing_key:
|
|
131
114
|
specification_version: 3
|
132
115
|
summary: web scraping made easy
|
133
116
|
test_files:
|
117
|
+
- examples/active_rain_post.rb
|
118
|
+
- examples/live_journal_post.rb
|
119
|
+
- examples/ning_post.rb
|
134
120
|
- spec/fixtures/server.rb
|
135
121
|
- spec/graboid/entity_spec.rb
|
136
122
|
- spec/graboid/scraper_spec.rb
|
137
123
|
- spec/graboid_spec.rb
|
138
124
|
- spec/spec_helper.rb
|
139
|
-
- examples/active_rain_post.rb
|
140
|
-
- examples/live_journal_post.rb
|
141
|
-
- examples/ning_post.rb
|
142
|
-
- examples/reddit_post.rb
|
143
|
-
- examples/tumblr_post.rb
|
data/.gitignore
DELETED
data/examples/reddit_post.rb
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
dir = File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
-
require File.join(dir, 'graboid')
|
3
|
-
|
4
|
-
class RedditEntry
|
5
|
-
include Graboid::Scraper
|
6
|
-
|
7
|
-
selector '.entry'
|
8
|
-
|
9
|
-
set :title
|
10
|
-
set :domain, :selector => '.domain a'
|
11
|
-
|
12
|
-
set :link, :selector => '.title' do |entry|
|
13
|
-
entry.css('a').first['href']
|
14
|
-
end
|
15
|
-
|
16
|
-
page_with do |doc|
|
17
|
-
self.doc.css('p.nextprev a').select{|a| a.text =~ /next/i }.first['href']
|
18
|
-
end
|
19
|
-
|
20
|
-
before_paginate do
|
21
|
-
puts "opening page: #{self.source}"
|
22
|
-
puts "collection size: #{self.collection.length}"
|
23
|
-
puts "#{"*"*100}"
|
24
|
-
end
|
25
|
-
|
26
|
-
end
|
27
|
-
|
28
|
-
@posts = RedditEntry.new( :source => 'http://reddit.com' ).all( :max_pages => 3 )
|
29
|
-
|
30
|
-
@posts.each do |p|
|
31
|
-
puts "title: #{p.title}"
|
32
|
-
puts "domain: #{p.domain}"
|
33
|
-
puts "link: #{p.link}"
|
34
|
-
puts "#{"*"*100}"
|
35
|
-
end
|
data/examples/tumblr_post.rb
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
dir = File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
-
require File.join(dir, 'graboid')
|
3
|
-
|
4
|
-
class TumblrEntry
|
5
|
-
include Graboid::Scraper
|
6
|
-
TUMBLR_CHUNK_SIZE = 20
|
7
|
-
|
8
|
-
selector 'post'
|
9
|
-
|
10
|
-
set :title, :selector => 'regular-title'
|
11
|
-
|
12
|
-
page_with do |doc|
|
13
|
-
next_tumblr_page
|
14
|
-
end
|
15
|
-
|
16
|
-
def next_tumblr_page
|
17
|
-
return nil if self.doc.css('post').empty?
|
18
|
-
"#{self.original_source}?start=#{self.current_page*TUMBLR_CHUNK_SIZE}"
|
19
|
-
end
|
20
|
-
|
21
|
-
before_paginate do
|
22
|
-
puts "opening page: #{self.source}"
|
23
|
-
puts "collection size: #{self.collection.length}"
|
24
|
-
puts "#{"*"*100}"
|
25
|
-
end
|
26
|
-
|
27
|
-
end
|
28
|
-
|
29
|
-
@posts = TumblrEntry.new( :source => 'http://chrisburnett.tumblr.com/api/read' ).all
|
30
|
-
|
31
|
-
@posts.each do |p|
|
32
|
-
puts "title: #{p.title}"
|
33
|
-
end
|