crawlable 0.0.1.7 → 0.0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/lib/crawlable.rb +1 -0
- data/lib/crawlable/feed.rb +64 -15
- data/lib/crawlable/rack.rb +23 -4
- data/lib/crawlable/sitemap.rb +43 -11
- data/lib/engine.rb +19 -0
- data/test/feed_data.atom +67 -0
- data/test/feed_data.rss +68 -0
- data/test/feeds.rb +11 -0
- data/test/test_feed.rb +2 -1
- data/test/test_helper.rb +1 -0
- metadata +6 -3
data/Rakefile
CHANGED
@@ -5,7 +5,7 @@ require 'rake/gempackagetask'
|
|
5
5
|
spec = Gem::Specification.new do |s|
|
6
6
|
s.name = "crawlable"
|
7
7
|
s.authors = ["Lance Pollard"]
|
8
|
-
s.version = "0.0.1.7"
|
8
|
+
s.version = "0.0.1.8"
|
9
9
|
s.summary = "Crawlable: Super DRY Sitemaps for Rails and Sinatra Apps"
|
10
10
|
s.homepage = "http://github.com/viatropos/crawlable"
|
11
11
|
s.email = "lancejpollard@gmail.com"
|
data/lib/crawlable.rb
CHANGED
data/lib/crawlable/feed.rb
CHANGED
@@ -2,7 +2,16 @@ module Crawlable
|
|
2
2
|
class Feed
|
3
3
|
|
4
4
|
class << self
|
5
|
-
attr_accessor :options, :call_options
|
5
|
+
attr_accessor :options, :call_options, :feeds
|
6
|
+
|
7
|
+
def feeds
|
8
|
+
@feeds ||= []
|
9
|
+
@feeds
|
10
|
+
end
|
11
|
+
|
12
|
+
def paths
|
13
|
+
feeds.map(&:path)
|
14
|
+
end
|
6
15
|
|
7
16
|
def define!(*args, &block)
|
8
17
|
self.options = args.extract_options!#.merge(:run => args.shift)
|
@@ -10,6 +19,21 @@ module Crawlable
|
|
10
19
|
instance_eval(&block) if block_given?
|
11
20
|
end
|
12
21
|
|
22
|
+
def find(path, directory)
|
23
|
+
if path =~ /(\/feed\/?)(atom|rss2|rss|rdf)?$/i
|
24
|
+
feed = $1
|
25
|
+
format = $2
|
26
|
+
|
27
|
+
format = format.blank? ? "rss" : format.to_s
|
28
|
+
file = path.gsub(format, "").split("/").delete_if {|i| i.blank? }[0..-2].join("/")
|
29
|
+
|
30
|
+
return unless self.paths.include?(file)
|
31
|
+
file = file.split("/").join("_")
|
32
|
+
file = File.join(directory, "feeds", "#{file}.#{format}")
|
33
|
+
return file
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
13
37
|
def parse!(path, options = {})
|
14
38
|
path ||= File.join(::Rails.root, 'config/initializers/feeds.rb')
|
15
39
|
self.call_options = options.symbolize_keys
|
@@ -17,16 +41,25 @@ module Crawlable
|
|
17
41
|
self.call_options = nil
|
18
42
|
end
|
19
43
|
|
44
|
+
def write(to, *formats)
|
45
|
+
self.feeds.each { |feed| feed.write(to, *formats) }
|
46
|
+
end
|
47
|
+
|
48
|
+
def process!(from, to, *formats, &block)
|
49
|
+
parse!(from)
|
50
|
+
write(to, *formats)
|
51
|
+
end
|
52
|
+
|
20
53
|
def method_missing(meth, *args, &block)
|
21
54
|
if block_given?
|
22
55
|
if !self.call_options.blank?
|
23
56
|
if call_options.has_key?(meth.to_sym)
|
24
|
-
self.new(meth, *args, &block)
|
57
|
+
self.feeds << self.new(meth, *args, &block)
|
25
58
|
else
|
26
59
|
super(meth, *args, &block)
|
27
60
|
end
|
28
61
|
else
|
29
|
-
self.new(meth, *args, &block)
|
62
|
+
self.feeds << self.new(meth, *args, &block)
|
30
63
|
end
|
31
64
|
else
|
32
65
|
super(meth, *args, &block)
|
@@ -35,18 +68,26 @@ module Crawlable
|
|
35
68
|
|
36
69
|
end
|
37
70
|
|
38
|
-
attr_accessor :title, :url, :description, :master, :copyright, :updated_at
|
71
|
+
attr_accessor :name, :title, :url, :description, :master, :copyright, :updated_at, :path
|
39
72
|
|
40
73
|
def initialize(*args, &block)
|
41
74
|
options = args.extract_options!
|
42
75
|
|
43
|
-
name = args.shift
|
76
|
+
self.name = args.shift
|
44
77
|
|
45
78
|
options.each do |k, v|
|
46
79
|
self.send(k, v) if self.respond_to?(k)
|
47
80
|
end
|
48
81
|
|
49
82
|
instance_eval(&block)
|
83
|
+
|
84
|
+
self
|
85
|
+
end
|
86
|
+
|
87
|
+
def path(value = nil)
|
88
|
+
@path = value if value
|
89
|
+
@path ||= name.to_s
|
90
|
+
@path
|
50
91
|
end
|
51
92
|
|
52
93
|
def copyright(string = nil)
|
@@ -101,7 +142,6 @@ module Crawlable
|
|
101
142
|
xml.description self.description
|
102
143
|
|
103
144
|
self.entries.each do |entry|
|
104
|
-
puts entry.inspect
|
105
145
|
xml.item do
|
106
146
|
xml.title entry[:title]
|
107
147
|
xml.link entry[:url]
|
@@ -130,19 +170,19 @@ module Crawlable
|
|
130
170
|
builder = Nokogiri::XML::Builder.new do |xml|
|
131
171
|
xml.feed "xmlns" => "http://www.w3.org/2005/Atom" do
|
132
172
|
xml.title self.title
|
133
|
-
xml.link "rel" => "self", "href" => url_for(:only_path => false, :controller => 'feeds', :action => 'atom')
|
134
|
-
xml.link "rel" => "alternate", "href" => url_for(:only_path => false, :controller => 'posts')
|
135
|
-
xml.id url_for(:only_path => false, :controller => 'posts')
|
173
|
+
#xml.link "rel" => "self", "href" => url_for(:only_path => false, :controller => 'feeds', :action => 'atom')
|
174
|
+
#xml.link "rel" => "alternate", "href" => url_for(:only_path => false, :controller => 'posts')
|
175
|
+
#xml.id url_for(:only_path => false, :controller => 'posts')
|
136
176
|
xml.updated self.updated_at.strftime "%Y-%m-%dT%H:%M:%SZ" if self.updated_at
|
137
177
|
xml.author { xml.name self.author }
|
138
178
|
|
139
|
-
self.entries.each do |
|
179
|
+
self.entries.each do |entry|
|
140
180
|
xml.entry do
|
141
|
-
xml.title
|
142
|
-
xml.link "rel" => "alternate", "href" => url_for(:only_path => false, :controller => 'posts', :action => 'show', :id =>
|
143
|
-
xml.id url_for(:only_path => false, :controller => 'posts', :action => 'show', :id =>
|
144
|
-
xml.updated
|
145
|
-
xml.author { xml.name
|
181
|
+
xml.title entry[:title]
|
182
|
+
#xml.link "rel" => "alternate", "href" => url_for(:only_path => false, :controller => 'posts', :action => 'show', :id => entry[:id])
|
183
|
+
#xml.id url_for(:only_path => false, :controller => 'posts', :action => 'show', :id => entry[:id])
|
184
|
+
xml.updated entry[:updated_at].strftime "%Y-%m-%dT%H:%M:%SZ"
|
185
|
+
xml.author { xml.name entry[:author] } if entry[:author]
|
146
186
|
xml.summary "Post summary"
|
147
187
|
xml.content "type" => "html" do
|
148
188
|
#xml.text! render(:partial => "posts/post", :post => post)
|
@@ -155,5 +195,14 @@ module Crawlable
|
|
155
195
|
builder.to_xml
|
156
196
|
end
|
157
197
|
|
198
|
+
def write(to, *formats)
|
199
|
+
formats.each do |format|
|
200
|
+
FileUtils.mkdir_p(File.dirname(to))
|
201
|
+
File.open("#{to}.#{format.to_s}", 'wb') do |file|
|
202
|
+
file.puts send("to_#{format.to_s}")
|
203
|
+
end
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
158
207
|
end
|
159
208
|
end
|
data/lib/crawlable/rack.rb
CHANGED
@@ -6,14 +6,21 @@ module Crawlable
|
|
6
6
|
end
|
7
7
|
|
8
8
|
def call(env)
|
9
|
-
if
|
10
|
-
|
9
|
+
if using_heroku?(env) || true
|
10
|
+
if file = Crawlable::Sitemap.find(env['REQUEST_PATH'], heroku_writable_directory)
|
11
|
+
return sitemap(file)
|
12
|
+
elsif file = Crawlable::Feed.find(env['REQUEST_PATH'], heroku_writable_directory)
|
13
|
+
return feed(file)
|
14
|
+
end
|
11
15
|
end
|
12
16
|
@app.call(env)
|
13
17
|
end
|
14
18
|
|
15
|
-
def sitemap
|
16
|
-
|
19
|
+
def sitemap(file)
|
20
|
+
[200, { 'Cache-Control' => 'public, max-age=86400', 'Content-Length' => File.size(file).to_s, 'Content-Type' => 'text/xml' }, IO.read(file)]
|
21
|
+
end
|
22
|
+
|
23
|
+
def feed(file)
|
17
24
|
[200, { 'Cache-Control' => 'public, max-age=86400', 'Content-Length' => File.size(file).to_s, 'Content-Type' => 'text/xml' }, IO.read(file)]
|
18
25
|
end
|
19
26
|
|
@@ -21,5 +28,17 @@ module Crawlable
|
|
21
28
|
"#{Rails.root}/tmp"
|
22
29
|
end
|
23
30
|
|
31
|
+
def using_heroku?(env)
|
32
|
+
if env["HEROKU"].nil?
|
33
|
+
if env["HEROKU_PORT"].nil? && ENV["HEROKU_TYPE"].nil?
|
34
|
+
env["HEROKU"] = false
|
35
|
+
else
|
36
|
+
env["HEROKU"] = true
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
env["HEROKU"] == true
|
41
|
+
end
|
42
|
+
|
24
43
|
end
|
25
44
|
end
|
data/lib/crawlable/sitemap.rb
CHANGED
@@ -8,6 +8,16 @@ module Crawlable
|
|
8
8
|
self.instance = self.new(*args, &block)
|
9
9
|
end
|
10
10
|
|
11
|
+
def path
|
12
|
+
self.instance ? self.instance.path : ""
|
13
|
+
end
|
14
|
+
|
15
|
+
def find(path, directory)
|
16
|
+
if path =~ /#{Regexp.escape(self.path)}/i
|
17
|
+
return File.join(directory, self.path)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
11
21
|
def parse!(path)
|
12
22
|
path ||= File.join(::Rails.root, 'config/sitemap.rb')
|
13
23
|
eval(IO.read(path))
|
@@ -17,7 +27,7 @@ module Crawlable
|
|
17
27
|
self.instance.write(to, compress)
|
18
28
|
end
|
19
29
|
|
20
|
-
def process(from, to, compress = false, &block)
|
30
|
+
def process!(from, to, compress = false, &block)
|
21
31
|
parse!(from)
|
22
32
|
write(to, compress)
|
23
33
|
end
|
@@ -37,11 +47,10 @@ module Crawlable
|
|
37
47
|
|
38
48
|
end
|
39
49
|
|
40
|
-
attr_accessor :links, :
|
50
|
+
attr_accessor :links, :sitemap_host, :ping, :yahoo_app_id, :path, :stylesheet
|
41
51
|
|
42
52
|
def initialize(*args, &block)
|
43
|
-
self.
|
44
|
-
raise "Please define a host: 'Sitemap 'http://my-site.com' do ..." if self.host.blank?
|
53
|
+
self.sitemap_host = args.shift
|
45
54
|
options = args.extract_options!
|
46
55
|
|
47
56
|
options.each do |k, v|
|
@@ -49,6 +58,15 @@ module Crawlable
|
|
49
58
|
end
|
50
59
|
|
51
60
|
instance_eval(&block)
|
61
|
+
|
62
|
+
raise "Please define a host: 'Sitemap 'http://my-site.com' do ..." if self.sitemap_host.blank?
|
63
|
+
|
64
|
+
end
|
65
|
+
|
66
|
+
def path(value = nil)
|
67
|
+
@path = value if value
|
68
|
+
@path ||= "/sitemap.xml"
|
69
|
+
@path
|
52
70
|
end
|
53
71
|
|
54
72
|
def yahoo_app_id(string = nil)
|
@@ -60,9 +78,14 @@ module Crawlable
|
|
60
78
|
@links ||= []
|
61
79
|
end
|
62
80
|
|
63
|
-
def
|
64
|
-
@
|
65
|
-
@
|
81
|
+
def sitemap_host(*args)
|
82
|
+
@sitemap_host = args unless args.empty?
|
83
|
+
@sitemap_host
|
84
|
+
end
|
85
|
+
|
86
|
+
def stylesheet(value = nil)
|
87
|
+
@stylesheet = value if value
|
88
|
+
@stylesheet
|
66
89
|
end
|
67
90
|
|
68
91
|
def ping(*args)
|
@@ -76,12 +99,12 @@ module Crawlable
|
|
76
99
|
|
77
100
|
def link(path, *args, &block)
|
78
101
|
options = args.extract_options!
|
79
|
-
options.assert_valid_keys(:priority, :changes, :updated_at, :
|
102
|
+
options.assert_valid_keys(:priority, :changes, :updated_at, :sitemap_host)
|
80
103
|
options.reverse_merge!(
|
81
104
|
:priority => 0.5,
|
82
105
|
:changes => 'monthly',
|
83
106
|
:updated_at => Time.now,
|
84
|
-
:host => self.
|
107
|
+
:host => self.sitemap_host
|
85
108
|
)
|
86
109
|
|
87
110
|
result = {
|
@@ -146,7 +169,16 @@ module Crawlable
|
|
146
169
|
end
|
147
170
|
end
|
148
171
|
end
|
149
|
-
builder.to_xml
|
172
|
+
xml = builder.to_xml
|
173
|
+
|
174
|
+
# can't add processing instructions with nokogiri
|
175
|
+
xml.gsub!("<?xml version=\"1.0\"?>") do |head|
|
176
|
+
result = head
|
177
|
+
result << "\n"
|
178
|
+
result << "<?xml-stylesheet type=\"text/xsl\" href=\"#{stylesheet}\"?>"
|
179
|
+
end if stylesheet
|
180
|
+
|
181
|
+
xml
|
150
182
|
end
|
151
183
|
|
152
184
|
def write(path, compress)
|
@@ -188,7 +220,7 @@ module Crawlable
|
|
188
220
|
end
|
189
221
|
|
190
222
|
def inspect
|
191
|
-
"<Sitemap @
|
223
|
+
"<Sitemap @sitemap_host='#{sitemap_host.to_s}' @sitemap_path='#{sitemap_path.to_s}' @ping='#{ping.inspect}' @links='#{links.inspect}'/>"
|
192
224
|
end
|
193
225
|
|
194
226
|
end
|
data/lib/engine.rb
CHANGED
@@ -3,6 +3,25 @@ module Crawlable
|
|
3
3
|
|
4
4
|
initializer "authlogic_connect.authentication_hook" do |app|
|
5
5
|
app.middleware.use Crawlable::Rack
|
6
|
+
add_helpers Crawlable::Sitemap, Crawlable::Feed
|
7
|
+
end
|
8
|
+
|
9
|
+
def add_helpers(*clazzes)
|
10
|
+
clazzes.each do |clazz|
|
11
|
+
clazz.class_eval do
|
12
|
+
include ApplicationHelper
|
13
|
+
|
14
|
+
if defined?(::Rails)
|
15
|
+
if ActionPack::VERSION::MAJOR == 3
|
16
|
+
include ::Rails.application.routes.url_helpers
|
17
|
+
else
|
18
|
+
require 'action_controller'
|
19
|
+
include ActionController::UrlWriter
|
20
|
+
end
|
21
|
+
default_url_options[:host] = 'localhost:3000'
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
6
25
|
end
|
7
26
|
|
8
27
|
end
|
data/test/feed_data.atom
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
<feed xmlns="http://www.w3.org/2005/Atom">
|
3
|
+
<title>My RSS Feed</title>
|
4
|
+
<author>
|
5
|
+
<name>Lance Pollard</name>
|
6
|
+
</author>
|
7
|
+
<entry>
|
8
|
+
<title>title-0</title>
|
9
|
+
<updated>2010-07-02T06:16:43Z</updated>
|
10
|
+
<summary>Post summary</summary>
|
11
|
+
<content type="html"/>
|
12
|
+
</entry>
|
13
|
+
<entry>
|
14
|
+
<title>title-1</title>
|
15
|
+
<updated>2010-07-02T06:16:43Z</updated>
|
16
|
+
<summary>Post summary</summary>
|
17
|
+
<content type="html"/>
|
18
|
+
</entry>
|
19
|
+
<entry>
|
20
|
+
<title>title-2</title>
|
21
|
+
<updated>2010-07-02T06:16:43Z</updated>
|
22
|
+
<summary>Post summary</summary>
|
23
|
+
<content type="html"/>
|
24
|
+
</entry>
|
25
|
+
<entry>
|
26
|
+
<title>title-3</title>
|
27
|
+
<updated>2010-07-02T06:16:43Z</updated>
|
28
|
+
<summary>Post summary</summary>
|
29
|
+
<content type="html"/>
|
30
|
+
</entry>
|
31
|
+
<entry>
|
32
|
+
<title>title-4</title>
|
33
|
+
<updated>2010-07-02T06:16:43Z</updated>
|
34
|
+
<summary>Post summary</summary>
|
35
|
+
<content type="html"/>
|
36
|
+
</entry>
|
37
|
+
<entry>
|
38
|
+
<title>title-5</title>
|
39
|
+
<updated>2010-07-02T06:16:43Z</updated>
|
40
|
+
<summary>Post summary</summary>
|
41
|
+
<content type="html"/>
|
42
|
+
</entry>
|
43
|
+
<entry>
|
44
|
+
<title>title-6</title>
|
45
|
+
<updated>2010-07-02T06:16:43Z</updated>
|
46
|
+
<summary>Post summary</summary>
|
47
|
+
<content type="html"/>
|
48
|
+
</entry>
|
49
|
+
<entry>
|
50
|
+
<title>title-7</title>
|
51
|
+
<updated>2010-07-02T06:16:43Z</updated>
|
52
|
+
<summary>Post summary</summary>
|
53
|
+
<content type="html"/>
|
54
|
+
</entry>
|
55
|
+
<entry>
|
56
|
+
<title>title-8</title>
|
57
|
+
<updated>2010-07-02T06:16:43Z</updated>
|
58
|
+
<summary>Post summary</summary>
|
59
|
+
<content type="html"/>
|
60
|
+
</entry>
|
61
|
+
<entry>
|
62
|
+
<title>title-9</title>
|
63
|
+
<updated>2010-07-02T06:16:43Z</updated>
|
64
|
+
<summary>Post summary</summary>
|
65
|
+
<content type="html"/>
|
66
|
+
</entry>
|
67
|
+
</feed>
|
data/test/feed_data.rss
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
<rss xmlns:dc="http://purl.org/dc/elements/1.1/">
|
3
|
+
<channel>
|
4
|
+
<title>My RSS Feed</title>
|
5
|
+
<link></link>
|
6
|
+
<description>Something nice and tidy</description>
|
7
|
+
<item>
|
8
|
+
<title>title-0</title>
|
9
|
+
<link>/posts/title-0</link>
|
10
|
+
<description></description>
|
11
|
+
<guid>/posts/title-0</guid>
|
12
|
+
</item>
|
13
|
+
<item>
|
14
|
+
<title>title-1</title>
|
15
|
+
<link>/posts/title-1</link>
|
16
|
+
<description></description>
|
17
|
+
<guid>/posts/title-1</guid>
|
18
|
+
</item>
|
19
|
+
<item>
|
20
|
+
<title>title-2</title>
|
21
|
+
<link>/posts/title-2</link>
|
22
|
+
<description></description>
|
23
|
+
<guid>/posts/title-2</guid>
|
24
|
+
</item>
|
25
|
+
<item>
|
26
|
+
<title>title-3</title>
|
27
|
+
<link>/posts/title-3</link>
|
28
|
+
<description></description>
|
29
|
+
<guid>/posts/title-3</guid>
|
30
|
+
</item>
|
31
|
+
<item>
|
32
|
+
<title>title-4</title>
|
33
|
+
<link>/posts/title-4</link>
|
34
|
+
<description></description>
|
35
|
+
<guid>/posts/title-4</guid>
|
36
|
+
</item>
|
37
|
+
<item>
|
38
|
+
<title>title-5</title>
|
39
|
+
<link>/posts/title-5</link>
|
40
|
+
<description></description>
|
41
|
+
<guid>/posts/title-5</guid>
|
42
|
+
</item>
|
43
|
+
<item>
|
44
|
+
<title>title-6</title>
|
45
|
+
<link>/posts/title-6</link>
|
46
|
+
<description></description>
|
47
|
+
<guid>/posts/title-6</guid>
|
48
|
+
</item>
|
49
|
+
<item>
|
50
|
+
<title>title-7</title>
|
51
|
+
<link>/posts/title-7</link>
|
52
|
+
<description></description>
|
53
|
+
<guid>/posts/title-7</guid>
|
54
|
+
</item>
|
55
|
+
<item>
|
56
|
+
<title>title-8</title>
|
57
|
+
<link>/posts/title-8</link>
|
58
|
+
<description></description>
|
59
|
+
<guid>/posts/title-8</guid>
|
60
|
+
</item>
|
61
|
+
<item>
|
62
|
+
<title>title-9</title>
|
63
|
+
<link>/posts/title-9</link>
|
64
|
+
<description></description>
|
65
|
+
<guid>/posts/title-9</guid>
|
66
|
+
</item>
|
67
|
+
</channel>
|
68
|
+
</rss>
|
data/test/feeds.rb
ADDED
data/test/test_feed.rb
CHANGED
data/test/test_helper.rb
CHANGED
@@ -13,6 +13,7 @@ this = File.expand_path(File.dirname(__FILE__))
|
|
13
13
|
require File.expand_path(File.join(this, '/../lib/crawlable'))
|
14
14
|
|
15
15
|
Dir["#{this}/lib/*"].each { |c| require c if File.extname(c) == ".rb" }
|
16
|
+
require "#{this}/feeds"
|
16
17
|
require File.expand_path(File.join(File.dirname(__FILE__), '/../lib/crawlable'))
|
17
18
|
|
18
19
|
ActiveRecord::Base.class_eval do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crawlable
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 91
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
9
|
- 1
|
10
|
-
- 7
|
11
|
-
version: 0.0.1.7
|
10
|
+
- 8
|
11
|
+
version: 0.0.1.8
|
12
12
|
platform: ruby
|
13
13
|
authors:
|
14
14
|
- Lance Pollard
|
@@ -54,6 +54,9 @@ files:
|
|
54
54
|
- lib/crawlable.rb
|
55
55
|
- lib/engine.rb
|
56
56
|
- rails/init.rb
|
57
|
+
- test/feed_data.atom
|
58
|
+
- test/feed_data.rss
|
59
|
+
- test/feeds.rb
|
57
60
|
- test/lib/_database.rb
|
58
61
|
- test/lib/post.rb
|
59
62
|
- test/lib/routes.rb
|