crawlable 0.0.1.7 → 0.0.1.8

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -5,7 +5,7 @@ require 'rake/gempackagetask'
5
5
  spec = Gem::Specification.new do |s|
6
6
  s.name = "crawlable"
7
7
  s.authors = ["Lance Pollard"]
8
- s.version = "0.0.1.7"
8
+ s.version = "0.0.1.8"
9
9
  s.summary = "Crawlable: Super DRY Sitemaps for Rails and Sinatra Apps"
10
10
  s.homepage = "http://github.com/viatropos/crawlable"
11
11
  s.email = "lancejpollard@gmail.com"
@@ -6,6 +6,7 @@ require 'open-uri'
6
6
 
7
7
  this = File.dirname(__FILE__)
8
8
  Dir["#{this}/crawlable/*"].each { |c| require c if File.extname(c) == ".rb" }
9
+ require "#{this}/engine"
9
10
 
10
11
  def Sitemap(*args, &block)
11
12
  Crawlable::Sitemap.define!(*args, &block)
@@ -2,7 +2,16 @@ module Crawlable
2
2
  class Feed
3
3
 
4
4
  class << self
5
- attr_accessor :options, :call_options
5
+ attr_accessor :options, :call_options, :feeds
6
+
7
+ def feeds
8
+ @feeds ||= []
9
+ @feeds
10
+ end
11
+
12
+ def paths
13
+ feeds.map(&:path)
14
+ end
6
15
 
7
16
  def define!(*args, &block)
8
17
  self.options = args.extract_options!#.merge(:run => args.shift)
@@ -10,6 +19,21 @@ module Crawlable
10
19
  instance_eval(&block) if block_given?
11
20
  end
12
21
 
22
+ def find(path, directory)
23
+ if path =~ /(\/feed\/?)(atom|rss2|rss|rdf)?$/i
24
+ feed = $1
25
+ format = $2
26
+
27
+ format = format.blank? ? "rss" : format.to_s
28
+ file = path.gsub(format, "").split("/").delete_if {|i| i.blank? }[0..-2].join("/")
29
+
30
+ return unless self.paths.include?(file)
31
+ file = file.split("/").join("_")
32
+ file = File.join(directory, "feeds", "#{file}.#{format}")
33
+ return file
34
+ end
35
+ end
36
+
13
37
  def parse!(path, options = {})
14
38
  path ||= File.join(::Rails.root, 'config/initializers/feeds.rb')
15
39
  self.call_options = options.symbolize_keys
@@ -17,16 +41,25 @@ module Crawlable
17
41
  self.call_options = nil
18
42
  end
19
43
 
44
+ def write(to, *formats)
45
+ self.feeds.each { |feed| feed.write(to, *formats) }
46
+ end
47
+
48
+ def process!(from, to, *formats, &block)
49
+ parse!(from)
50
+ write(to, *formats)
51
+ end
52
+
20
53
  def method_missing(meth, *args, &block)
21
54
  if block_given?
22
55
  if !self.call_options.blank?
23
56
  if call_options.has_key?(meth.to_sym)
24
- self.new(meth, *args, &block)
57
+ self.feeds << self.new(meth, *args, &block)
25
58
  else
26
59
  super(meth, *args, &block)
27
60
  end
28
61
  else
29
- self.new(meth, *args, &block)
62
+ self.feeds << self.new(meth, *args, &block)
30
63
  end
31
64
  else
32
65
  super(meth, *args, &block)
@@ -35,18 +68,26 @@ module Crawlable
35
68
 
36
69
  end
37
70
 
38
- attr_accessor :title, :url, :description, :master, :copyright, :updated_at
71
+ attr_accessor :name, :title, :url, :description, :master, :copyright, :updated_at, :path
39
72
 
40
73
  def initialize(*args, &block)
41
74
  options = args.extract_options!
42
75
 
43
- name = args.shift
76
+ self.name = args.shift
44
77
 
45
78
  options.each do |k, v|
46
79
  self.send(k, v) if self.respond_to?(k)
47
80
  end
48
81
 
49
82
  instance_eval(&block)
83
+
84
+ self
85
+ end
86
+
87
+ def path(value = nil)
88
+ @path = value if value
89
+ @path ||= name.to_s
90
+ @path
50
91
  end
51
92
 
52
93
  def copyright(string = nil)
@@ -101,7 +142,6 @@ module Crawlable
101
142
  xml.description self.description
102
143
 
103
144
  self.entries.each do |entry|
104
- puts entry.inspect
105
145
  xml.item do
106
146
  xml.title entry[:title]
107
147
  xml.link entry[:url]
@@ -130,19 +170,19 @@ module Crawlable
130
170
  builder = Nokogiri::XML::Builder.new do |xml|
131
171
  xml.feed "xmlns" => "http://www.w3.org/2005/Atom" do
132
172
  xml.title self.title
133
- xml.link "rel" => "self", "href" => url_for(:only_path => false, :controller => 'feeds', :action => 'atom')
134
- xml.link "rel" => "alternate", "href" => url_for(:only_path => false, :controller => 'posts')
135
- xml.id url_for(:only_path => false, :controller => 'posts')
173
+ #xml.link "rel" => "self", "href" => url_for(:only_path => false, :controller => 'feeds', :action => 'atom')
174
+ #xml.link "rel" => "alternate", "href" => url_for(:only_path => false, :controller => 'posts')
175
+ #xml.id url_for(:only_path => false, :controller => 'posts')
136
176
  xml.updated self.updated_at.strftime "%Y-%m-%dT%H:%M:%SZ" if self.updated_at
137
177
  xml.author { xml.name self.author }
138
178
 
139
- self.entries.each do |entries|
179
+ self.entries.each do |entry|
140
180
  xml.entry do
141
- xml.title entries.title
142
- xml.link "rel" => "alternate", "href" => url_for(:only_path => false, :controller => 'posts', :action => 'show', :id => entries.id)
143
- xml.id url_for(:only_path => false, :controller => 'posts', :action => 'show', :id => entries.id)
144
- xml.updated entries.updated_at.strftime "%Y-%m-%dT%H:%M:%SZ"
145
- xml.author { xml.name entries.author.name }
181
+ xml.title entry[:title]
182
+ #xml.link "rel" => "alternate", "href" => url_for(:only_path => false, :controller => 'posts', :action => 'show', :id => entry[:id])
183
+ #xml.id url_for(:only_path => false, :controller => 'posts', :action => 'show', :id => entry[:id])
184
+ xml.updated entry[:updated_at].strftime "%Y-%m-%dT%H:%M:%SZ"
185
+ xml.author { xml.name entry[:author] } if entry[:author]
146
186
  xml.summary "Post summary"
147
187
  xml.content "type" => "html" do
148
188
  #xml.text! render(:partial => "posts/post", :post => post)
@@ -155,5 +195,14 @@ module Crawlable
155
195
  builder.to_xml
156
196
  end
157
197
 
198
+ def write(to, *formats)
199
+ formats.each do |format|
200
+ FileUtils.mkdir_p(File.dirname(to))
201
+ File.open("#{to}.#{format.to_s}", 'wb') do |file|
202
+ file.puts send("to_#{format.to_s}")
203
+ end
204
+ end
205
+ end
206
+
158
207
  end
159
208
  end
@@ -6,14 +6,21 @@ module Crawlable
6
6
  end
7
7
 
8
8
  def call(env)
9
- if ENV["HEROKU"]
10
- return sitmap if env['REQUEST_PATH'] =~ /\/sitemap\.xml/i
9
+ if using_heroku?(env) || true
10
+ if file = Crawlable::Sitemap.find(env['REQUEST_PATH'], heroku_writable_directory)
11
+ return sitemap(file)
12
+ elsif file = Crawlable::Feed.find(env['REQUEST_PATH'], heroku_writable_directory)
13
+ return feed(file)
14
+ end
11
15
  end
12
16
  @app.call(env)
13
17
  end
14
18
 
15
- def sitemap
16
- file = File.join(heroku_writable_directory, "sitemap.xml.gz")
19
+ def sitemap(file)
20
+ [200, { 'Cache-Control' => 'public, max-age=86400', 'Content-Length' => File.size(file).to_s, 'Content-Type' => 'text/xml' }, IO.read(file)]
21
+ end
22
+
23
+ def feed(file)
17
24
  [200, { 'Cache-Control' => 'public, max-age=86400', 'Content-Length' => File.size(file).to_s, 'Content-Type' => 'text/xml' }, IO.read(file)]
18
25
  end
19
26
 
@@ -21,5 +28,17 @@ module Crawlable
21
28
  "#{Rails.root}/tmp"
22
29
  end
23
30
 
31
+ def using_heroku?(env)
32
+ if env["HEROKU"].nil?
33
+ if env["HEROKU_PORT"].nil? && ENV["HEROKU_TYPE"].nil?
34
+ env["HEROKU"] = false
35
+ else
36
+ env["HEROKU"] = true
37
+ end
38
+ end
39
+
40
+ env["HEROKU"] == true
41
+ end
42
+
24
43
  end
25
44
  end
@@ -8,6 +8,16 @@ module Crawlable
8
8
  self.instance = self.new(*args, &block)
9
9
  end
10
10
 
11
+ def path
12
+ self.instance ? self.instance.path : ""
13
+ end
14
+
15
+ def find(path, directory)
16
+ if path =~ /#{Regexp.escape(self.path)}/i
17
+ return File.join(directory, self.path)
18
+ end
19
+ end
20
+
11
21
  def parse!(path)
12
22
  path ||= File.join(::Rails.root, 'config/sitemap.rb')
13
23
  eval(IO.read(path))
@@ -17,7 +27,7 @@ module Crawlable
17
27
  self.instance.write(to, compress)
18
28
  end
19
29
 
20
- def process(from, to, compress = false, &block)
30
+ def process!(from, to, compress = false, &block)
21
31
  parse!(from)
22
32
  write(to, compress)
23
33
  end
@@ -37,11 +47,10 @@ module Crawlable
37
47
 
38
48
  end
39
49
 
40
- attr_accessor :links, :host, :ping, :yahoo_app_id
50
+ attr_accessor :links, :sitemap_host, :ping, :yahoo_app_id, :path, :stylesheet
41
51
 
42
52
  def initialize(*args, &block)
43
- self.host = args.shift
44
- raise "Please define a host: 'Sitemap 'http://my-site.com' do ..." if self.host.blank?
53
+ self.sitemap_host = args.shift
45
54
  options = args.extract_options!
46
55
 
47
56
  options.each do |k, v|
@@ -49,6 +58,15 @@ module Crawlable
49
58
  end
50
59
 
51
60
  instance_eval(&block)
61
+
62
+ raise "Please define a host: 'Sitemap 'http://my-site.com' do ..." if self.sitemap_host.blank?
63
+
64
+ end
65
+
66
+ def path(value = nil)
67
+ @path = value if value
68
+ @path ||= "/sitemap.xml"
69
+ @path
52
70
  end
53
71
 
54
72
  def yahoo_app_id(string = nil)
@@ -60,9 +78,14 @@ module Crawlable
60
78
  @links ||= []
61
79
  end
62
80
 
63
- def host(*args)
64
- @host = args unless args.empty?
65
- @host
81
+ def sitemap_host(*args)
82
+ @sitemap_host = args unless args.empty?
83
+ @sitemap_host
84
+ end
85
+
86
+ def stylesheet(value = nil)
87
+ @stylesheet = value if value
88
+ @stylesheet
66
89
  end
67
90
 
68
91
  def ping(*args)
@@ -76,12 +99,12 @@ module Crawlable
76
99
 
77
100
  def link(path, *args, &block)
78
101
  options = args.extract_options!
79
- options.assert_valid_keys(:priority, :changes, :updated_at, :host)
102
+ options.assert_valid_keys(:priority, :changes, :updated_at, :sitemap_host)
80
103
  options.reverse_merge!(
81
104
  :priority => 0.5,
82
105
  :changes => 'monthly',
83
106
  :updated_at => Time.now,
84
- :host => self.host
107
+ :host => self.sitemap_host
85
108
  )
86
109
 
87
110
  result = {
@@ -146,7 +169,16 @@ module Crawlable
146
169
  end
147
170
  end
148
171
  end
149
- builder.to_xml
172
+ xml = builder.to_xml
173
+
174
+ # can't add processing instructions with nokogiri
175
+ xml.gsub!("<?xml version=\"1.0\"?>") do |head|
176
+ result = head
177
+ result << "\n"
178
+ result << "<?xml-stylesheet type=\"text/xsl\" href=\"#{stylesheet}\"?>"
179
+ end if stylesheet
180
+
181
+ xml
150
182
  end
151
183
 
152
184
  def write(path, compress)
@@ -188,7 +220,7 @@ module Crawlable
188
220
  end
189
221
 
190
222
  def inspect
191
- "<Sitemap @host='#{host.to_s}' @sitemap_path='#{sitemap_path.to_s}' @ping='#{ping.inspect}' @links='#{links.inspect}'/>"
223
+ "<Sitemap @sitemap_host='#{sitemap_host.to_s}' @sitemap_path='#{sitemap_path.to_s}' @ping='#{ping.inspect}' @links='#{links.inspect}'/>"
192
224
  end
193
225
 
194
226
  end
@@ -3,6 +3,25 @@ module Crawlable
3
3
 
4
4
  initializer "authlogic_connect.authentication_hook" do |app|
5
5
  app.middleware.use Crawlable::Rack
6
+ add_helpers Crawlable::Sitemap, Crawlable::Feed
7
+ end
8
+
9
+ def add_helpers(*clazzes)
10
+ clazzes.each do |clazz|
11
+ clazz.class_eval do
12
+ include ApplicationHelper
13
+
14
+ if defined?(::Rails)
15
+ if ActionPack::VERSION::MAJOR == 3
16
+ include ::Rails.application.routes.url_helpers
17
+ else
18
+ require 'action_controller'
19
+ include ActionController::UrlWriter
20
+ end
21
+ default_url_options[:host] = 'localhost:3000'
22
+ end
23
+ end
24
+ end
6
25
  end
7
26
 
8
27
  end
@@ -0,0 +1,67 @@
1
+ <?xml version="1.0"?>
2
+ <feed xmlns="http://www.w3.org/2005/Atom">
3
+ <title>My RSS Feed</title>
4
+ <author>
5
+ <name>Lance Pollard</name>
6
+ </author>
7
+ <entry>
8
+ <title>title-0</title>
9
+ <updated>2010-07-02T06:16:43Z</updated>
10
+ <summary>Post summary</summary>
11
+ <content type="html"/>
12
+ </entry>
13
+ <entry>
14
+ <title>title-1</title>
15
+ <updated>2010-07-02T06:16:43Z</updated>
16
+ <summary>Post summary</summary>
17
+ <content type="html"/>
18
+ </entry>
19
+ <entry>
20
+ <title>title-2</title>
21
+ <updated>2010-07-02T06:16:43Z</updated>
22
+ <summary>Post summary</summary>
23
+ <content type="html"/>
24
+ </entry>
25
+ <entry>
26
+ <title>title-3</title>
27
+ <updated>2010-07-02T06:16:43Z</updated>
28
+ <summary>Post summary</summary>
29
+ <content type="html"/>
30
+ </entry>
31
+ <entry>
32
+ <title>title-4</title>
33
+ <updated>2010-07-02T06:16:43Z</updated>
34
+ <summary>Post summary</summary>
35
+ <content type="html"/>
36
+ </entry>
37
+ <entry>
38
+ <title>title-5</title>
39
+ <updated>2010-07-02T06:16:43Z</updated>
40
+ <summary>Post summary</summary>
41
+ <content type="html"/>
42
+ </entry>
43
+ <entry>
44
+ <title>title-6</title>
45
+ <updated>2010-07-02T06:16:43Z</updated>
46
+ <summary>Post summary</summary>
47
+ <content type="html"/>
48
+ </entry>
49
+ <entry>
50
+ <title>title-7</title>
51
+ <updated>2010-07-02T06:16:43Z</updated>
52
+ <summary>Post summary</summary>
53
+ <content type="html"/>
54
+ </entry>
55
+ <entry>
56
+ <title>title-8</title>
57
+ <updated>2010-07-02T06:16:43Z</updated>
58
+ <summary>Post summary</summary>
59
+ <content type="html"/>
60
+ </entry>
61
+ <entry>
62
+ <title>title-9</title>
63
+ <updated>2010-07-02T06:16:43Z</updated>
64
+ <summary>Post summary</summary>
65
+ <content type="html"/>
66
+ </entry>
67
+ </feed>
@@ -0,0 +1,68 @@
1
+ <?xml version="1.0"?>
2
+ <rss xmlns:dc="http://purl.org/dc/elements/1.1/">
3
+ <channel>
4
+ <title>My RSS Feed</title>
5
+ <link></link>
6
+ <description>Something nice and tidy</description>
7
+ <item>
8
+ <title>title-0</title>
9
+ <link>/posts/title-0</link>
10
+ <description></description>
11
+ <guid>/posts/title-0</guid>
12
+ </item>
13
+ <item>
14
+ <title>title-1</title>
15
+ <link>/posts/title-1</link>
16
+ <description></description>
17
+ <guid>/posts/title-1</guid>
18
+ </item>
19
+ <item>
20
+ <title>title-2</title>
21
+ <link>/posts/title-2</link>
22
+ <description></description>
23
+ <guid>/posts/title-2</guid>
24
+ </item>
25
+ <item>
26
+ <title>title-3</title>
27
+ <link>/posts/title-3</link>
28
+ <description></description>
29
+ <guid>/posts/title-3</guid>
30
+ </item>
31
+ <item>
32
+ <title>title-4</title>
33
+ <link>/posts/title-4</link>
34
+ <description></description>
35
+ <guid>/posts/title-4</guid>
36
+ </item>
37
+ <item>
38
+ <title>title-5</title>
39
+ <link>/posts/title-5</link>
40
+ <description></description>
41
+ <guid>/posts/title-5</guid>
42
+ </item>
43
+ <item>
44
+ <title>title-6</title>
45
+ <link>/posts/title-6</link>
46
+ <description></description>
47
+ <guid>/posts/title-6</guid>
48
+ </item>
49
+ <item>
50
+ <title>title-7</title>
51
+ <link>/posts/title-7</link>
52
+ <description></description>
53
+ <guid>/posts/title-7</guid>
54
+ </item>
55
+ <item>
56
+ <title>title-8</title>
57
+ <link>/posts/title-8</link>
58
+ <description></description>
59
+ <guid>/posts/title-8</guid>
60
+ </item>
61
+ <item>
62
+ <title>title-9</title>
63
+ <link>/posts/title-9</link>
64
+ <description></description>
65
+ <guid>/posts/title-9</guid>
66
+ </item>
67
+ </channel>
68
+ </rss>
@@ -0,0 +1,11 @@
1
+ Feed do
2
+ posts do
3
+ title "My RSS Feed"
4
+ author "Lance Pollard"
5
+ description "Something nice and tidy"
6
+
7
+ Post.all.each do |a|
8
+ entry "/posts/#{a.to_param}", :updated_at => a.updated_at, :title => a.title
9
+ end
10
+ end
11
+ end
@@ -10,7 +10,8 @@ class FeedTest < ActiveSupport::TestCase
10
10
  end
11
11
 
12
12
  should "create feed" do
13
- puts Crawlable::Feed.to_rss
13
+ puts Crawlable::Feed.process!("test/feeds.rb", "test/feed_data", :rss, :atom)
14
+ assert true
14
15
  end
15
16
 
16
17
  end
@@ -13,6 +13,7 @@ this = File.expand_path(File.dirname(__FILE__))
13
13
  require File.expand_path(File.join(this, '/../lib/crawlable'))
14
14
 
15
15
  Dir["#{this}/lib/*"].each { |c| require c if File.extname(c) == ".rb" }
16
+ require "#{this}/feeds"
16
17
  require File.expand_path(File.join(File.dirname(__FILE__), '/../lib/crawlable'))
17
18
 
18
19
  ActiveRecord::Base.class_eval do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crawlable
3
3
  version: !ruby/object:Gem::Version
4
- hash: 69
4
+ hash: 91
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
9
  - 1
10
- - 7
11
- version: 0.0.1.7
10
+ - 8
11
+ version: 0.0.1.8
12
12
  platform: ruby
13
13
  authors:
14
14
  - Lance Pollard
@@ -54,6 +54,9 @@ files:
54
54
  - lib/crawlable.rb
55
55
  - lib/engine.rb
56
56
  - rails/init.rb
57
+ - test/feed_data.atom
58
+ - test/feed_data.rss
59
+ - test/feeds.rb
57
60
  - test/lib/_database.rb
58
61
  - test/lib/post.rb
59
62
  - test/lib/routes.rb