crawlable 0.0.1.7 → 0.0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -5,7 +5,7 @@ require 'rake/gempackagetask'
5
5
  spec = Gem::Specification.new do |s|
6
6
  s.name = "crawlable"
7
7
  s.authors = ["Lance Pollard"]
8
- s.version = "0.0.1.7"
8
+ s.version = "0.0.1.8"
9
9
  s.summary = "Crawlable: Super DRY Sitemaps for Rails and Sinatra Apps"
10
10
  s.homepage = "http://github.com/viatropos/crawlable"
11
11
  s.email = "lancejpollard@gmail.com"
@@ -6,6 +6,7 @@ require 'open-uri'
6
6
 
7
7
  this = File.dirname(__FILE__)
8
8
  Dir["#{this}/crawlable/*"].each { |c| require c if File.extname(c) == ".rb" }
9
+ require "#{this}/engine"
9
10
 
10
11
  def Sitemap(*args, &block)
11
12
  Crawlable::Sitemap.define!(*args, &block)
@@ -2,7 +2,16 @@ module Crawlable
2
2
  class Feed
3
3
 
4
4
  class << self
5
- attr_accessor :options, :call_options
5
+ attr_accessor :options, :call_options, :feeds
6
+
7
+ def feeds
8
+ @feeds ||= []
9
+ @feeds
10
+ end
11
+
12
+ def paths
13
+ feeds.map(&:path)
14
+ end
6
15
 
7
16
  def define!(*args, &block)
8
17
  self.options = args.extract_options!#.merge(:run => args.shift)
@@ -10,6 +19,21 @@ module Crawlable
10
19
  instance_eval(&block) if block_given?
11
20
  end
12
21
 
22
+ def find(path, directory)
23
+ if path =~ /(\/feed\/?)(atom|rss2|rss|rdf)?$/i
24
+ feed = $1
25
+ format = $2
26
+
27
+ format = format.blank? ? "rss" : format.to_s
28
+ file = path.gsub(format, "").split("/").delete_if {|i| i.blank? }[0..-2].join("/")
29
+
30
+ return unless self.paths.include?(file)
31
+ file = file.split("/").join("_")
32
+ file = File.join(directory, "feeds", "#{file}.#{format}")
33
+ return file
34
+ end
35
+ end
36
+
13
37
  def parse!(path, options = {})
14
38
  path ||= File.join(::Rails.root, 'config/initializers/feeds.rb')
15
39
  self.call_options = options.symbolize_keys
@@ -17,16 +41,25 @@ module Crawlable
17
41
  self.call_options = nil
18
42
  end
19
43
 
44
+ def write(to, *formats)
45
+ self.feeds.each { |feed| feed.write(to, *formats) }
46
+ end
47
+
48
+ def process!(from, to, *formats, &block)
49
+ parse!(from)
50
+ write(to, *formats)
51
+ end
52
+
20
53
  def method_missing(meth, *args, &block)
21
54
  if block_given?
22
55
  if !self.call_options.blank?
23
56
  if call_options.has_key?(meth.to_sym)
24
- self.new(meth, *args, &block)
57
+ self.feeds << self.new(meth, *args, &block)
25
58
  else
26
59
  super(meth, *args, &block)
27
60
  end
28
61
  else
29
- self.new(meth, *args, &block)
62
+ self.feeds << self.new(meth, *args, &block)
30
63
  end
31
64
  else
32
65
  super(meth, *args, &block)
@@ -35,18 +68,26 @@ module Crawlable
35
68
 
36
69
  end
37
70
 
38
- attr_accessor :title, :url, :description, :master, :copyright, :updated_at
71
+ attr_accessor :name, :title, :url, :description, :master, :copyright, :updated_at, :path
39
72
 
40
73
  def initialize(*args, &block)
41
74
  options = args.extract_options!
42
75
 
43
- name = args.shift
76
+ self.name = args.shift
44
77
 
45
78
  options.each do |k, v|
46
79
  self.send(k, v) if self.respond_to?(k)
47
80
  end
48
81
 
49
82
  instance_eval(&block)
83
+
84
+ self
85
+ end
86
+
87
+ def path(value = nil)
88
+ @path = value if value
89
+ @path ||= name.to_s
90
+ @path
50
91
  end
51
92
 
52
93
  def copyright(string = nil)
@@ -101,7 +142,6 @@ module Crawlable
101
142
  xml.description self.description
102
143
 
103
144
  self.entries.each do |entry|
104
- puts entry.inspect
105
145
  xml.item do
106
146
  xml.title entry[:title]
107
147
  xml.link entry[:url]
@@ -130,19 +170,19 @@ module Crawlable
130
170
  builder = Nokogiri::XML::Builder.new do |xml|
131
171
  xml.feed "xmlns" => "http://www.w3.org/2005/Atom" do
132
172
  xml.title self.title
133
- xml.link "rel" => "self", "href" => url_for(:only_path => false, :controller => 'feeds', :action => 'atom')
134
- xml.link "rel" => "alternate", "href" => url_for(:only_path => false, :controller => 'posts')
135
- xml.id url_for(:only_path => false, :controller => 'posts')
173
+ #xml.link "rel" => "self", "href" => url_for(:only_path => false, :controller => 'feeds', :action => 'atom')
174
+ #xml.link "rel" => "alternate", "href" => url_for(:only_path => false, :controller => 'posts')
175
+ #xml.id url_for(:only_path => false, :controller => 'posts')
136
176
  xml.updated self.updated_at.strftime "%Y-%m-%dT%H:%M:%SZ" if self.updated_at
137
177
  xml.author { xml.name self.author }
138
178
 
139
- self.entries.each do |entries|
179
+ self.entries.each do |entry|
140
180
  xml.entry do
141
- xml.title entries.title
142
- xml.link "rel" => "alternate", "href" => url_for(:only_path => false, :controller => 'posts', :action => 'show', :id => entries.id)
143
- xml.id url_for(:only_path => false, :controller => 'posts', :action => 'show', :id => entries.id)
144
- xml.updated entries.updated_at.strftime "%Y-%m-%dT%H:%M:%SZ"
145
- xml.author { xml.name entries.author.name }
181
+ xml.title entry[:title]
182
+ #xml.link "rel" => "alternate", "href" => url_for(:only_path => false, :controller => 'posts', :action => 'show', :id => entry[:id])
183
+ #xml.id url_for(:only_path => false, :controller => 'posts', :action => 'show', :id => entry[:id])
184
+ xml.updated entry[:updated_at].strftime "%Y-%m-%dT%H:%M:%SZ"
185
+ xml.author { xml.name entry[:author] } if entry[:author]
146
186
  xml.summary "Post summary"
147
187
  xml.content "type" => "html" do
148
188
  #xml.text! render(:partial => "posts/post", :post => post)
@@ -155,5 +195,14 @@ module Crawlable
155
195
  builder.to_xml
156
196
  end
157
197
 
198
+ def write(to, *formats)
199
+ formats.each do |format|
200
+ FileUtils.mkdir_p(File.dirname(to))
201
+ File.open("#{to}.#{format.to_s}", 'wb') do |file|
202
+ file.puts send("to_#{format.to_s}")
203
+ end
204
+ end
205
+ end
206
+
158
207
  end
159
208
  end
@@ -6,14 +6,21 @@ module Crawlable
6
6
  end
7
7
 
8
8
  def call(env)
9
- if ENV["HEROKU"]
10
- return sitmap if env['REQUEST_PATH'] =~ /\/sitemap\.xml/i
9
+ if using_heroku?(env) || true
10
+ if file = Crawlable::Sitemap.find(env['REQUEST_PATH'], heroku_writable_directory)
11
+ return sitemap(file)
12
+ elsif file = Crawlable::Feed.find(env['REQUEST_PATH'], heroku_writable_directory)
13
+ return feed(file)
14
+ end
11
15
  end
12
16
  @app.call(env)
13
17
  end
14
18
 
15
- def sitemap
16
- file = File.join(heroku_writable_directory, "sitemap.xml.gz")
19
+ def sitemap(file)
20
+ [200, { 'Cache-Control' => 'public, max-age=86400', 'Content-Length' => File.size(file).to_s, 'Content-Type' => 'text/xml' }, IO.read(file)]
21
+ end
22
+
23
+ def feed(file)
17
24
  [200, { 'Cache-Control' => 'public, max-age=86400', 'Content-Length' => File.size(file).to_s, 'Content-Type' => 'text/xml' }, IO.read(file)]
18
25
  end
19
26
 
@@ -21,5 +28,17 @@ module Crawlable
21
28
  "#{Rails.root}/tmp"
22
29
  end
23
30
 
31
+ def using_heroku?(env)
32
+ if env["HEROKU"].nil?
33
+ if env["HEROKU_PORT"].nil? && ENV["HEROKU_TYPE"].nil?
34
+ env["HEROKU"] = false
35
+ else
36
+ env["HEROKU"] = true
37
+ end
38
+ end
39
+
40
+ env["HEROKU"] == true
41
+ end
42
+
24
43
  end
25
44
  end
@@ -8,6 +8,16 @@ module Crawlable
8
8
  self.instance = self.new(*args, &block)
9
9
  end
10
10
 
11
+ def path
12
+ self.instance ? self.instance.path : ""
13
+ end
14
+
15
+ def find(path, directory)
16
+ if path =~ /#{Regexp.escape(self.path)}/i
17
+ return File.join(directory, self.path)
18
+ end
19
+ end
20
+
11
21
  def parse!(path)
12
22
  path ||= File.join(::Rails.root, 'config/sitemap.rb')
13
23
  eval(IO.read(path))
@@ -17,7 +27,7 @@ module Crawlable
17
27
  self.instance.write(to, compress)
18
28
  end
19
29
 
20
- def process(from, to, compress = false, &block)
30
+ def process!(from, to, compress = false, &block)
21
31
  parse!(from)
22
32
  write(to, compress)
23
33
  end
@@ -37,11 +47,10 @@ module Crawlable
37
47
 
38
48
  end
39
49
 
40
- attr_accessor :links, :host, :ping, :yahoo_app_id
50
+ attr_accessor :links, :sitemap_host, :ping, :yahoo_app_id, :path, :stylesheet
41
51
 
42
52
  def initialize(*args, &block)
43
- self.host = args.shift
44
- raise "Please define a host: 'Sitemap 'http://my-site.com' do ..." if self.host.blank?
53
+ self.sitemap_host = args.shift
45
54
  options = args.extract_options!
46
55
 
47
56
  options.each do |k, v|
@@ -49,6 +58,15 @@ module Crawlable
49
58
  end
50
59
 
51
60
  instance_eval(&block)
61
+
62
+ raise "Please define a host: 'Sitemap 'http://my-site.com' do ..." if self.sitemap_host.blank?
63
+
64
+ end
65
+
66
+ def path(value = nil)
67
+ @path = value if value
68
+ @path ||= "/sitemap.xml"
69
+ @path
52
70
  end
53
71
 
54
72
  def yahoo_app_id(string = nil)
@@ -60,9 +78,14 @@ module Crawlable
60
78
  @links ||= []
61
79
  end
62
80
 
63
- def host(*args)
64
- @host = args unless args.empty?
65
- @host
81
+ def sitemap_host(*args)
82
+ @sitemap_host = args unless args.empty?
83
+ @sitemap_host
84
+ end
85
+
86
+ def stylesheet(value = nil)
87
+ @stylesheet = value if value
88
+ @stylesheet
66
89
  end
67
90
 
68
91
  def ping(*args)
@@ -76,12 +99,12 @@ module Crawlable
76
99
 
77
100
  def link(path, *args, &block)
78
101
  options = args.extract_options!
79
- options.assert_valid_keys(:priority, :changes, :updated_at, :host)
102
+ options.assert_valid_keys(:priority, :changes, :updated_at, :sitemap_host)
80
103
  options.reverse_merge!(
81
104
  :priority => 0.5,
82
105
  :changes => 'monthly',
83
106
  :updated_at => Time.now,
84
- :host => self.host
107
+ :host => self.sitemap_host
85
108
  )
86
109
 
87
110
  result = {
@@ -146,7 +169,16 @@ module Crawlable
146
169
  end
147
170
  end
148
171
  end
149
- builder.to_xml
172
+ xml = builder.to_xml
173
+
174
+ # can't add processing instructions with nokogiri
175
+ xml.gsub!("<?xml version=\"1.0\"?>") do |head|
176
+ result = head
177
+ result << "\n"
178
+ result << "<?xml-stylesheet type=\"text/xsl\" href=\"#{stylesheet}\"?>"
179
+ end if stylesheet
180
+
181
+ xml
150
182
  end
151
183
 
152
184
  def write(path, compress)
@@ -188,7 +220,7 @@ module Crawlable
188
220
  end
189
221
 
190
222
  def inspect
191
- "<Sitemap @host='#{host.to_s}' @sitemap_path='#{sitemap_path.to_s}' @ping='#{ping.inspect}' @links='#{links.inspect}'/>"
223
+ "<Sitemap @sitemap_host='#{sitemap_host.to_s}' @sitemap_path='#{sitemap_path.to_s}' @ping='#{ping.inspect}' @links='#{links.inspect}'/>"
192
224
  end
193
225
 
194
226
  end
@@ -3,6 +3,25 @@ module Crawlable
3
3
 
4
4
  initializer "authlogic_connect.authentication_hook" do |app|
5
5
  app.middleware.use Crawlable::Rack
6
+ add_helpers Crawlable::Sitemap, Crawlable::Feed
7
+ end
8
+
9
+ def add_helpers(*clazzes)
10
+ clazzes.each do |clazz|
11
+ clazz.class_eval do
12
+ include ApplicationHelper
13
+
14
+ if defined?(::Rails)
15
+ if ActionPack::VERSION::MAJOR == 3
16
+ include ::Rails.application.routes.url_helpers
17
+ else
18
+ require 'action_controller'
19
+ include ActionController::UrlWriter
20
+ end
21
+ default_url_options[:host] = 'localhost:3000'
22
+ end
23
+ end
24
+ end
6
25
  end
7
26
 
8
27
  end
@@ -0,0 +1,67 @@
1
+ <?xml version="1.0"?>
2
+ <feed xmlns="http://www.w3.org/2005/Atom">
3
+ <title>My RSS Feed</title>
4
+ <author>
5
+ <name>Lance Pollard</name>
6
+ </author>
7
+ <entry>
8
+ <title>title-0</title>
9
+ <updated>2010-07-02T06:16:43Z</updated>
10
+ <summary>Post summary</summary>
11
+ <content type="html"/>
12
+ </entry>
13
+ <entry>
14
+ <title>title-1</title>
15
+ <updated>2010-07-02T06:16:43Z</updated>
16
+ <summary>Post summary</summary>
17
+ <content type="html"/>
18
+ </entry>
19
+ <entry>
20
+ <title>title-2</title>
21
+ <updated>2010-07-02T06:16:43Z</updated>
22
+ <summary>Post summary</summary>
23
+ <content type="html"/>
24
+ </entry>
25
+ <entry>
26
+ <title>title-3</title>
27
+ <updated>2010-07-02T06:16:43Z</updated>
28
+ <summary>Post summary</summary>
29
+ <content type="html"/>
30
+ </entry>
31
+ <entry>
32
+ <title>title-4</title>
33
+ <updated>2010-07-02T06:16:43Z</updated>
34
+ <summary>Post summary</summary>
35
+ <content type="html"/>
36
+ </entry>
37
+ <entry>
38
+ <title>title-5</title>
39
+ <updated>2010-07-02T06:16:43Z</updated>
40
+ <summary>Post summary</summary>
41
+ <content type="html"/>
42
+ </entry>
43
+ <entry>
44
+ <title>title-6</title>
45
+ <updated>2010-07-02T06:16:43Z</updated>
46
+ <summary>Post summary</summary>
47
+ <content type="html"/>
48
+ </entry>
49
+ <entry>
50
+ <title>title-7</title>
51
+ <updated>2010-07-02T06:16:43Z</updated>
52
+ <summary>Post summary</summary>
53
+ <content type="html"/>
54
+ </entry>
55
+ <entry>
56
+ <title>title-8</title>
57
+ <updated>2010-07-02T06:16:43Z</updated>
58
+ <summary>Post summary</summary>
59
+ <content type="html"/>
60
+ </entry>
61
+ <entry>
62
+ <title>title-9</title>
63
+ <updated>2010-07-02T06:16:43Z</updated>
64
+ <summary>Post summary</summary>
65
+ <content type="html"/>
66
+ </entry>
67
+ </feed>
@@ -0,0 +1,68 @@
1
+ <?xml version="1.0"?>
2
+ <rss xmlns:dc="http://purl.org/dc/elements/1.1/">
3
+ <channel>
4
+ <title>My RSS Feed</title>
5
+ <link></link>
6
+ <description>Something nice and tidy</description>
7
+ <item>
8
+ <title>title-0</title>
9
+ <link>/posts/title-0</link>
10
+ <description></description>
11
+ <guid>/posts/title-0</guid>
12
+ </item>
13
+ <item>
14
+ <title>title-1</title>
15
+ <link>/posts/title-1</link>
16
+ <description></description>
17
+ <guid>/posts/title-1</guid>
18
+ </item>
19
+ <item>
20
+ <title>title-2</title>
21
+ <link>/posts/title-2</link>
22
+ <description></description>
23
+ <guid>/posts/title-2</guid>
24
+ </item>
25
+ <item>
26
+ <title>title-3</title>
27
+ <link>/posts/title-3</link>
28
+ <description></description>
29
+ <guid>/posts/title-3</guid>
30
+ </item>
31
+ <item>
32
+ <title>title-4</title>
33
+ <link>/posts/title-4</link>
34
+ <description></description>
35
+ <guid>/posts/title-4</guid>
36
+ </item>
37
+ <item>
38
+ <title>title-5</title>
39
+ <link>/posts/title-5</link>
40
+ <description></description>
41
+ <guid>/posts/title-5</guid>
42
+ </item>
43
+ <item>
44
+ <title>title-6</title>
45
+ <link>/posts/title-6</link>
46
+ <description></description>
47
+ <guid>/posts/title-6</guid>
48
+ </item>
49
+ <item>
50
+ <title>title-7</title>
51
+ <link>/posts/title-7</link>
52
+ <description></description>
53
+ <guid>/posts/title-7</guid>
54
+ </item>
55
+ <item>
56
+ <title>title-8</title>
57
+ <link>/posts/title-8</link>
58
+ <description></description>
59
+ <guid>/posts/title-8</guid>
60
+ </item>
61
+ <item>
62
+ <title>title-9</title>
63
+ <link>/posts/title-9</link>
64
+ <description></description>
65
+ <guid>/posts/title-9</guid>
66
+ </item>
67
+ </channel>
68
+ </rss>
@@ -0,0 +1,11 @@
1
+ Feed do
2
+ posts do
3
+ title "My RSS Feed"
4
+ author "Lance Pollard"
5
+ description "Something nice and tidy"
6
+
7
+ Post.all.each do |a|
8
+ entry "/posts/#{a.to_param}", :updated_at => a.updated_at, :title => a.title
9
+ end
10
+ end
11
+ end
@@ -10,7 +10,8 @@ class FeedTest < ActiveSupport::TestCase
10
10
  end
11
11
 
12
12
  should "create feed" do
13
- puts Crawlable::Feed.to_rss
13
+ puts Crawlable::Feed.process!("test/feeds.rb", "test/feed_data", :rss, :atom)
14
+ assert true
14
15
  end
15
16
 
16
17
  end
@@ -13,6 +13,7 @@ this = File.expand_path(File.dirname(__FILE__))
13
13
  require File.expand_path(File.join(this, '/../lib/crawlable'))
14
14
 
15
15
  Dir["#{this}/lib/*"].each { |c| require c if File.extname(c) == ".rb" }
16
+ require "#{this}/feeds"
16
17
  require File.expand_path(File.join(File.dirname(__FILE__), '/../lib/crawlable'))
17
18
 
18
19
  ActiveRecord::Base.class_eval do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crawlable
3
3
  version: !ruby/object:Gem::Version
4
- hash: 69
4
+ hash: 91
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
9
  - 1
10
- - 7
11
- version: 0.0.1.7
10
+ - 8
11
+ version: 0.0.1.8
12
12
  platform: ruby
13
13
  authors:
14
14
  - Lance Pollard
@@ -54,6 +54,9 @@ files:
54
54
  - lib/crawlable.rb
55
55
  - lib/engine.rb
56
56
  - rails/init.rb
57
+ - test/feed_data.atom
58
+ - test/feed_data.rss
59
+ - test/feeds.rb
57
60
  - test/lib/_database.rb
58
61
  - test/lib/post.rb
59
62
  - test/lib/routes.rb