sitemap_generator 0.2.6 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -192,7 +192,6 @@ Notes
192
192
  Known Bugs
193
193
  ========
194
194
 
195
- - Sitemaps.org [states][sitemaps_org] that no Sitemap XML file should be more than 10Mb uncompressed. The plugin will warn you about this, but does nothing to avoid it (like move some URLs into a later file).
196
195
  - There's no check on the size of a URL which [isn't supposed to exceed 2,048 bytes][sitemaps_xml].
197
196
  - Currently only supports one Sitemap Index file, which can contain 50,000 Sitemap files, each of which can contain 50,000 URLs, so it _only_ supports up to 2,500,000,000 (2.5 billion) URLs. I personally have no need of support for more URLs, but the plugin could be improved to support this.
198
197
 
data/Rakefile CHANGED
@@ -22,6 +22,28 @@ rescue LoadError
22
22
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
23
23
  end
24
24
 
25
+ #
26
+ # Helper methods
27
+ #
28
module Helpers
  extend self

  # Resolve the path fragment <tt>path</tt> against the directory that
  # contains this Rakefile and return the joined path.
  def local_path(path)
    rakefile_dir = File.dirname(__FILE__)
    File.join(rakefile_dir, path)
  end

  # Wipe <tt>path</tt>, recreate it as an empty directory, then copy the
  # project's capitalised top-level files and the bin, lib, rails, templates
  # and tasks directories into it.
  def prepare_path(path)
    rm_rf(path)
    mkdir_p(path)
    project_files = FileList["[A-Z]*", "{bin,lib,rails,templates,tasks}"]
    cp_r(project_files, path)
  end
end
43
+
44
+ #
45
+ # Tasks
46
+ #
25
47
  task :default => :test
26
48
 
27
49
  namespace :test do
@@ -37,29 +59,19 @@ namespace :test do
37
59
  namespace :prepare do
38
60
  task :gem do
39
61
  ENV["SITEMAP_RAILS"] = 'gem'
40
- prepare_path(local_path('spec/mock_app_gem/vendor/gems/sitemap_generator-1.2.3'))
41
- rm_rf(local_path('spec/mock_app_gem/public/sitemap*'))
62
+ Helpers.prepare_path(Helpers.local_path('spec/mock_app_gem/vendor/gems/sitemap_generator-1.2.3'))
63
+ rm_rf(Helpers.local_path('spec/mock_app_gem/public/sitemap*'))
42
64
  end
43
65
 
44
66
  task :plugin do
45
67
  ENV["SITEMAP_RAILS"] = 'plugin'
46
- prepare_path(local_path('spec/mock_app_plugin/vendor/plugins/sitemap_generator-1.2.3'))
47
- rm_rf(local_path('spec/mock_app_plugin/public/sitemap*'))
68
+ Helpers.prepare_path(Helpers.local_path('spec/mock_app_plugin/vendor/plugins/sitemap_generator-1.2.3'))
69
+ rm_rf(Helpers.local_path('spec/mock_app_plugin/public/sitemap*'))
48
70
  end
49
71
 
50
72
  task :rails3 do
51
73
  ENV["SITEMAP_RAILS"] = 'rails3'
52
- rm_rf(local_path('spec/mock_rails3_gem/public/sitemap*'))
53
- end
54
-
55
- def local_path(path)
56
- File.join(File.dirname(__FILE__), path)
57
- end
58
-
59
- def prepare_path(path)
60
- rm_rf path
61
- mkdir_p path
62
- cp_r(FileList["[A-Z]*", "{bin,lib,rails,templates,tasks}"], path)
74
+ rm_rf(Helpers.local_path('spec/mock_rails3_gem/public/sitemap*'))
63
75
  end
64
76
  end
65
77
  end
@@ -86,4 +98,4 @@ Rake::RDocTask.new(:rdoc) do |rdoc|
86
98
  rdoc.options << '--line-numbers' << '--inline-source'
87
99
  rdoc.rdoc_files.include('README.md')
88
100
  rdoc.rdoc_files.include('lib/**/*.rb')
89
- end
101
+ end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.6
1
+ 0.3.0
@@ -0,0 +1,10 @@
1
module SitemapGenerator
  module Builder
    # Formatting helpers mixed into the sitemap file builders.
    module Helper

      # Render <tt>date</tt> as a W3C datetime string expressed in UTC,
      # e.g. <tt>2004-10-01T18:23:17+00:00</tt>.
      #
      # <tt>date</tt> must respond to <tt>utc</tt>. Note that for a plain
      # Time object <tt>utc</tt> converts the receiver itself to UTC.
      def w3c_date(date)
        utc_time = date.utc
        utc_time.strftime("%Y-%m-%dT%H:%M:%S+00:00")
      end
    end
  end
end
@@ -0,0 +1,114 @@
1
+ require 'sitemap_generator/builder/helper'
2
+ require 'builder'
3
+ require 'zlib'
4
+
5
module SitemapGenerator
  module Builder
    # Accumulates the <url> entries of a single sitemap in memory, tracking
    # the uncompressed byte size and link count, and writes them out as one
    # gzipped XML file when finalized.
    class SitemapFile
      include SitemapGenerator::Builder::Helper

      attr_accessor :sitemap_path, :public_path, :filesize, :link_count, :hostname

      # <tt>public_path</tt> full path of the directory to write sitemaps in.
      #   Usually your Rails <tt>public/</tt> directory.
      #
      # <tt>sitemap_path</tt> relative path including filename of the sitemap
      #   file relative to <tt>public_path</tt>
      #
      # <tt>hostname</tt> hostname including protocol to use in all links
      #   e.g. http://en.google.ca
      def initialize(public_path, sitemap_path, hostname)
        self.sitemap_path = sitemap_path
        self.public_path = public_path
        self.hostname = hostname
        self.link_count = 0

        @xml_content = '' # XML urlset content
        # A urlset validates against sitemap.xsd; siteindex.xsd is the schema
        # for sitemap index files.
        @xml_wrapper_start = %q[<?xml version="1.0" encoding="UTF-8"?><urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">]
        @xml_wrapper_end = %q[</urlset>]
        # The wrapper is part of the written file, so it counts towards the size.
        self.filesize = @xml_wrapper_start.bytesize + @xml_wrapper_end.bytesize
      end

      # Modification time of the written file, or nil when it cannot be
      # determined (e.g. the file has not been written yet).
      def lastmod
        File.mtime(self.full_path) rescue nil
      end

      # Return a boolean indicating whether no links have been added yet.
      def empty?
        self.link_count == 0
      end

      # Public URL of the sitemap: <tt>sitemap_path</tt> resolved against
      # <tt>hostname</tt>.
      def full_url
        URI.join(self.hostname, self.sitemap_path).to_s
      end

      # Filesystem path the sitemap is written to. Memoized on first use.
      def full_path
        @full_path ||= File.join(self.public_path, self.sitemap_path)
      end

      # Return a boolean indicating whether the sitemap file can fit another link
      # of <tt>bytes</tt> bytes in size.
      def file_can_fit?(bytes)
        (self.filesize + bytes) < SitemapGenerator::MAX_SITEMAP_FILESIZE && self.link_count < SitemapGenerator::MAX_SITEMAP_LINKS
      end

      # Add a link to the sitemap file and return a boolean indicating whether the
      # link was added.
      #
      # If a link cannot be added, the file is too large or the link limit has
      # been reached. In that case the file is finalized (written to disk)
      # before false is returned.
      def add_link(link)
        xml = build_xml(::Builder::XmlMarkup.new, link)
        unless file_can_fit?(xml.bytesize)
          self.finalize!
          return false
        end

        @xml_content << xml
        self.filesize += xml.bytesize
        self.link_count += 1
        true
      end
      alias_method :<<, :add_link

      # Build the <url> element for <tt>link</tt> using <tt>builder</tt> and
      # return the XML as a String.
      def build_xml(builder, link)
        builder.url do
          builder.loc        link[:loc]
          builder.lastmod    w3c_date(link[:lastmod]) if link[:lastmod]
          builder.changefreq link[:changefreq]        if link[:changefreq]
          builder.priority   link[:priority]          if link[:priority]

          unless link[:images].blank?
            link[:images].each do |image|
              builder.image:image do
                builder.image :loc,          image[:loc]
                builder.image :caption,      image[:caption]      if image[:caption]
                builder.image :geo_location, image[:geo_location] if image[:geo_location]
                builder.image :title,        image[:title]        if image[:title]
                builder.image :license,      image[:license]      if image[:license]
              end
            end
          end
        end
        # Appending an empty string returns the accumulated markup.
        builder << ''
      end

      # Insert the content into the XML "wrapper" and write and close the file.
      #
      # All the xml content in the instance is cleared, but attributes like
      # <tt>filesize</tt> are still available. The instance is frozen
      # afterwards, so calling this again is a no-op.
      def finalize!
        return if self.frozen?

        # The block form opens the file in binary mode and closes the gzip
        # stream (and the file) even if a write raises.
        Zlib::GzipWriter.open(self.full_path) do |gz|
          gz.write @xml_wrapper_start
          gz.write @xml_content
          gz.write @xml_wrapper_end
        end
        @xml_content = @xml_wrapper_start = @xml_wrapper_end = ''
        self.freeze
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module SitemapGenerator
  module Builder
    # A sitemap index file: same accumulate-then-write behaviour as
    # SitemapFile, but wrapped in <sitemapindex> and listing sitemap files
    # rather than page URLs.
    class SitemapIndexFile < SitemapFile

      # Takes the same arguments as SitemapFile#initialize, then swaps in the
      # sitemap index wrapper and recomputes the starting file size from it.
      def initialize(*args)
        super(*args)

        @xml_content = '' # XML sitemapindex content
        @xml_wrapper_start = %q[<?xml version="1.0" encoding="UTF-8"?><sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">]
        @xml_wrapper_end = %q[</sitemapindex>]
        self.filesize = @xml_wrapper_start.bytesize + @xml_wrapper_end.bytesize
      end

      # Build the <sitemap> element for <tt>link</tt> using <tt>builder</tt>
      # and return the XML as a String.
      #
      # Entries of a sitemap index are <sitemap> elements, not <url> elements.
      def build_xml(builder, link)
        builder.sitemap do
          builder.loc     link[:loc]
          builder.lastmod w3c_date(link[:lastmod]) if link[:lastmod]
        end
        # Appending an empty string returns the accumulated markup.
        builder << ''
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
+ require 'sitemap_generator/builder/helper'
2
+ require 'sitemap_generator/builder/sitemap_file'
3
+ require 'sitemap_generator/builder/sitemap_index_file'
4
+
5
+ module SitemapGenerator
6
+ module Builder
7
+
8
+ end
9
+ end
@@ -0,0 +1,28 @@
1
module SitemapGenerator

  # Evaluate a sitemap config file within the context of a class that includes the
  # Rails URL helpers.
  class Interpreter

    if SitemapGenerator::Utilities.rails3?
      include ::Rails.application.routes.url_helpers
    else
      require 'action_controller'
      include ActionController::UrlWriter
    end

    # Evaluate the sitemap config in the context of this instance.
    #
    # <tt>sitemap_config_file</tt> (optional) path of the config file to
    # evaluate. Defaults to <tt>config/sitemap.rb</tt> under the Rails root.
    def initialize(sitemap_config_file=nil)
      sitemap_config_file ||= File.join(::Rails.root, 'config/sitemap.rb')
      # File.read closes the file handle once the content is read. Passing the
      # file name to eval makes errors raised by the config report that file
      # instead of "(eval)".
      # NOTE: the config file is executed as Ruby code; it must be trusted.
      eval(File.read(sitemap_config_file), binding, sitemap_config_file.to_s)
    end

    # KJV do we need this?  We should be using path_* helpers.
    # def self.default_url_options(options = nil)
    #   { :host => SitemapGenerator::Sitemap.default_host }
    # end

    # Evaluate the default sitemap config file and return the interpreter.
    def self.run
      new
    end
  end
end
@@ -1,30 +1,35 @@
1
1
  module SitemapGenerator
2
- class Link
3
- class << self
4
- def generate(path, options = {})
5
- options.assert_valid_keys(:priority, :changefreq, :lastmod, :host, :images)
6
- options.reverse_merge!(:priority => 0.5, :changefreq => 'weekly', :lastmod => Time.now, :host => Sitemap.default_host, :images => [])
7
- {
8
- :path => path,
9
- :priority => options[:priority],
10
- :changefreq => options[:changefreq],
11
- :lastmod => options[:lastmod],
12
- :host => options[:host],
13
- :loc => URI.join(options[:host], path).to_s,
14
- :images => prepare_images(options[:images], options[:host])
15
- }
2
+ module Link
3
+ extend self
4
+
5
+ # Return a Hash of options suitable to pass to a SitemapGenerator::Builder::SitemapFile instance.
6
+ def generate(path, options = {})
7
+ if path.is_a?(SitemapGenerator::Builder::SitemapFile)
8
+ options.reverse_merge!(:host => path.hostname, :lastmod => path.lastmod)
9
+ path = path.sitemap_path
16
10
  end
17
11
 
18
- # Maximum 1000 images. <tt>loc</tt> is required.
19
- # ?? Does the image URL have to be on the same host?
20
- def prepare_images(images, host)
21
- images.delete_if { |key,value| key[:loc] == nil }
22
- images.each do |r|
23
- r.assert_valid_keys(:loc, :caption, :geo_location, :title, :license)
24
- r[:loc] = URI.join(host, r[:loc]).to_s
25
- end
26
- images[0..(SitemapGenerator::MAX_IMAGES-1)]
12
+ options.assert_valid_keys(:priority, :changefreq, :lastmod, :host, :images)
13
+ options.reverse_merge!(:priority => 0.5, :changefreq => 'weekly', :lastmod => Time.now, :host => Sitemap.default_host, :images => [])
14
+ {
15
+ :path => path,
16
+ :priority => options[:priority],
17
+ :changefreq => options[:changefreq],
18
+ :lastmod => options[:lastmod],
19
+ :host => options[:host],
20
+ :loc => URI.join(options[:host], path).to_s,
21
+ :images => prepare_images(options[:images], options[:host])
22
+ }
23
+ end
24
+
25
+ # Return an Array of image option Hashes suitable to be parsed by SitemapGenerator::Builder::SitemapFile
26
+ def prepare_images(images, host)
27
+ images.delete_if { |key,value| key[:loc] == nil }
28
+ images.each do |r|
29
+ r.assert_valid_keys(:loc, :caption, :geo_location, :title, :license)
30
+ r[:loc] = URI.join(host, r[:loc]).to_s
27
31
  end
32
+ images[0..(SitemapGenerator::MAX_SITEMAP_IMAGES-1)]
28
33
  end
29
34
  end
30
35
  end
@@ -1,166 +1,173 @@
1
1
  require 'builder'
2
2
  require 'action_view'
3
3
 
4
+ # A LinkSet provisions a bunch of links to sitemap files. It also writes the index file
5
+ # which lists all the sitemap files written.
4
6
  module SitemapGenerator
5
7
  class LinkSet
6
- include SitemapGenerator::Helper
7
- include ActionView::Helpers::NumberHelper
8
-
9
- attr_accessor :default_host, :yahoo_app_id, :links
10
- attr_accessor :sitemaps
11
- attr_accessor :max_entries
12
- attr_accessor :link_count
13
-
14
- alias :sitemap_files :sitemaps
15
-
16
- # Create new link set instance.
17
- def initialize
18
- self.links = []
19
- self.sitemaps = []
20
- self.max_entries = SitemapGenerator::MAX_ENTRIES
21
- self.link_count = 0
22
- end
8
+ include ActionView::Helpers::NumberHelper # for number_with_delimiter
23
9
 
24
- # Add default links to sitemap files.
25
- def add_default_links
26
- links.push Link.generate('/', :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
27
- links.push Link.generate("/#{index_file}", :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
28
- self.link_count += 2
29
- end
10
+ attr_accessor :default_host, :public_path, :sitemaps_path
11
+ attr_accessor :sitemap, :sitemaps, :sitemap_index
12
+ attr_accessor :verbose, :yahoo_app_id
30
13
 
31
- # Add links to sitemap files passing a block.
32
- def add_links
33
- raise ArgumentError, "Default hostname not set" if default_host.blank?
34
- add_default_links if first_link?
35
- yield Mapper.new(self)
36
- end
14
+ # Evaluate the sitemap config file and write all sitemaps.
15
+ #
16
+ # This should be refactored so that we can have multiple instances
17
+ # of LinkSet.
18
+ def create
19
+ require 'sitemap_generator/interpreter'
37
20
 
38
- # Add links from mapper to sitemap files.
39
- def add_link(link)
40
- write_upcoming if enough_links?
41
- links.push link
42
- self.link_count += 1
43
- end
21
+ start_time = Time.now
22
+ SitemapGenerator::Interpreter.run
23
+ finalize!
24
+ end_time = Time.now
44
25
 
45
- # Write links to sitemap file.
46
- def write
47
- write_pending
26
+ puts "\nSitemap stats: #{number_with_delimiter(self.link_count)} links / #{self.sitemaps.size} files / " + ("%dm%02ds" % (end_time - start_time).divmod(60)) if verbose
48
27
  end
49
28
 
50
- # Write links to upcoming sitemap file.
51
- def write_upcoming
52
- write_sitemap(upcoming_file)
29
+ # <tt>public_path</tt> (optional) full path to the directory to write sitemaps in.
30
+ # Defaults to your Rails <tt>public/</tt> directory.
31
+ #
32
+ # <tt>sitemaps_path</tt> (optional) path fragment within public to write sitemaps
33
+ # to e.g. 'en/'. Sitemaps are written to <tt>public_path</tt> + <tt>sitemaps_path</tt>
34
+ #
35
+ # <tt>default_host</tt> hostname including protocol to use in all sitemap links
36
+ # e.g. http://en.google.ca
37
+ def initialize(public_path = nil, sitemaps_path = nil, default_host = nil)
38
+ public_path = File.join(::Rails.root, 'public/') if public_path.nil?
39
+ self.default_host = default_host
40
+ self.public_path = public_path
41
+ self.sitemaps_path = sitemaps_path
42
+
43
+ # Completed sitemaps
44
+ self.sitemaps = []
53
45
  end
54
46
 
55
- # Write pending links to sitemap, write index file if needed.
56
- def write_pending
57
- write_upcoming
58
- write_index
47
+ def link_count
48
+ self.sitemaps.map(&:link_count).inject(:+)
59
49
  end
60
50
 
61
- # Write links to sitemap file.
62
- def write_sitemap(file = upcoming_file)
63
- slice_index = 0
64
- buffer = ""
65
- xml = Builder::XmlMarkup.new(:target => buffer)
66
- eval(SitemapGenerator.templates.sitemap_xml, binding)
67
- filename = File.join(Rails.root, "public", file)
68
- write_file(filename, buffer)
69
- show_progress("Sitemap", filename, buffer) if verbose
70
- if slice_index==0
71
- links.clear
72
- else
73
- links.slice! slice_index, links.size
74
- end
51
+ # Called within the user's eval'ed sitemap config file. Add links to sitemap files
52
+ # passing a block.
53
+ #
54
+ # TODO: Refactor. The call chain is confusing and convoluted here.
55
+ def add_links
56
+ raise ArgumentError, "Default hostname not set" if default_host.blank?
75
57
 
76
- sitemaps.push filename
77
- end
58
+ # I'd rather have these calls in <tt>create</tt> but we have to wait
59
+ # for <tt>default_host</tt> to be set by the user's sitemap config
60
+ new_sitemap
61
+ add_default_links
78
62
 
79
- # Write sitemap links to sitemap index file.
80
- def write_index
81
- buffer = ""
82
- xml = Builder::XmlMarkup.new(:target => buffer)
83
- eval(SitemapGenerator.templates.sitemap_index, binding)
84
- filename = File.join(Rails.root, "public", index_file)
85
- write_file(filename, buffer)
86
- show_progress("Sitemap Index", filename, buffer) if verbose
87
- links.clear
88
- sitemaps.clear
63
+ yield Mapper.new(self)
89
64
  end
90
65
 
91
- # Return sitemap or sitemap index main name.
92
- def index_file
93
- "sitemap_index.xml.gz"
66
+ # Called from Mapper.
67
+ #
68
+ # Add a link to the current sitemap.
69
+ def add_link(link)
70
+ unless self.sitemap << link
71
+ new_sitemap
72
+ self.sitemap << link
73
+ end
94
74
  end
95
75
 
96
- # Return upcoming sitemap name with index.
97
- def upcoming_file
98
- "sitemap#{upcoming_index}.xml.gz" unless enough_sitemaps?
99
- end
76
+ # Add the current sitemap to the <tt>sitemaps</tt> Array and
77
+ # start a new sitemap.
78
+ #
79
+ # If the current sitemap is nil or empty it is not added.
80
+ def new_sitemap
81
+ unless self.sitemap_index
82
+ self.sitemap_index = SitemapGenerator::Builder::SitemapIndexFile.new(public_path, sitemap_index_path, default_host)
83
+ end
100
84
 
101
- # Return upcoming sitemap index, first is 1.
102
- def upcoming_index
103
- sitemaps.length + 1 unless enough_sitemaps?
104
- end
85
+ unless self.sitemap
86
+ self.sitemap = SitemapGenerator::Builder::SitemapFile.new(public_path, new_sitemap_path, default_host)
87
+ end
105
88
 
106
- # Return true if upcoming is first sitemap.
107
- def first_sitemap?
108
- sitemaps.empty?
109
- end
89
+ # Mark the sitemap as complete and add it to the sitemap index
90
+ unless self.sitemap.empty?
91
+ self.sitemap.finalize!
92
+ self.sitemap_index << Link.generate(self.sitemap)
93
+ self.sitemaps << self.sitemap
94
+ show_progress(self.sitemap) if verbose
110
95
 
111
- # Return true if sitemap index needed.
112
- def multiple_sitemaps?
113
- !first_sitemap?
96
+ self.sitemap = SitemapGenerator::Builder::SitemapFile.new(public_path, new_sitemap_path, default_host)
97
+ end
114
98
  end
115
99
 
116
- # Return true if more sitemaps can be added.
117
- def more_sitemaps?
118
- sitemaps.length < max_entries
100
+ # Report progress line.
101
+ def show_progress(sitemap)
102
+ uncompressed_size = number_to_human_size(sitemap.filesize)
103
+ compressed_size = number_to_human_size(File.size?(sitemap.full_path))
104
+ puts "+ #{sitemap.sitemap_path} #{sitemap.link_count} links / #{uncompressed_size} / #{compressed_size} gzipped"
119
105
  end
120
106
 
121
- # Return true if no sitemaps can be added.
122
- def enough_sitemaps?
123
- !more_sitemaps?
107
+ # Finalize all sitemap files
108
+ def finalize!
109
+ new_sitemap
110
+ self.sitemap_index.finalize!
124
111
  end
125
112
 
126
- # Return true if this is the first link added.
127
- def first_link?
128
- links.empty? && first_sitemap?
129
- end
113
+ # Ping search engines.
114
+ #
115
+ # @see http://en.wikipedia.org/wiki/Sitemap_index
116
+ def ping_search_engines
117
+ require 'open-uri'
118
+
119
+ sitemap_index_url = CGI.escape(self.sitemap_index.full_url)
120
+ search_engines = {
121
+ :google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=#{sitemap_index_url}",
122
+ :yahoo => "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=#{sitemap_index_url}&appid=#{yahoo_app_id}",
123
+ :ask => "http://submissions.ask.com/ping?sitemap=#{sitemap_index_url}",
124
+ :bing => "http://www.bing.com/webmaster/ping.aspx?siteMap=#{sitemap_index_url}",
125
+ :sitemap_writer => "http://www.sitemapwriter.com/notify.php?crawler=all&url=#{sitemap_index_url}"
126
+ }
127
+
128
+ puts "\n" if verbose
129
+ search_engines.each do |engine, link|
130
+ next if engine == :yahoo && !self.yahoo_app_id
131
+ begin
132
+ open(link)
133
+ puts "Successful ping of #{engine.to_s.titleize}" if verbose
134
+ rescue Timeout::Error, StandardError => e
135
+ puts "Ping failed for #{engine.to_s.titleize}: #{e.inspect} (URL #{link})" if verbose
136
+ end
137
+ end
130
138
 
131
- # Return true if more links can be added.
132
- def more_links?
133
- links.length < max_entries
134
- end
139
+ if !self.yahoo_app_id && verbose
140
+ puts "\n"
141
+ puts <<-END.gsub(/^\s+/, '')
142
+ To ping Yahoo you require a Yahoo AppID. Add it to your config/sitemap.rb with:
135
143
 
136
- # Return true if no further links can be added.
137
- def enough_links?
138
- !more_links?
139
- end
144
+ SitemapGenerator::Sitemap.yahoo_app_id = "my_app_id"
140
145
 
141
- # Commit buffer to gzipped file.
142
- def write_file(name, buffer)
143
- Zlib::GzipWriter.open(name) { |gz| gz.write buffer }
146
+ For more information see http://developer.yahoo.com/search/siteexplorer/V1/updateNotification.html
147
+ END
148
+ end
144
149
  end
145
150
 
146
- # Report progress line.
147
- def show_progress(title, filename, buffer)
148
- puts "+ #{filename}"
149
- puts "** #{title} too big! The uncompressed size exceeds 10Mb" if buffer.size > 10.megabytes
151
+ protected
152
+
153
+ def add_default_links
154
+ self.sitemap << Link.generate('/', :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
155
+ self.sitemap << Link.generate(self.sitemap_index, :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
150
156
  end
151
157
 
152
- # Ping search engines passing sitemap location.
153
- def ping_search_engines
154
- super index_file
158
+ # Return the current sitemap filename with index.
159
+ #
160
+ # The index depends on the length of the <tt>sitemaps</tt> array.
161
+ def new_sitemap_path
162
+ File.join(self.sitemaps_path || '', "sitemap#{self.sitemaps.length + 1}.xml.gz")
155
163
  end
156
164
 
157
- # Create sitemap files in output directory.
158
- def create_files(verbose = true)
159
- start_time = Time.now
160
- load_sitemap_rb
161
- write
162
- stop_time = Time.now
163
- puts "Sitemap stats: #{number_with_delimiter(SitemapGenerator::Sitemap.link_count)} links, " + ("%dm%02ds" % (stop_time - start_time).divmod(60)) if verbose
165
+ # Return the current sitemap index filename.
166
+ #
167
+ # At the moment we only support one index file which can link to
168
+ # up to 50,000 sitemap files.
169
+ def sitemap_index_path
170
+ File.join(self.sitemaps_path || '', 'sitemap_index.xml.gz')
164
171
  end
165
172
  end
166
- end
173
+ end
@@ -7,8 +7,6 @@ module SitemapGenerator
7
7
  # Define an accessor method for each template file.
8
8
  class Templates
9
9
  FILES = {
10
- :sitemap_index => 'sitemap_index.builder',
11
- :sitemap_xml => 'xml_sitemap.builder',
12
10
  :sitemap_sample => 'sitemap.rb',
13
11
  }
14
12
 
@@ -28,7 +26,7 @@ module SitemapGenerator
28
26
 
29
27
  # Return the full path to a template.
30
28
  #
31
- # <tt>file</tt> template symbol e.g. <tt>:sitemap_index</tt>
29
+ # <tt>file</tt> template symbol e.g. <tt>:sitemap_sample</tt>
32
30
  def template_path(template)
33
31
  File.join(@root, 'templates', self.class::FILES[template])
34
32
  end
@@ -3,14 +3,14 @@ module SitemapGenerator
3
3
  extend self
4
4
 
5
5
  # Copy templates/sitemap.rb to config if not there yet.
6
- def install_sitemap_rb
6
+ def install_sitemap_rb(verbose=false)
7
7
  if File.exist?(File.join(RAILS_ROOT, 'config/sitemap.rb'))
8
- puts "already exists: config/sitemap.rb, file not copied"
8
+ puts "already exists: config/sitemap.rb, file not copied" if verbose
9
9
  else
10
10
  FileUtils.cp(
11
11
  SitemapGenerator.templates.template_path(:sitemap_sample),
12
12
  File.join(RAILS_ROOT, 'config/sitemap.rb'))
13
- puts "created: config/sitemap.rb"
13
+ puts "created: config/sitemap.rb" if verbose
14
14
  end
15
15
  end
16
16
 
@@ -25,5 +25,30 @@ module SitemapGenerator
25
25
  def clean_files
26
26
  FileUtils.rm(Dir[File.join(RAILS_ROOT, 'public/sitemap*.xml.gz')])
27
27
  end
28
+
29
+ # Returns whether this environment is using ActionPack
30
+ # version 3.0.0 or greater.
31
+ #
32
+ # @return [Boolean]
33
+ def self.rails3?
34
+ # The ActionPack module is always loaded automatically in Rails >= 3
35
+ return false unless defined?(ActionPack) && defined?(ActionPack::VERSION)
36
+
37
+ version =
38
+ if defined?(ActionPack::VERSION::MAJOR)
39
+ ActionPack::VERSION::MAJOR
40
+ else
41
+ # Rails 1.2
42
+ ActionPack::VERSION::Major
43
+ end
44
+
45
+ # 3.0.0.beta1 acts more like ActionPack 2
46
+ # for purposes of this method
47
+ # (checking whether block helpers require = or -).
48
+ # This extra check can be removed when beta2 is out.
49
+ version >= 3 &&
50
+ !(defined?(ActionPack::VERSION::TINY) &&
51
+ ActionPack::VERSION::TINY == "0.beta")
52
+ end
28
53
  end
29
54
  end
@@ -1,19 +1,19 @@
1
+ require 'sitemap_generator/builder'
1
2
  require 'sitemap_generator/mapper'
2
3
  require 'sitemap_generator/link'
3
- require 'sitemap_generator/rails_helper'
4
- require 'sitemap_generator/helper'
5
4
  require 'sitemap_generator/link_set'
6
- require 'sitemap_generator/helper'
7
5
  require 'sitemap_generator/templates'
8
6
  require 'sitemap_generator/utilities'
9
-
10
- require 'sitemap_generator/railtie' if SitemapGenerator::RailsHelper.rails3?
7
+ require 'sitemap_generator/railtie' if SitemapGenerator::Utilities.rails3?
11
8
 
12
9
  module SitemapGenerator
13
10
  silence_warnings do
14
11
  VERSION = File.read(File.dirname(__FILE__) + "/../VERSION").strip
15
- MAX_ENTRIES = 50_000
16
- MAX_IMAGES = 1_000
12
+ MAX_SITEMAP_FILES = 50_000 # max sitemap links per index file
13
+ MAX_SITEMAP_LINKS = 50_000 # max links per sitemap
14
+ MAX_SITEMAP_IMAGES = 1_000 # max images per url
15
+ MAX_SITEMAP_FILESIZE = 10.megabytes # bytes
16
+
17
17
  Sitemap = LinkSet.new
18
18
  end
19
19
 
@@ -1,4 +1,3 @@
1
- require 'zlib'
2
1
  begin
3
2
  require 'sitemap_generator'
4
3
  rescue LoadError, NameError
@@ -8,7 +7,7 @@ end
8
7
  namespace :sitemap do
9
8
  desc "Install a default config/sitemap.rb file"
10
9
  task :install do
11
- SitemapGenerator::Utilities.install_sitemap_rb
10
+ SitemapGenerator::Utilities.install_sitemap_rb(verbose)
12
11
  end
13
12
 
14
13
  desc "Delete all Sitemap files in public/ directory"
@@ -25,19 +24,8 @@ namespace :sitemap do
25
24
  task 'refresh:no_ping' => ['sitemap:create']
26
25
 
27
26
  task :create => [:environment] do
28
- # TODO: Move away from auto-instantiating SitemapGenerator::Sitemap
29
- # and move to a more natural Sitemap.new or similar.
30
- if SitemapGenerator::RailsHelper.rails3?
31
- SitemapGenerator::Sitemap.class_eval do
32
- include Rails.application.routes.url_helpers
33
- end
34
- else
35
- require 'action_controller'
36
- SitemapGenerator::Sitemap.class_eval do
37
- include ActionController::UrlWriter
38
- end
39
- end
40
- SitemapGenerator::Sitemap.create_files
27
+ SitemapGenerator::Sitemap.verbose = verbose
28
+ SitemapGenerator::Sitemap.create
41
29
  end
42
30
  end
43
31
 
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 2
8
- - 6
9
- version: 0.2.6
7
+ - 3
8
+ - 0
9
+ version: 0.3.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Adam Salter
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-05-17 00:00:00 -07:00
18
+ date: 2010-05-21 00:00:00 -07:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -44,11 +44,14 @@ files:
44
44
  - Rakefile
45
45
  - VERSION
46
46
  - lib/sitemap_generator.rb
47
- - lib/sitemap_generator/helper.rb
47
+ - lib/sitemap_generator/builder.rb
48
+ - lib/sitemap_generator/builder/helper.rb
49
+ - lib/sitemap_generator/builder/sitemap_file.rb
50
+ - lib/sitemap_generator/builder/sitemap_index_file.rb
51
+ - lib/sitemap_generator/interpreter.rb
48
52
  - lib/sitemap_generator/link.rb
49
53
  - lib/sitemap_generator/link_set.rb
50
54
  - lib/sitemap_generator/mapper.rb
51
- - lib/sitemap_generator/rails_helper.rb
52
55
  - lib/sitemap_generator/railtie.rb
53
56
  - lib/sitemap_generator/tasks.rb
54
57
  - lib/sitemap_generator/templates.rb
@@ -57,8 +60,6 @@ files:
57
60
  - rails/uninstall.rb
58
61
  - tasks/sitemap_generator_tasks.rake
59
62
  - templates/sitemap.rb
60
- - templates/sitemap_index.builder
61
- - templates/xml_sitemap.builder
62
63
  has_rdoc: true
63
64
  homepage: http://github.com/kjvarga/sitemap_generator
64
65
  licenses: []
@@ -1,55 +0,0 @@
1
- module SitemapGenerator
2
- # UrlHelpers are included by the rake tasks. This is not ideal, but should
3
- # suffice until things are better organized.
4
- module Helper
5
- def self.included(base)
6
- base.class_eval do
7
- def self.default_url_options(options = nil)
8
- { :host => SitemapGenerator::Sitemap.default_host }
9
- end
10
- end
11
- end
12
-
13
- def load_sitemap_rb
14
- sitemap_mapper_file = File.join(Rails.root, 'config/sitemap.rb')
15
- eval(open(sitemap_mapper_file).read)
16
- end
17
-
18
- def url_with_hostname(path)
19
- URI.join(SitemapGenerator::Sitemap.default_host, path).to_s
20
- end
21
-
22
- def w3c_date(date)
23
- date.utc.strftime("%Y-%m-%dT%H:%M:%S+00:00")
24
- end
25
-
26
- def ping_search_engines(sitemap_index)
27
- require 'open-uri'
28
- index_location = CGI.escape(url_with_hostname(sitemap_index))
29
- # engines list from http://en.wikipedia.org/wiki/Sitemap_index
30
- yahoo_app_id = SitemapGenerator::Sitemap.yahoo_app_id
31
- {:google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=#{index_location}",
32
- :yahoo => "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=#{index_location}&appid=#{yahoo_app_id}",
33
- :ask => "http://submissions.ask.com/ping?sitemap=#{index_location}",
34
- :bing => "http://www.bing.com/webmaster/ping.aspx?siteMap=#{index_location}",
35
- :sitemap_writer => "http://www.sitemapwriter.com/notify.php?crawler=all&url=#{index_location}"}.each do |engine, link|
36
- begin
37
- unless SitemapGenerator::Sitemap.yahoo_app_id == false
38
- open(link)
39
- puts "Successful ping of #{engine.to_s.titleize}" if verbose
40
- end
41
- rescue Timeout::Error, StandardError => e
42
- puts "Ping failed for #{engine.to_s.titleize}: #{e.inspect}" if verbose
43
- puts <<-END if engine == :yahoo && verbose
44
- Yahoo requires an 'AppID' for more than one ping per "timeframe", you can either:
45
- - remove yahoo from the ping list (config/sitemap.rb):
46
- SitemapGenerator::Sitemap.yahoo_app_id = false
47
- - or add your Yahoo AppID to the generator (config/sitemap.rb):
48
- SitemapGenerator::Sitemap.yahoo_app_id = "my_app_id"
49
- For more information: http://developer.yahoo.com/search/siteexplorer/V1/updateNotification.html
50
- END
51
- end
52
- end
53
- end
54
- end
55
- end
@@ -1,28 +0,0 @@
1
- module SitemapGenerator
2
- module RailsHelper
3
- # Returns whether this environment is using ActionPack
4
- # version 3.0.0 or greater.
5
- #
6
- # @return [Boolean]
7
- def self.rails3?
8
- # The ActionPack module is always loaded automatically in Rails >= 3
9
- return false unless defined?(ActionPack) && defined?(ActionPack::VERSION)
10
-
11
- version =
12
- if defined?(ActionPack::VERSION::MAJOR)
13
- ActionPack::VERSION::MAJOR
14
- else
15
- # Rails 1.2
16
- ActionPack::VERSION::Major
17
- end
18
-
19
- # 3.0.0.beta1 acts more like ActionPack 2
20
- # for purposes of this method
21
- # (checking whether block helpers require = or -).
22
- # This extra check can be removed when beta2 is out.
23
- version >= 3 &&
24
- !(defined?(ActionPack::VERSION::TINY) &&
25
- ActionPack::VERSION::TINY == "0.beta")
26
- end
27
- end
28
- end
@@ -1,23 +0,0 @@
1
- # encoding: utf-8
2
- # <?xml version="1.0" encoding="UTF-8"?>
3
- # <sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
4
- # <sitemap>
5
- # <loc>http://www.example.com/sitemap1.xml.gz</loc>
6
- # <lastmod>2004-10-01T18:23:17+00:00</lastmod>
7
- # </sitemap>
8
- # <sitemap>
9
- # <loc>http://www.example.com/sitemap2.xml.gz</loc>
10
- # <lastmod>2005-01-01</lastmod>
11
- # </sitemap>
12
- # </sitemapindex>
13
-
14
- xml.instruct!
15
- xml.sitemapindex "xmlns" => "http://www.sitemaps.org/schemas/sitemap/0.9" do
16
- sitemap_files.each do |file|
17
- xml.sitemap do
18
- xml.loc url_with_hostname(File.basename(file))
19
- xml.lastmod w3c_date(File.mtime(file))
20
- end
21
- end
22
- end
23
-
@@ -1,38 +0,0 @@
1
- # encoding: utf-8
2
- xml.instruct!
3
- xml.urlset "xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance",
4
- "xsi:schemaLocation" => "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd",
5
- "xmlns:image" => "http://www.google.com/schemas/sitemap-image/1.1",
6
- "xmlns" => "http://www.sitemaps.org/schemas/sitemap/0.9" do
7
-
8
- links.each_with_index do |link,index|
9
- buffer_url = ""
10
- url = Builder::XmlMarkup.new(:target=>buffer_url)
11
- url.url do
12
- url.loc link[:loc]
13
- url.lastmod w3c_date(link[:lastmod]) if link[:lastmod]
14
- url.changefreq link[:changefreq] if link[:changefreq]
15
- url.priority link[:priority] if link[:priority]
16
-
17
- unless link[:images].blank?
18
- link[:images].each do |image|
19
- url.image:image do
20
- url.image :loc, image[:loc]
21
- url.image :caption, image[:caption] if image[:caption]
22
- url.image :geo_location, image[:geo_location] if image[:geo_location]
23
- url.image :title, image[:title] if image[:title]
24
- url.image :license, image[:license] if image[:license]
25
- end
26
- end
27
- end
28
- end
29
-
30
- if (buffer+buffer_url).size < 10.megabytes
31
- xml << buffer_url
32
- else
33
- slice_index = index
34
- break
35
- end
36
- end
37
- end
38
-