sitemap_generator 0.2.6 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -192,7 +192,6 @@ Notes
192
192
  Known Bugs
193
193
  ========
194
194
 
195
- - Sitemaps.org [states][sitemaps_org] that no Sitemap XML file should be more than 10Mb uncompressed. The plugin will warn you about this, but does nothing to avoid it (like move some URLs into a later file).
196
195
  - There's no check on the size of a URL which [isn't supposed to exceed 2,048 bytes][sitemaps_xml].
197
196
  - Currently only supports one Sitemap Index file, which can contain 50,000 Sitemap files which can each contain 50,000 urls, so it _only_ supports up to 2,500,000,000 (2.5 billion) urls. I personally have no need of support for more urls, but plugin could be improved to support this.
198
197
 
data/Rakefile CHANGED
@@ -22,6 +22,28 @@ rescue LoadError
22
22
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
23
23
  end
24
24
 
25
+ #
26
+ # Helper methods
27
+ #
28
+ module Helpers
29
+ extend self
30
+
31
+ # Return a full local path to path fragment <tt>path</tt>
32
+ def local_path(path)
33
+ File.join(File.dirname(__FILE__), path)
34
+ end
35
+
36
+ # Copy all of the local files into <tt>path</tt> after completely cleaning it
37
+ def prepare_path(path)
38
+ rm_rf path
39
+ mkdir_p path
40
+ cp_r(FileList["[A-Z]*", "{bin,lib,rails,templates,tasks}"], path)
41
+ end
42
+ end
43
+
44
+ #
45
+ # Tasks
46
+ #
25
47
  task :default => :test
26
48
 
27
49
  namespace :test do
@@ -37,29 +59,19 @@ namespace :test do
37
59
  namespace :prepare do
38
60
  task :gem do
39
61
  ENV["SITEMAP_RAILS"] = 'gem'
40
- prepare_path(local_path('spec/mock_app_gem/vendor/gems/sitemap_generator-1.2.3'))
41
- rm_rf(local_path('spec/mock_app_gem/public/sitemap*'))
62
+ Helpers.prepare_path(Helpers.local_path('spec/mock_app_gem/vendor/gems/sitemap_generator-1.2.3'))
63
+ rm_rf(Helpers.local_path('spec/mock_app_gem/public/sitemap*'))
42
64
  end
43
65
 
44
66
  task :plugin do
45
67
  ENV["SITEMAP_RAILS"] = 'plugin'
46
- prepare_path(local_path('spec/mock_app_plugin/vendor/plugins/sitemap_generator-1.2.3'))
47
- rm_rf(local_path('spec/mock_app_plugin/public/sitemap*'))
68
+ Helpers.prepare_path(Helpers.local_path('spec/mock_app_plugin/vendor/plugins/sitemap_generator-1.2.3'))
69
+ rm_rf(Helpers.local_path('spec/mock_app_plugin/public/sitemap*'))
48
70
  end
49
71
 
50
72
  task :rails3 do
51
73
  ENV["SITEMAP_RAILS"] = 'rails3'
52
- rm_rf(local_path('spec/mock_rails3_gem/public/sitemap*'))
53
- end
54
-
55
- def local_path(path)
56
- File.join(File.dirname(__FILE__), path)
57
- end
58
-
59
- def prepare_path(path)
60
- rm_rf path
61
- mkdir_p path
62
- cp_r(FileList["[A-Z]*", "{bin,lib,rails,templates,tasks}"], path)
74
+ rm_rf(Helpers.local_path('spec/mock_rails3_gem/public/sitemap*'))
63
75
  end
64
76
  end
65
77
  end
@@ -86,4 +98,4 @@ Rake::RDocTask.new(:rdoc) do |rdoc|
86
98
  rdoc.options << '--line-numbers' << '--inline-source'
87
99
  rdoc.rdoc_files.include('README.md')
88
100
  rdoc.rdoc_files.include('lib/**/*.rb')
89
- end
101
+ end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.6
1
+ 0.3.0
@@ -0,0 +1,10 @@
1
module SitemapGenerator
  module Builder
    # Formatting helpers shared by the sitemap file builders.
    module Helper

      # Return +date+ formatted as a W3C datetime string in UTC,
      # e.g. "2010-05-21T19:00:00+00:00".
      #
      # <tt>date</tt> a Time (or any object responding to <tt>getutc</tt> or
      # <tt>utc</tt> whose result responds to <tt>strftime</tt>).
      #
      # Time#utc converts the receiver in place, which would silently change
      # the caller's object, so the non-mutating <tt>getutc</tt> is preferred
      # whenever it is available.
      def w3c_date(date)
        utc_date = date.respond_to?(:getutc) ? date.getutc : date.utc
        utc_date.strftime("%Y-%m-%dT%H:%M:%S+00:00")
      end
    end
  end
end
@@ -0,0 +1,114 @@
1
+ require 'sitemap_generator/builder/helper'
2
+ require 'builder'
3
+ require 'zlib'
4
+
5
module SitemapGenerator
  module Builder
    # Accumulates <url> entries in memory for one sitemap and writes them out
    # as a single gzipped XML file when finalized.
    class SitemapFile
      include SitemapGenerator::Builder::Helper

      attr_accessor :sitemap_path, :public_path, :filesize, :link_count, :hostname

      # <tt>public_path</tt> full path of the directory to write sitemaps in.
      #   Usually your Rails <tt>public/</tt> directory.
      #
      # <tt>sitemap_path</tt> relative path including filename of the sitemap
      #   file relative to <tt>public_path</tt>
      #
      # <tt>hostname</tt> hostname including protocol to use in all links
      #   e.g. http://en.google.ca
      def initialize(public_path, sitemap_path, hostname)
        self.sitemap_path = sitemap_path
        self.public_path = public_path
        self.hostname = hostname
        self.link_count = 0

        @xml_content = '' # XML urlset content
        # A <urlset> document is described by sitemap.xsd; siteindex.xsd is the
        # schema for <sitemapindex> documents.
        @xml_wrapper_start = %q[<?xml version="1.0" encoding="UTF-8"?><urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">]
        @xml_wrapper_end = %q[</urlset>]
        # The uncompressed size starts at the size of the wrapper alone.
        self.filesize = @xml_wrapper_start.bytesize + @xml_wrapper_end.bytesize
      end

      # Return the modification time of the written file, or nil when it
      # cannot be determined (e.g. the file has not been written yet).
      def lastmod
        File.mtime(full_path)
      rescue StandardError
        nil
      end

      # Return true if no links have been added.
      def empty?
        link_count == 0
      end

      # Return the public URL of this file as a String.
      def full_url
        URI.join(hostname, sitemap_path).to_s
      end

      # Return the full filesystem path of this file.  The value is memoized
      # on first use.
      def full_path
        @full_path ||= File.join(public_path, sitemap_path)
      end

      # Return a boolean indicating whether the sitemap file can fit another link
      # of <tt>bytes</tt> bytes in size.
      def file_can_fit?(bytes)
        (filesize + bytes) < SitemapGenerator::MAX_SITEMAP_FILESIZE &&
          link_count < SitemapGenerator::MAX_SITEMAP_LINKS
      end

      # Add a link to the sitemap file and return a boolean indicating whether the
      # link was added.
      #
      # If a link cannot be added, the file is too large or the link limit has
      # been reached; in that case the file is finalized (written to disk).
      def add_link(link)
        xml = build_xml(::Builder::XmlMarkup.new, link)
        unless file_can_fit?(xml.bytesize)
          finalize!
          return false
        end

        @xml_content << xml
        self.filesize += xml.bytesize
        self.link_count += 1
        true
      end
      alias_method :<<, :add_link

      # Build the <url> element for <tt>link</tt> with <tt>builder</tt> and
      # return the XML as a String.
      def build_xml(builder, link)
        builder.url do
          builder.loc link[:loc]
          builder.lastmod w3c_date(link[:lastmod]) if link[:lastmod]
          builder.changefreq link[:changefreq] if link[:changefreq]
          builder.priority link[:priority] if link[:priority]

          unless link[:images].blank?
            link[:images].each do |image|
              builder.image :image do
                builder.image :loc, image[:loc]
                builder.image :caption, image[:caption] if image[:caption]
                builder.image :geo_location, image[:geo_location] if image[:geo_location]
                builder.image :title, image[:title] if image[:title]
                builder.image :license, image[:license] if image[:license]
              end
            end
          end
        end
        # Appending an empty string returns the builder's output target.
        builder << ''
      end

      # Insert the content into the XML "wrapper" and write and close the file.
      #
      # All the xml content in the instance is cleared, but attributes like
      # <tt>filesize</tt> are still available.  The instance is frozen
      # afterwards, so calling this again is a no-op.
      def finalize!
        return if frozen?

        # GzipWriter.open writes in binary mode and closes both the gzip
        # stream and the underlying file even when a write raises.
        Zlib::GzipWriter.open(full_path) do |gz|
          gz.write @xml_wrapper_start
          gz.write @xml_content
          gz.write @xml_wrapper_end
        end
        @xml_content = @xml_wrapper_start = @xml_wrapper_end = ''
        freeze
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module SitemapGenerator
  module Builder
    # A sitemap index file: same accumulate-and-write behaviour as
    # SitemapFile, but wrapped in <sitemapindex> and listing sitemap files
    # instead of page URLs.
    class SitemapIndexFile < SitemapFile

      # Takes the same arguments as SitemapFile#initialize and then replaces
      # the XML wrapper (and the initial filesize derived from it) with the
      # sitemap index wrapper.
      def initialize(*args)
        super(*args)

        @xml_content = '' # XML sitemapindex content
        @xml_wrapper_start = %q[<?xml version="1.0" encoding="UTF-8"?><sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">]
        @xml_wrapper_end = %q[</sitemapindex>]
        self.filesize = @xml_wrapper_start.bytesize + @xml_wrapper_end.bytesize
      end

      # Build the entry for one sitemap file and return the XML as a String.
      #
      # Children of <sitemapindex> are <sitemap> elements (not <url>), each
      # holding a <loc> and an optional <lastmod>.
      def build_xml(builder, link)
        builder.sitemap do
          builder.loc link[:loc]
          builder.lastmod w3c_date(link[:lastmod]) if link[:lastmod]
        end
        # Appending an empty string returns the builder's output target.
        builder << ''
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
+ require 'sitemap_generator/builder/helper'
2
+ require 'sitemap_generator/builder/sitemap_file'
3
+ require 'sitemap_generator/builder/sitemap_index_file'
4
+
5
+ module SitemapGenerator
6
+ module Builder
7
+
8
+ end
9
+ end
@@ -0,0 +1,28 @@
1
module SitemapGenerator

  # Evaluate a sitemap config file within the context of a class that includes the
  # Rails URL helpers.
  class Interpreter

    if SitemapGenerator::Utilities.rails3?
      include ::Rails.application.routes.url_helpers
    else
      require 'action_controller'
      include ActionController::UrlWriter
    end

    # Read and evaluate the sitemap config file in the context of this
    # instance.
    #
    # <tt>sitemap_config_file</tt> (optional) path to the config file.
    #   Defaults to <tt>config/sitemap.rb</tt> under the Rails root.
    #
    # NOTE: the file is executed with eval; it must be trusted application
    # code, never user-supplied content.
    def initialize(sitemap_config_file=nil)
      sitemap_config_file ||= File.join(::Rails.root, 'config/sitemap.rb')
      config_path = sitemap_config_file.to_s
      # File.read closes the file handle, and passing the path to eval makes
      # errors in the config report the real file name and line.
      eval(File.read(config_path), binding, config_path)
    end

    # KJV do we need this?  We should be using path_* helpers.
    # def self.default_url_options(options = nil)
    #   { :host => SitemapGenerator::Sitemap.default_host }
    # end

    # Evaluate the default sitemap config file and return the interpreter.
    def self.run
      new
    end
  end
end
@@ -1,30 +1,35 @@
1
1
  module SitemapGenerator
2
- class Link
3
- class << self
4
- def generate(path, options = {})
5
- options.assert_valid_keys(:priority, :changefreq, :lastmod, :host, :images)
6
- options.reverse_merge!(:priority => 0.5, :changefreq => 'weekly', :lastmod => Time.now, :host => Sitemap.default_host, :images => [])
7
- {
8
- :path => path,
9
- :priority => options[:priority],
10
- :changefreq => options[:changefreq],
11
- :lastmod => options[:lastmod],
12
- :host => options[:host],
13
- :loc => URI.join(options[:host], path).to_s,
14
- :images => prepare_images(options[:images], options[:host])
15
- }
2
+ module Link
3
+ extend self
4
+
5
+ # Return a Hash of options suitable to pass to a SitemapGenerator::Builder::SitemapFile instance.
6
+ def generate(path, options = {})
7
+ if path.is_a?(SitemapGenerator::Builder::SitemapFile)
8
+ options.reverse_merge!(:host => path.hostname, :lastmod => path.lastmod)
9
+ path = path.sitemap_path
16
10
  end
17
11
 
18
- # Maximum 1000 images. <tt>loc</tt> is required.
19
- # ?? Does the image URL have to be on the same host?
20
- def prepare_images(images, host)
21
- images.delete_if { |key,value| key[:loc] == nil }
22
- images.each do |r|
23
- r.assert_valid_keys(:loc, :caption, :geo_location, :title, :license)
24
- r[:loc] = URI.join(host, r[:loc]).to_s
25
- end
26
- images[0..(SitemapGenerator::MAX_IMAGES-1)]
12
+ options.assert_valid_keys(:priority, :changefreq, :lastmod, :host, :images)
13
+ options.reverse_merge!(:priority => 0.5, :changefreq => 'weekly', :lastmod => Time.now, :host => Sitemap.default_host, :images => [])
14
+ {
15
+ :path => path,
16
+ :priority => options[:priority],
17
+ :changefreq => options[:changefreq],
18
+ :lastmod => options[:lastmod],
19
+ :host => options[:host],
20
+ :loc => URI.join(options[:host], path).to_s,
21
+ :images => prepare_images(options[:images], options[:host])
22
+ }
23
+ end
24
+
25
+ # Return an Array of image option Hashes suitable to be parsed by SitemapGenerator::Builder::SitemapFile
26
+ def prepare_images(images, host)
27
+ images.delete_if { |key,value| key[:loc] == nil }
28
+ images.each do |r|
29
+ r.assert_valid_keys(:loc, :caption, :geo_location, :title, :license)
30
+ r[:loc] = URI.join(host, r[:loc]).to_s
27
31
  end
32
+ images[0..(SitemapGenerator::MAX_SITEMAP_IMAGES-1)]
28
33
  end
29
34
  end
30
35
  end
@@ -1,166 +1,173 @@
1
1
  require 'builder'
2
2
  require 'action_view'
3
3
 
4
+ # A LinkSet provisions a bunch of links to sitemap files. It also writes the index file
5
+ # which lists all the sitemap files written.
4
6
  module SitemapGenerator
5
7
  class LinkSet
6
- include SitemapGenerator::Helper
7
- include ActionView::Helpers::NumberHelper
8
-
9
- attr_accessor :default_host, :yahoo_app_id, :links
10
- attr_accessor :sitemaps
11
- attr_accessor :max_entries
12
- attr_accessor :link_count
13
-
14
- alias :sitemap_files :sitemaps
15
-
16
- # Create new link set instance.
17
- def initialize
18
- self.links = []
19
- self.sitemaps = []
20
- self.max_entries = SitemapGenerator::MAX_ENTRIES
21
- self.link_count = 0
22
- end
8
+ include ActionView::Helpers::NumberHelper # for number_with_delimiter
23
9
 
24
- # Add default links to sitemap files.
25
- def add_default_links
26
- links.push Link.generate('/', :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
27
- links.push Link.generate("/#{index_file}", :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
28
- self.link_count += 2
29
- end
10
+ attr_accessor :default_host, :public_path, :sitemaps_path
11
+ attr_accessor :sitemap, :sitemaps, :sitemap_index
12
+ attr_accessor :verbose, :yahoo_app_id
30
13
 
31
- # Add links to sitemap files passing a block.
32
- def add_links
33
- raise ArgumentError, "Default hostname not set" if default_host.blank?
34
- add_default_links if first_link?
35
- yield Mapper.new(self)
36
- end
14
+ # Evaluate the sitemap config file and write all sitemaps.
15
+ #
16
+ # This should be refactored so that we can have multiple instances
17
+ # of LinkSet.
18
+ def create
19
+ require 'sitemap_generator/interpreter'
37
20
 
38
- # Add links from mapper to sitemap files.
39
- def add_link(link)
40
- write_upcoming if enough_links?
41
- links.push link
42
- self.link_count += 1
43
- end
21
+ start_time = Time.now
22
+ SitemapGenerator::Interpreter.run
23
+ finalize!
24
+ end_time = Time.now
44
25
 
45
- # Write links to sitemap file.
46
- def write
47
- write_pending
26
+ puts "\nSitemap stats: #{number_with_delimiter(self.link_count)} links / #{self.sitemaps.size} files / " + ("%dm%02ds" % (end_time - start_time).divmod(60)) if verbose
48
27
  end
49
28
 
50
- # Write links to upcoming sitemap file.
51
- def write_upcoming
52
- write_sitemap(upcoming_file)
29
+ # <tt>public_path</tt> (optional) full path to the directory to write sitemaps in.
30
+ # Defaults to your Rails <tt>public/</tt> directory.
31
+ #
32
+ # <tt>sitemaps_path</tt> (optional) path fragment within public to write sitemaps
33
+ # to e.g. 'en/'. Sitemaps are written to <tt>public_path</tt> + <tt>sitemaps_path</tt>
34
+ #
35
+ # <tt>default_host</tt> hostname including protocol to use in all sitemap links
36
+ # e.g. http://en.google.ca
37
+ def initialize(public_path = nil, sitemaps_path = nil, default_host = nil)
38
+ public_path = File.join(::Rails.root, 'public/') if public_path.nil?
39
+ self.default_host = default_host
40
+ self.public_path = public_path
41
+ self.sitemaps_path = sitemaps_path
42
+
43
+ # Completed sitemaps
44
+ self.sitemaps = []
53
45
  end
54
46
 
55
- # Write pending links to sitemap, write index file if needed.
56
- def write_pending
57
- write_upcoming
58
- write_index
47
# Return the total number of links across all completed sitemaps.
#
# Starts the fold at 0 so that an empty <tt>sitemaps</tt> Array yields 0
# rather than nil.
def link_count
  sitemaps.inject(0) { |total, sitemap_file| total + sitemap_file.link_count }
end
60
50
 
61
- # Write links to sitemap file.
62
- def write_sitemap(file = upcoming_file)
63
- slice_index = 0
64
- buffer = ""
65
- xml = Builder::XmlMarkup.new(:target => buffer)
66
- eval(SitemapGenerator.templates.sitemap_xml, binding)
67
- filename = File.join(Rails.root, "public", file)
68
- write_file(filename, buffer)
69
- show_progress("Sitemap", filename, buffer) if verbose
70
- if slice_index==0
71
- links.clear
72
- else
73
- links.slice! slice_index, links.size
74
- end
51
+ # Called within the user's eval'ed sitemap config file. Add links to sitemap files
52
+ # passing a block.
53
+ #
54
+ # TODO: Refactor. The call chain is confusing and convoluted here.
55
+ def add_links
56
+ raise ArgumentError, "Default hostname not set" if default_host.blank?
75
57
 
76
- sitemaps.push filename
77
- end
58
+ # I'd rather have these calls in <tt>create</tt> but we have to wait
59
+ # for <tt>default_host</tt> to be set by the user's sitemap config
60
+ new_sitemap
61
+ add_default_links
78
62
 
79
- # Write sitemap links to sitemap index file.
80
- def write_index
81
- buffer = ""
82
- xml = Builder::XmlMarkup.new(:target => buffer)
83
- eval(SitemapGenerator.templates.sitemap_index, binding)
84
- filename = File.join(Rails.root, "public", index_file)
85
- write_file(filename, buffer)
86
- show_progress("Sitemap Index", filename, buffer) if verbose
87
- links.clear
88
- sitemaps.clear
63
+ yield Mapper.new(self)
89
64
  end
90
65
 
91
- # Return sitemap or sitemap index main name.
92
- def index_file
93
- "sitemap_index.xml.gz"
66
+ # Called from Mapper.
67
+ #
68
+ # Add a link to the current sitemap.
69
+ def add_link(link)
70
+ unless self.sitemap << link
71
+ new_sitemap
72
+ self.sitemap << link
73
+ end
94
74
  end
95
75
 
96
- # Return upcoming sitemap name with index.
97
- def upcoming_file
98
- "sitemap#{upcoming_index}.xml.gz" unless enough_sitemaps?
99
- end
76
+ # Add the current sitemap to the <tt>sitemaps</tt> Array and
77
+ # start a new sitemap.
78
+ #
79
+ # If the current sitemap is nil or empty it is not added.
80
+ def new_sitemap
81
+ unless self.sitemap_index
82
+ self.sitemap_index = SitemapGenerator::Builder::SitemapIndexFile.new(public_path, sitemap_index_path, default_host)
83
+ end
100
84
 
101
- # Return upcoming sitemap index, first is 1.
102
- def upcoming_index
103
- sitemaps.length + 1 unless enough_sitemaps?
104
- end
85
+ unless self.sitemap
86
+ self.sitemap = SitemapGenerator::Builder::SitemapFile.new(public_path, new_sitemap_path, default_host)
87
+ end
105
88
 
106
- # Return true if upcoming is first sitemap.
107
- def first_sitemap?
108
- sitemaps.empty?
109
- end
89
+ # Mark the sitemap as complete and add it to the sitemap index
90
+ unless self.sitemap.empty?
91
+ self.sitemap.finalize!
92
+ self.sitemap_index << Link.generate(self.sitemap)
93
+ self.sitemaps << self.sitemap
94
+ show_progress(self.sitemap) if verbose
110
95
 
111
- # Return true if sitemap index needed.
112
- def multiple_sitemaps?
113
- !first_sitemap?
96
+ self.sitemap = SitemapGenerator::Builder::SitemapFile.new(public_path, new_sitemap_path, default_host)
97
+ end
114
98
  end
115
99
 
116
- # Return true if more sitemaps can be added.
117
- def more_sitemaps?
118
- sitemaps.length < max_entries
100
+ # Report progress line.
101
+ def show_progress(sitemap)
102
+ uncompressed_size = number_to_human_size(sitemap.filesize)
103
+ compressed_size = number_to_human_size(File.size?(sitemap.full_path))
104
+ puts "+ #{sitemap.sitemap_path} #{sitemap.link_count} links / #{uncompressed_size} / #{compressed_size} gzipped"
119
105
  end
120
106
 
121
- # Return true if no sitemaps can be added.
122
- def enough_sitemaps?
123
- !more_sitemaps?
107
+ # Finalize all sitemap files
108
+ def finalize!
109
+ new_sitemap
110
+ self.sitemap_index.finalize!
124
111
  end
125
112
 
126
- # Return true if this is the first link added.
127
- def first_link?
128
- links.empty? && first_sitemap?
129
- end
113
+ # Ping search engines.
114
+ #
115
+ # @see http://en.wikipedia.org/wiki/Sitemap_index
116
+ def ping_search_engines
117
+ require 'open-uri'
118
+
119
+ sitemap_index_url = CGI.escape(self.sitemap_index.full_url)
120
+ search_engines = {
121
+ :google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=#{sitemap_index_url}",
122
+ :yahoo => "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=#{sitemap_index_url}&appid=#{yahoo_app_id}",
123
+ :ask => "http://submissions.ask.com/ping?sitemap=#{sitemap_index_url}",
124
+ :bing => "http://www.bing.com/webmaster/ping.aspx?siteMap=#{sitemap_index_url}",
125
+ :sitemap_writer => "http://www.sitemapwriter.com/notify.php?crawler=all&url=#{sitemap_index_url}"
126
+ }
127
+
128
+ puts "\n" if verbose
129
+ search_engines.each do |engine, link|
130
+ next if engine == :yahoo && !self.yahoo_app_id
131
+ begin
132
+ open(link)
133
+ puts "Successful ping of #{engine.to_s.titleize}" if verbose
134
+ rescue Timeout::Error, StandardError => e
135
+ puts "Ping failed for #{engine.to_s.titleize}: #{e.inspect} (URL #{link})" if verbose
136
+ end
137
+ end
130
138
 
131
- # Return true if more links can be added.
132
- def more_links?
133
- links.length < max_entries
134
- end
139
+ if !self.yahoo_app_id && verbose
140
+ puts "\n"
141
+ puts <<-END.gsub(/^\s+/, '')
142
+ To ping Yahoo you require a Yahoo AppID. Add it to your config/sitemap.rb with:
135
143
 
136
- # Return true if no further links can be added.
137
- def enough_links?
138
- !more_links?
139
- end
144
+ SitemapGenerator::Sitemap.yahoo_app_id = "my_app_id"
140
145
 
141
- # Commit buffer to gzipped file.
142
- def write_file(name, buffer)
143
- Zlib::GzipWriter.open(name) { |gz| gz.write buffer }
146
+ For more information see http://developer.yahoo.com/search/siteexplorer/V1/updateNotification.html
147
+ END
148
+ end
144
149
  end
145
150
 
146
- # Report progress line.
147
- def show_progress(title, filename, buffer)
148
- puts "+ #{filename}"
149
- puts "** #{title} too big! The uncompressed size exceeds 10Mb" if buffer.size > 10.megabytes
151
+ protected
152
+
153
+ def add_default_links
154
+ self.sitemap << Link.generate('/', :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
155
+ self.sitemap << Link.generate(self.sitemap_index, :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
150
156
  end
151
157
 
152
- # Ping search engines passing sitemap location.
153
- def ping_search_engines
154
- super index_file
158
+ # Return the current sitemap filename with index.
159
+ #
160
+ # The index depends on the length of the <tt>sitemaps</tt> array.
161
+ def new_sitemap_path
162
+ File.join(self.sitemaps_path || '', "sitemap#{self.sitemaps.length + 1}.xml.gz")
155
163
  end
156
164
 
157
- # Create sitemap files in output directory.
158
- def create_files(verbose = true)
159
- start_time = Time.now
160
- load_sitemap_rb
161
- write
162
- stop_time = Time.now
163
- puts "Sitemap stats: #{number_with_delimiter(SitemapGenerator::Sitemap.link_count)} links, " + ("%dm%02ds" % (stop_time - start_time).divmod(60)) if verbose
165
+ # Return the current sitemap index filename.
166
+ #
167
+ # At the moment we only support one index file which can link to
168
+ # up to 50,000 sitemap files.
169
+ def sitemap_index_path
170
+ File.join(self.sitemaps_path || '', 'sitemap_index.xml.gz')
164
171
  end
165
172
  end
166
- end
173
+ end
@@ -7,8 +7,6 @@ module SitemapGenerator
7
7
  # Define an accessor method for each template file.
8
8
  class Templates
9
9
  FILES = {
10
- :sitemap_index => 'sitemap_index.builder',
11
- :sitemap_xml => 'xml_sitemap.builder',
12
10
  :sitemap_sample => 'sitemap.rb',
13
11
  }
14
12
 
@@ -28,7 +26,7 @@ module SitemapGenerator
28
26
 
29
27
  # Return the full path to a template.
30
28
  #
31
- # <tt>file</tt> template symbol e.g. <tt>:sitemap_index</tt>
29
+ # <tt>file</tt> template symbol e.g. <tt>:sitemap_sample</tt>
32
30
  def template_path(template)
33
31
  File.join(@root, 'templates', self.class::FILES[template])
34
32
  end
@@ -3,14 +3,14 @@ module SitemapGenerator
3
3
  extend self
4
4
 
5
5
  # Copy templates/sitemap.rb to config if not there yet.
6
- def install_sitemap_rb
6
+ def install_sitemap_rb(verbose=false)
7
7
  if File.exist?(File.join(RAILS_ROOT, 'config/sitemap.rb'))
8
- puts "already exists: config/sitemap.rb, file not copied"
8
+ puts "already exists: config/sitemap.rb, file not copied" if verbose
9
9
  else
10
10
  FileUtils.cp(
11
11
  SitemapGenerator.templates.template_path(:sitemap_sample),
12
12
  File.join(RAILS_ROOT, 'config/sitemap.rb'))
13
- puts "created: config/sitemap.rb"
13
+ puts "created: config/sitemap.rb" if verbose
14
14
  end
15
15
  end
16
16
 
@@ -25,5 +25,30 @@ module SitemapGenerator
25
25
  def clean_files
26
26
  FileUtils.rm(Dir[File.join(RAILS_ROOT, 'public/sitemap*.xml.gz')])
27
27
  end
28
+
29
+ # Returns whether this environment is using ActionPack
30
+ # version 3.0.0 or greater.
31
+ #
32
+ # @return [Boolean]
33
+ def self.rails3?
34
+ # The ActionPack module is always loaded automatically in Rails >= 3
35
+ return false unless defined?(ActionPack) && defined?(ActionPack::VERSION)
36
+
37
+ version =
38
+ if defined?(ActionPack::VERSION::MAJOR)
39
+ ActionPack::VERSION::MAJOR
40
+ else
41
+ # Rails 1.2
42
+ ActionPack::VERSION::Major
43
+ end
44
+
45
+ # 3.0.0.beta1 acts more like ActionPack 2
46
+ # for purposes of this method
47
+ # (checking whether block helpers require = or -).
48
+ # This extra check can be removed when beta2 is out.
49
+ version >= 3 &&
50
+ !(defined?(ActionPack::VERSION::TINY) &&
51
+ ActionPack::VERSION::TINY == "0.beta")
52
+ end
28
53
  end
29
54
  end
@@ -1,19 +1,19 @@
1
+ require 'sitemap_generator/builder'
1
2
  require 'sitemap_generator/mapper'
2
3
  require 'sitemap_generator/link'
3
- require 'sitemap_generator/rails_helper'
4
- require 'sitemap_generator/helper'
5
4
  require 'sitemap_generator/link_set'
6
- require 'sitemap_generator/helper'
7
5
  require 'sitemap_generator/templates'
8
6
  require 'sitemap_generator/utilities'
9
-
10
- require 'sitemap_generator/railtie' if SitemapGenerator::RailsHelper.rails3?
7
+ require 'sitemap_generator/railtie' if SitemapGenerator::Utilities.rails3?
11
8
 
12
9
  module SitemapGenerator
13
10
  silence_warnings do
14
11
  VERSION = File.read(File.dirname(__FILE__) + "/../VERSION").strip
15
- MAX_ENTRIES = 50_000
16
- MAX_IMAGES = 1_000
12
+ MAX_SITEMAP_FILES = 50_000 # max sitemap links per index file
13
+ MAX_SITEMAP_LINKS = 50_000 # max links per sitemap
14
+ MAX_SITEMAP_IMAGES = 1_000 # max images per url
15
+ MAX_SITEMAP_FILESIZE = 10.megabytes # bytes
16
+
17
17
  Sitemap = LinkSet.new
18
18
  end
19
19
 
@@ -1,4 +1,3 @@
1
- require 'zlib'
2
1
  begin
3
2
  require 'sitemap_generator'
4
3
  rescue LoadError, NameError
@@ -8,7 +7,7 @@ end
8
7
  namespace :sitemap do
9
8
  desc "Install a default config/sitemap.rb file"
10
9
  task :install do
11
- SitemapGenerator::Utilities.install_sitemap_rb
10
+ SitemapGenerator::Utilities.install_sitemap_rb(verbose)
12
11
  end
13
12
 
14
13
  desc "Delete all Sitemap files in public/ directory"
@@ -25,19 +24,8 @@ namespace :sitemap do
25
24
  task 'refresh:no_ping' => ['sitemap:create']
26
25
 
27
26
  task :create => [:environment] do
28
- # TODO: Move away from auto-instantiating SitemapGenerator::Sitemap
29
- # and move to a more natural Sitemap.new or similar.
30
- if SitemapGenerator::RailsHelper.rails3?
31
- SitemapGenerator::Sitemap.class_eval do
32
- include Rails.application.routes.url_helpers
33
- end
34
- else
35
- require 'action_controller'
36
- SitemapGenerator::Sitemap.class_eval do
37
- include ActionController::UrlWriter
38
- end
39
- end
40
- SitemapGenerator::Sitemap.create_files
27
+ SitemapGenerator::Sitemap.verbose = verbose
28
+ SitemapGenerator::Sitemap.create
41
29
  end
42
30
  end
43
31
 
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 2
8
- - 6
9
- version: 0.2.6
7
+ - 3
8
+ - 0
9
+ version: 0.3.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Adam Salter
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-05-17 00:00:00 -07:00
18
+ date: 2010-05-21 00:00:00 -07:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -44,11 +44,14 @@ files:
44
44
  - Rakefile
45
45
  - VERSION
46
46
  - lib/sitemap_generator.rb
47
- - lib/sitemap_generator/helper.rb
47
+ - lib/sitemap_generator/builder.rb
48
+ - lib/sitemap_generator/builder/helper.rb
49
+ - lib/sitemap_generator/builder/sitemap_file.rb
50
+ - lib/sitemap_generator/builder/sitemap_index_file.rb
51
+ - lib/sitemap_generator/interpreter.rb
48
52
  - lib/sitemap_generator/link.rb
49
53
  - lib/sitemap_generator/link_set.rb
50
54
  - lib/sitemap_generator/mapper.rb
51
- - lib/sitemap_generator/rails_helper.rb
52
55
  - lib/sitemap_generator/railtie.rb
53
56
  - lib/sitemap_generator/tasks.rb
54
57
  - lib/sitemap_generator/templates.rb
@@ -57,8 +60,6 @@ files:
57
60
  - rails/uninstall.rb
58
61
  - tasks/sitemap_generator_tasks.rake
59
62
  - templates/sitemap.rb
60
- - templates/sitemap_index.builder
61
- - templates/xml_sitemap.builder
62
63
  has_rdoc: true
63
64
  homepage: http://github.com/kjvarga/sitemap_generator
64
65
  licenses: []
@@ -1,55 +0,0 @@
1
- module SitemapGenerator
2
- # UrlHelpers are included by the rake tasks. This is not ideal, but should
3
- # suffice until things are better organized.
4
- module Helper
5
- def self.included(base)
6
- base.class_eval do
7
- def self.default_url_options(options = nil)
8
- { :host => SitemapGenerator::Sitemap.default_host }
9
- end
10
- end
11
- end
12
-
13
- def load_sitemap_rb
14
- sitemap_mapper_file = File.join(Rails.root, 'config/sitemap.rb')
15
- eval(open(sitemap_mapper_file).read)
16
- end
17
-
18
- def url_with_hostname(path)
19
- URI.join(SitemapGenerator::Sitemap.default_host, path).to_s
20
- end
21
-
22
- def w3c_date(date)
23
- date.utc.strftime("%Y-%m-%dT%H:%M:%S+00:00")
24
- end
25
-
26
- def ping_search_engines(sitemap_index)
27
- require 'open-uri'
28
- index_location = CGI.escape(url_with_hostname(sitemap_index))
29
- # engines list from http://en.wikipedia.org/wiki/Sitemap_index
30
- yahoo_app_id = SitemapGenerator::Sitemap.yahoo_app_id
31
- {:google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=#{index_location}",
32
- :yahoo => "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=#{index_location}&appid=#{yahoo_app_id}",
33
- :ask => "http://submissions.ask.com/ping?sitemap=#{index_location}",
34
- :bing => "http://www.bing.com/webmaster/ping.aspx?siteMap=#{index_location}",
35
- :sitemap_writer => "http://www.sitemapwriter.com/notify.php?crawler=all&url=#{index_location}"}.each do |engine, link|
36
- begin
37
- unless SitemapGenerator::Sitemap.yahoo_app_id == false
38
- open(link)
39
- puts "Successful ping of #{engine.to_s.titleize}" if verbose
40
- end
41
- rescue Timeout::Error, StandardError => e
42
- puts "Ping failed for #{engine.to_s.titleize}: #{e.inspect}" if verbose
43
- puts <<-END if engine == :yahoo && verbose
44
- Yahoo requires an 'AppID' for more than one ping per "timeframe", you can either:
45
- - remove yahoo from the ping list (config/sitemap.rb):
46
- SitemapGenerator::Sitemap.yahoo_app_id = false
47
- - or add your Yahoo AppID to the generator (config/sitemap.rb):
48
- SitemapGenerator::Sitemap.yahoo_app_id = "my_app_id"
49
- For more information: http://developer.yahoo.com/search/siteexplorer/V1/updateNotification.html
50
- END
51
- end
52
- end
53
- end
54
- end
55
- end
@@ -1,28 +0,0 @@
1
- module SitemapGenerator
2
- module RailsHelper
3
- # Returns whether this environment is using ActionPack
4
- # version 3.0.0 or greater.
5
- #
6
- # @return [Boolean]
7
- def self.rails3?
8
- # The ActionPack module is always loaded automatically in Rails >= 3
9
- return false unless defined?(ActionPack) && defined?(ActionPack::VERSION)
10
-
11
- version =
12
- if defined?(ActionPack::VERSION::MAJOR)
13
- ActionPack::VERSION::MAJOR
14
- else
15
- # Rails 1.2
16
- ActionPack::VERSION::Major
17
- end
18
-
19
- # 3.0.0.beta1 acts more like ActionPack 2
20
- # for purposes of this method
21
- # (checking whether block helpers require = or -).
22
- # This extra check can be removed when beta2 is out.
23
- version >= 3 &&
24
- !(defined?(ActionPack::VERSION::TINY) &&
25
- ActionPack::VERSION::TINY == "0.beta")
26
- end
27
- end
28
- end
@@ -1,23 +0,0 @@
1
- # encoding: utf-8
2
- # <?xml version="1.0" encoding="UTF-8"?>
3
- # <sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
4
- # <sitemap>
5
- # <loc>http://www.example.com/sitemap1.xml.gz</loc>
6
- # <lastmod>2004-10-01T18:23:17+00:00</lastmod>
7
- # </sitemap>
8
- # <sitemap>
9
- # <loc>http://www.example.com/sitemap2.xml.gz</loc>
10
- # <lastmod>2005-01-01</lastmod>
11
- # </sitemap>
12
- # </sitemapindex>
13
-
14
- xml.instruct!
15
- xml.sitemapindex "xmlns" => "http://www.sitemaps.org/schemas/sitemap/0.9" do
16
- sitemap_files.each do |file|
17
- xml.sitemap do
18
- xml.loc url_with_hostname(File.basename(file))
19
- xml.lastmod w3c_date(File.mtime(file))
20
- end
21
- end
22
- end
23
-
@@ -1,38 +0,0 @@
1
- # encoding: utf-8
2
- xml.instruct!
3
- xml.urlset "xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance",
4
- "xsi:schemaLocation" => "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd",
5
- "xmlns:image" => "http://www.google.com/schemas/sitemap-image/1.1",
6
- "xmlns" => "http://www.sitemaps.org/schemas/sitemap/0.9" do
7
-
8
- links.each_with_index do |link,index|
9
- buffer_url = ""
10
- url = Builder::XmlMarkup.new(:target=>buffer_url)
11
- url.url do
12
- url.loc link[:loc]
13
- url.lastmod w3c_date(link[:lastmod]) if link[:lastmod]
14
- url.changefreq link[:changefreq] if link[:changefreq]
15
- url.priority link[:priority] if link[:priority]
16
-
17
- unless link[:images].blank?
18
- link[:images].each do |image|
19
- url.image:image do
20
- url.image :loc, image[:loc]
21
- url.image :caption, image[:caption] if image[:caption]
22
- url.image :geo_location, image[:geo_location] if image[:geo_location]
23
- url.image :title, image[:title] if image[:title]
24
- url.image :license, image[:license] if image[:license]
25
- end
26
- end
27
- end
28
- end
29
-
30
- if (buffer+buffer_url).size < 10.megabytes
31
- xml << buffer_url
32
- else
33
- slice_index = index
34
- break
35
- end
36
- end
37
- end
38
-