xml-sitemap 1.1.3 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -114,6 +114,11 @@ index.render
114
114
  index.render_to('/path/to/file.xml')
115
115
  ```
116
116
 
117
+ ## Authors & Contributors
118
+
119
+ - [Dan Sosedoff](https://github.com/sosedoff) (author)
120
+ - [Dan Healy](https://github.com/danhealy)
121
+
117
122
  ## License
118
123
 
119
124
  Copyright © 2010-2011 Dan Sosedoff.
data/lib/xml-sitemap.rb CHANGED
@@ -2,8 +2,14 @@ require 'time'
2
2
  require 'date'
3
3
  require 'zlib'
4
4
  require 'builder'
5
+ begin
6
+ require 'nokogiri'
7
+ rescue LoadError
8
+ end
5
9
 
6
10
  require 'xml-sitemap/options'
11
+ require 'xml-sitemap/render_engine'
12
+ require 'xml-sitemap/item'
7
13
  require 'xml-sitemap/map'
8
14
  require 'xml-sitemap/index'
9
15
 
@@ -0,0 +1,42 @@
1
+ module XmlSitemap
2
+ class Item
3
+ DEFAULT_PRIORITY = 0.5
4
+
5
+ # ISO8601 regex from here: http://www.pelagodesign.com/blog/2009/05/20/iso-8601-date-validation-that-doesnt-suck/
6
+ ISO8601_REGEX = /^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))([T\s]((([01]\d|2[0-3])((:?)[0-5]\d)?|24\:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)?$/
7
+
8
+ attr_reader :target, :updated, :priority, :changefreq, :validate_time
9
+
10
+ def initialize(target, opts={})
11
+ @target = target.to_s.strip
12
+ @updated = opts[:updated] || Time.now
13
+ @priority = opts[:priority] || DEFAULT_PRIORITY
14
+ @changefreq = opts[:period] || :weekly
15
+ @validate_time = (opts[:validate_time] != false)
16
+
17
+ unless @updated.kind_of?(Time) || @updated.kind_of?(Date) || @updated.kind_of?(String)
18
+ raise ArgumentError, "Time, Date, or ISO8601 String required for :updated!"
19
+ end
20
+
21
+ if @validate_time && @updated.kind_of?(String) && !(@updated =~ ISO8601_REGEX)
22
+ raise ArgumentError, "String provided to :updated did not match ISO8601 standard!"
23
+ end
24
+
25
+ unless XmlSitemap::PERIODS.include?(@changefreq)
26
+ raise ArgumentError, "Invalid :period value '#{@changefreq}'"
27
+ end
28
+
29
+ @updated = @updated.to_time if @updated.kind_of?(Date)
30
+ end
31
+
32
+ # Returns the timestamp value for rendering
33
+ #
34
+ def lastmod_value
35
+ if @updated.kind_of?(Time)
36
+ @updated.utc.iso8601
37
+ else
38
+ @updated.to_s
39
+ end
40
+ end
41
+ end
42
+ end
@@ -1,33 +1,25 @@
1
- module XmlSitemap
2
- class Item
3
- DEFAULT_PRIORITY = 0.5
4
-
5
- attr_reader :target, :updated, :priority, :changefreq
6
-
7
- def initialize(target, opts={})
8
- @target = target.to_s.strip
9
- @updated = opts[:updated] || Time.now
10
- @priority = opts[:priority] || DEFAULT_PRIORITY
11
- @changefreq = opts[:period] || :weekly
12
-
13
- # allow only date or time object
14
- unless @updated.kind_of?(Time) || @updated.kind_of?(Date)
15
- raise ArgumentError, "Time or Date required for :updated!"
16
- end
17
-
18
- # use full time and date only!
19
- @updated = @updated.to_time if @updated.kind_of?(Date)
20
- end
21
- end
22
-
1
+ module XmlSitemap
23
2
  class Map
3
+ include XmlSitemap::RenderEngine
4
+
24
5
  attr_reader :domain, :items
25
6
  attr_reader :buffer
26
7
  attr_reader :created_at
27
8
  attr_reader :root
28
9
  attr_reader :group
29
10
 
30
- # Creates new Map class for specified domain
11
+ # Initialize a new Map instance
12
+ #
13
+ # domain - Primary domain for the map (required)
14
+ # opts - Map options
15
+ #
16
+ # opts[:home] - Automatic homepage creation. To disable set to false. (default: true)
17
+ # opts[:secure] - Force HTTPS for all items. (default: false)
18
+ # opts[:time] - Set default lastmod timestamp for items (default: current time)
19
+ # opts[:group] - Group name for sitemap index. (default: sitemap)
20
+ # opts[:root] - Force all links to fall under the main domain.
21
+ # You can add full urls (not paths) if set to false. (default: true)
22
+ #
31
23
  def initialize(domain, opts={})
32
24
  @domain = domain.to_s.strip
33
25
  raise ArgumentError, 'Domain required!' if @domain.empty?
@@ -44,15 +36,18 @@ module XmlSitemap
44
36
  yield self if block_given?
45
37
  end
46
38
 
47
- # Yields Map class for easier access
48
- def generate
49
- raise ArgumentError, 'Block required' unless block_given?
50
- yield self
51
- end
52
-
53
- # Add new item to sitemap list
39
+ # Adds a new item to the map
40
+ #
41
+ # target - Path or url
42
+ # opts - Item options
43
+ #
44
+ # opts[:updated] - Lastmod property of the item
45
+ # opts[:period] - Update frequency. (default: :weekly)
46
+ # opts[:priority] - Item priority. (default: 0.5)
47
+ # opts[:validate_time] - Set to false to skip ISO8601 validation when passing a raw string as :updated. (default: true)
48
+ #
54
49
  def add(target, opts={})
55
- raise RuntimeError, 'Only less than 50k records allowed!' if @items.size >= 50000
50
+ raise RuntimeError, 'Only up to 50k records allowed!' if @items.size > 50000
56
51
  raise ArgumentError, 'Target required!' if target.nil?
57
52
  raise ArgumentError, 'Target is empty!' if target.to_s.strip.empty?
58
53
 
@@ -69,42 +64,47 @@ module XmlSitemap
69
64
  end
70
65
 
71
66
  # Get map items count
67
+ #
72
68
  def size
73
69
  @items.size
74
70
  end
75
71
 
76
72
  # Returns true if sitemap does not have any items
73
+ #
77
74
  def empty?
78
75
  @items.empty?
79
76
  end
80
77
 
81
78
  # Generate full url for path
79
+ #
82
80
  def url(path='')
83
81
  "#{@secure ? 'https' : 'http'}://#{@domain}#{path}"
84
82
  end
85
83
 
86
84
  # Get full url for index
85
+ #
87
86
  def index_url(offset)
88
87
  "http://#{@domain}/#{@group}-#{offset}.xml"
89
88
  end
90
89
 
91
90
  # Render XML
92
- def render
93
- xml = Builder::XmlMarkup.new(:indent => 2)
94
- xml.instruct!(:xml, :version => '1.0', :encoding => 'UTF-8')
95
- xml.urlset(XmlSitemap::MAP_SCHEMA_OPTIONS) { |s|
96
- @items.each do |item|
97
- s.url do |u|
98
- u.loc item.target
99
- u.lastmod item.updated.utc.iso8601
100
- u.changefreq item.changefreq.to_s
101
- u.priority item.priority.to_s
102
- end
103
- end
104
- }.to_s
91
+ #
92
+ # method - Pick a render engine (:builder, :nokogiri, :string).
93
+ # Default is :string
94
+ #
95
+ def render(method = :string)
96
+ case method
97
+ when :nokogiri
98
+ render_nokogiri
99
+ when :builder
100
+ render_builder
101
+ else
102
+ render_string
103
+ end
105
104
  end
106
105
 
107
106
  # Render XML sitemap into the file
107
+ #
108
108
  def render_to(path, options={})
109
109
  overwrite = options[:overwrite] == true || true
110
110
  compress = options[:gzip] == true || false
@@ -130,6 +130,7 @@ module XmlSitemap
130
130
  protected
131
131
 
132
132
  # Process target path or url
133
+ #
133
134
  def process_target(str)
134
135
  if @root == true
135
136
  url(str =~ /^\// ? str : "/#{str}")
@@ -0,0 +1,70 @@
1
+ module XmlSitemap
2
+ module RenderEngine
3
+ private
4
+
5
+ # Render with Nokogiri gem
6
+ #
7
+ def render_nokogiri
8
+ unless defined? Nokogiri
9
+ raise ArgumentError, "Nokogiri not found!"
10
+ end
11
+ builder = Nokogiri::XML::Builder.new(:encoding => "UTF-8") do |xml|
12
+ xml.urlset(XmlSitemap::MAP_SCHEMA_OPTIONS) { |s|
13
+ @items.each do |item|
14
+ s.url do |u|
15
+ u.loc item.target
16
+ u.lastmod item.lastmod_value
17
+ u.changefreq item.changefreq.to_s
18
+ u.priority item.priority.to_s
19
+ end
20
+ end
21
+ }
22
+ end
23
+ builder.to_xml
24
+ end
25
+
26
+ # Render with Builder gem
27
+ #
28
+ def render_bulder
29
+ xml = Builder::XmlMarkup.new(:indent => 2)
30
+ xml.instruct!(:xml, :version => '1.0', :encoding => 'UTF-8')
31
+ xml.urlset(XmlSitemap::MAP_SCHEMA_OPTIONS) { |s|
32
+ @items.each do |item|
33
+ s.url do |u|
34
+ u.loc item.target
35
+ u.lastmod item.lastmod_value
36
+ u.changefreq item.changefreq.to_s
37
+ u.priority item.priority.to_s
38
+ end
39
+ end
40
+ }.to_s
41
+ end
42
+
43
+ # Render with plain strings
44
+ #
45
+ def render_string
46
+ result = '<?xml version="1.0" encoding="UTF-8"?>' + "\n<urlset"
47
+
48
+ XmlSitemap::MAP_SCHEMA_OPTIONS.each do |key, val|
49
+ result += ' ' + key + '="' + val + '"'
50
+ end
51
+
52
+ result += ">\n"
53
+
54
+ item_results = []
55
+ @items.each do |item|
56
+ item_string = " <url>\n"
57
+ item_string += " <loc>#{CGI::escapeHTML(item.target)}</loc>\n"
58
+ item_string += " <lastmod>#{item.lastmod_value}</lastmod>\n"
59
+ item_string += " <changefreq>#{item.changefreq}</changefreq>\n"
60
+ item_string += " <priority>#{item.priority}</priority>\n"
61
+ item_string += " </url>\n"
62
+
63
+ item_results << item_string
64
+ end
65
+
66
+ result = result + item_results.join("") + "</urlset>\n"
67
+ result
68
+ end
69
+ end
70
+ end
@@ -1,3 +1,3 @@
1
1
  module XmlSitemap
2
- VERSION = '1.1.3'
2
+ VERSION = '1.2.0'
3
3
  end
data/spec/item_spec.rb ADDED
@@ -0,0 +1,8 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'XmlSitemap::Item' do
4
+ it 'should raise ArgumentError if invalid :period value was passed' do
5
+ proc { XmlSitemap::Item.new('hello', :period => :foobar) }.
6
+ should raise_error ArgumentError, "Invalid :period value 'foobar'"
7
+ end
8
+ end
data/spec/map_spec.rb CHANGED
@@ -1,3 +1,4 @@
1
+ require 'benchmark'
1
2
  require 'spec_helper'
2
3
 
3
4
  describe XmlSitemap::Map do
@@ -52,10 +53,43 @@ describe XmlSitemap::Map do
52
53
  map.add('world', :updated => @extra_time).updated.should == Time.gm(2011, 7, 1, 0, 0, 1)
53
54
  end
54
55
 
56
+ it 'should help me test performance' do
57
+ pending "comment this line to run benchmarks, takes roughly 30 seconds"
58
+ map = XmlSitemap::Map.new('foobar.com', :time => @base_time)
59
+ 50000.times do |i|
60
+ map.add("hello#{i}")
61
+ end
62
+
63
+ Benchmark.bm do |x|
64
+ x.report("render(:builder)") { map.render(:builder) }
65
+ x.report("render(:nokogiri)") { map.render(:nokogiri) }
66
+ x.report("render(:string)") { map.render(:string) }
67
+ end
68
+ end
69
+
55
70
  it 'should raise Argument error if no time or date were provided' do
71
+ map = XmlSitemap::Map.new('foobar.com', :time => @base_time)
72
+ proc { map.add('hello', :updated => 5) }.
73
+ should raise_error ArgumentError, "Time, Date, or ISO8601 String required for :updated!"
74
+ end
75
+
76
+ it 'should not raise Argument error if a iso8601 string is provided' do
77
+ map = XmlSitemap::Map.new('foobar.com', :time => @base_time)
78
+ proc { map.add('hello', :updated => "2011-09-12T23:18:49Z") }.
79
+ should_not raise_error
80
+ map.add('world', :updated => @extra_time.utc.iso8601).updated.should == Time.gm(2011, 7, 1, 0, 0, 1).utc.iso8601
81
+ end
82
+
83
+ it 'should not raise Argument error if a string is provided with :validate_time => false' do
84
+ map = XmlSitemap::Map.new('foobar.com', :time => @base_time)
85
+ proc { map.add('hello', :validate_time => false, :updated => 'invalid data') }.
86
+ should_not raise_error
87
+ end
88
+
89
+ it 'should raise Argument error if an invalid string is provided' do
56
90
  map = XmlSitemap::Map.new('foobar.com', :time => @base_time)
57
91
  proc { map.add('hello', :updated => 'invalid data') }.
58
- should raise_error ArgumentError, "Time or Date required for :updated!"
92
+ should raise_error ArgumentError, "String provided to :updated did not match ISO8601 standard!"
59
93
  end
60
94
 
61
95
  it 'should have properly encoded entities' do
@@ -64,11 +98,27 @@ describe XmlSitemap::Map do
64
98
  map.render.should == fixture('encoded_map.xml')
65
99
  end
66
100
 
101
+ it 'should have properly encoded entities using nokogiri render' do
102
+ map = XmlSitemap::Map.new('foobar.com', :time => @base_time)
103
+ map.add('/path?a=b&c=d&e=sample string')
104
+ s = map.render(:nokogiri)
105
+ # ignore ordering of urlset attributes by dropping first two lines
106
+ s.split("\n")[2..-1].join("\n").should == fixture('encoded_map.xml').split("\n")[2..-1].join("\n")
107
+ end
108
+
109
+ it 'should have properly encoded entities using string render' do
110
+ map = XmlSitemap::Map.new('foobar.com', :time => @base_time)
111
+ map.add('/path?a=b&c=d&e=sample string')
112
+ s = map.render(:string)
113
+ # ignore ordering of urlset attributes by dropping first two lines
114
+ s.split("\n")[2..-1].join("\n").should == fixture('encoded_map.xml').split("\n")[2..-1].join("\n")
115
+ end
116
+
67
117
  it 'should not allow more than 50k records' do
68
118
  map = XmlSitemap::Map.new('foobar.com')
69
119
  proc {
70
- 1.upto(50000) { |i| map.add("url#{i}") }
71
- }.should raise_error RuntimeError, 'Only less than 50k records allowed!'
120
+ 1.upto(50001) { |i| map.add("url#{i}") }
121
+ }.should raise_error RuntimeError, 'Only up to 50k records allowed!'
72
122
  end
73
123
 
74
124
  it 'should not allow urls longer than 2048 characters' do
@@ -120,4 +170,4 @@ describe XmlSitemap::Map do
120
170
  File.delete(path) if File.exists?(path)
121
171
  File.delete(path_gzip) if File.exists?(path_gzip)
122
172
  end
123
- end
173
+ end
data/xml-sitemap.gemspec CHANGED
@@ -12,6 +12,7 @@ Gem::Specification.new do |s|
12
12
  s.add_development_dependency 'rake', '~> 0.8'
13
13
  s.add_development_dependency 'rspec', '~> 2.6'
14
14
  s.add_development_dependency 'simplecov', '~> 0.4'
15
+ s.add_development_dependency 'nokogiri', '~> 1.5.0'
15
16
 
16
17
  s.add_runtime_dependency 'builder', '>= 2.0'
17
18
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xml-sitemap
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.3
4
+ version: 1.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-09-08 00:00:00.000000000Z
12
+ date: 2011-09-14 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
- requirement: &2154904800 !ruby/object:Gem::Requirement
16
+ requirement: &2162591460 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.8'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *2154904800
24
+ version_requirements: *2162591460
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rspec
27
- requirement: &2154904300 !ruby/object:Gem::Requirement
27
+ requirement: &2162590440 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '2.6'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *2154904300
35
+ version_requirements: *2162590440
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: simplecov
38
- requirement: &2154903840 !ruby/object:Gem::Requirement
38
+ requirement: &2162589300 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,21 @@ dependencies:
43
43
  version: '0.4'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2154903840
46
+ version_requirements: *2162589300
47
+ - !ruby/object:Gem::Dependency
48
+ name: nokogiri
49
+ requirement: &2162588020 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 1.5.0
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *2162588020
47
58
  - !ruby/object:Gem::Dependency
48
59
  name: builder
49
- requirement: &2154903380 !ruby/object:Gem::Requirement
60
+ requirement: &2162586640 !ruby/object:Gem::Requirement
50
61
  none: false
51
62
  requirements:
52
63
  - - ! '>='
@@ -54,7 +65,7 @@ dependencies:
54
65
  version: '2.0'
55
66
  type: :runtime
56
67
  prerelease: false
57
- version_requirements: *2154903380
68
+ version_requirements: *2162586640
58
69
  description: Provides a wrapper to generate XML sitemaps and sitemap indexes.
59
70
  email:
60
71
  - dan.sosedoff@gmail.com
@@ -70,8 +81,10 @@ files:
70
81
  - Rakefile
71
82
  - lib/xml-sitemap.rb
72
83
  - lib/xml-sitemap/index.rb
84
+ - lib/xml-sitemap/item.rb
73
85
  - lib/xml-sitemap/map.rb
74
86
  - lib/xml-sitemap/options.rb
87
+ - lib/xml-sitemap/render_engine.rb
75
88
  - lib/xml-sitemap/version.rb
76
89
  - spec/fixtures/empty_index.xml
77
90
  - spec/fixtures/encoded_map.xml
@@ -80,6 +93,7 @@ files:
80
93
  - spec/fixtures/saved_map.xml
81
94
  - spec/fixtures/simple_map.xml
82
95
  - spec/index_spec.rb
96
+ - spec/item_spec.rb
83
97
  - spec/map_spec.rb
84
98
  - spec/spec_helper.rb
85
99
  - spec/xmlsitemap_spec.rb
@@ -116,6 +130,7 @@ test_files:
116
130
  - spec/fixtures/saved_map.xml
117
131
  - spec/fixtures/simple_map.xml
118
132
  - spec/index_spec.rb
133
+ - spec/item_spec.rb
119
134
  - spec/map_spec.rb
120
135
  - spec/spec_helper.rb
121
136
  - spec/xmlsitemap_spec.rb