xml-sitemap 1.1.3 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -114,6 +114,11 @@ index.render
114
114
  index.render_to('/path/to/file.xml')
115
115
  ```
116
116
 
117
+ ## Authors & Contributors
118
+
119
+ - [Dan Sosedoff](https://github.com/sosedoff) (author)
120
+ - [Dan Healy](https://github.com/danhealy)
121
+
117
122
  ## License
118
123
 
119
124
  Copyright © 2010-2011 Dan Sosedoff.
data/lib/xml-sitemap.rb CHANGED
@@ -2,8 +2,14 @@ require 'time'
2
2
  require 'date'
3
3
  require 'zlib'
4
4
  require 'builder'
5
+ begin
6
+ require 'nokogiri'
7
+ rescue LoadError
8
+ end
5
9
 
6
10
  require 'xml-sitemap/options'
11
+ require 'xml-sitemap/render_engine'
12
+ require 'xml-sitemap/item'
7
13
  require 'xml-sitemap/map'
8
14
  require 'xml-sitemap/index'
9
15
 
@@ -0,0 +1,42 @@
1
+ module XmlSitemap
2
+ class Item
3
+ DEFAULT_PRIORITY = 0.5
4
+
5
+ # ISO8601 regex from here: http://www.pelagodesign.com/blog/2009/05/20/iso-8601-date-validation-that-doesnt-suck/
6
+ ISO8601_REGEX = /^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))([T\s]((([01]\d|2[0-3])((:?)[0-5]\d)?|24\:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)?$/
7
+
8
+ attr_reader :target, :updated, :priority, :changefreq, :validate_time
9
+
10
+ def initialize(target, opts={})
11
+ @target = target.to_s.strip
12
+ @updated = opts[:updated] || Time.now
13
+ @priority = opts[:priority] || DEFAULT_PRIORITY
14
+ @changefreq = opts[:period] || :weekly
15
+ @validate_time = (opts[:validate_time] != false)
16
+
17
+ unless @updated.kind_of?(Time) || @updated.kind_of?(Date) || @updated.kind_of?(String)
18
+ raise ArgumentError, "Time, Date, or ISO8601 String required for :updated!"
19
+ end
20
+
21
+ if @validate_time && @updated.kind_of?(String) && !(@updated =~ ISO8601_REGEX)
22
+ raise ArgumentError, "String provided to :updated did not match ISO8601 standard!"
23
+ end
24
+
25
+ unless XmlSitemap::PERIODS.include?(@changefreq)
26
+ raise ArgumentError, "Invalid :period value '#{@changefreq}'"
27
+ end
28
+
29
+ @updated = @updated.to_time if @updated.kind_of?(Date)
30
+ end
31
+
32
+ # Returns the timestamp value for rendering
33
+ #
34
+ def lastmod_value
35
+ if @updated.kind_of?(Time)
36
+ @updated.utc.iso8601
37
+ else
38
+ @updated.to_s
39
+ end
40
+ end
41
+ end
42
+ end
@@ -1,33 +1,25 @@
1
- module XmlSitemap
2
- class Item
3
- DEFAULT_PRIORITY = 0.5
4
-
5
- attr_reader :target, :updated, :priority, :changefreq
6
-
7
- def initialize(target, opts={})
8
- @target = target.to_s.strip
9
- @updated = opts[:updated] || Time.now
10
- @priority = opts[:priority] || DEFAULT_PRIORITY
11
- @changefreq = opts[:period] || :weekly
12
-
13
- # allow only date or time object
14
- unless @updated.kind_of?(Time) || @updated.kind_of?(Date)
15
- raise ArgumentError, "Time or Date required for :updated!"
16
- end
17
-
18
- # use full time and date only!
19
- @updated = @updated.to_time if @updated.kind_of?(Date)
20
- end
21
- end
22
-
1
+ module XmlSitemap
23
2
  class Map
3
+ include XmlSitemap::RenderEngine
4
+
24
5
  attr_reader :domain, :items
25
6
  attr_reader :buffer
26
7
  attr_reader :created_at
27
8
  attr_reader :root
28
9
  attr_reader :group
29
10
 
30
- # Creates new Map class for specified domain
11
+ # Initialize a new Map instance
12
+ #
13
+ # domain - Primary domain for the map (required)
14
+ # opts - Map options
15
+ #
16
+ # opts[:home] - Automatic homepage creation. To disable set to false. (default: true)
17
+ # opts[:secure] - Force HTTPS for all items. (default: false)
18
+ # opts[:time] - Set default lastmod timestamp for items (default: current time)
19
+ # opts[:group] - Group name for sitemap index. (default: sitemap)
20
+ # opts[:root] - Force all links to fall under the main domain.
21
+ # You can add full urls (not paths) if set to false. (default: true)
22
+ #
31
23
  def initialize(domain, opts={})
32
24
  @domain = domain.to_s.strip
33
25
  raise ArgumentError, 'Domain required!' if @domain.empty?
@@ -44,15 +36,18 @@ module XmlSitemap
44
36
  yield self if block_given?
45
37
  end
46
38
 
47
- # Yields Map class for easier access
48
- def generate
49
- raise ArgumentError, 'Block required' unless block_given?
50
- yield self
51
- end
52
-
53
- # Add new item to sitemap list
39
+ # Adds a new item to the map
40
+ #
41
+ # target - Path or url
42
+ # opts - Item options
43
+ #
44
+ # opts[:updated] - Lastmod property of the item
45
+ # opts[:period] - Update frequency. (default: :weekly)
46
+ # opts[:priority] - Item priority. (default: 0.5)
47
+ # opts[:validate_time] - Set to false to skip ISO8601 validation when inserting raw strings. (default: true)
48
+ #
54
49
  def add(target, opts={})
55
- raise RuntimeError, 'Only less than 50k records allowed!' if @items.size >= 50000
50
+ raise RuntimeError, 'Only up to 50k records allowed!' if @items.size > 50000
56
51
  raise ArgumentError, 'Target required!' if target.nil?
57
52
  raise ArgumentError, 'Target is empty!' if target.to_s.strip.empty?
58
53
 
@@ -69,42 +64,47 @@ module XmlSitemap
69
64
  end
70
65
 
71
66
  # Get map items count
67
+ #
72
68
  def size
73
69
  @items.size
74
70
  end
75
71
 
76
72
  # Returns true if sitemap does not have any items
73
+ #
77
74
  def empty?
78
75
  @items.empty?
79
76
  end
80
77
 
81
78
  # Generate full url for path
79
+ #
82
80
  def url(path='')
83
81
  "#{@secure ? 'https' : 'http'}://#{@domain}#{path}"
84
82
  end
85
83
 
86
84
  # Get full url for index
85
+ #
87
86
  def index_url(offset)
88
87
  "http://#{@domain}/#{@group}-#{offset}.xml"
89
88
  end
90
89
 
91
90
  # Render XML
92
- def render
93
- xml = Builder::XmlMarkup.new(:indent => 2)
94
- xml.instruct!(:xml, :version => '1.0', :encoding => 'UTF-8')
95
- xml.urlset(XmlSitemap::MAP_SCHEMA_OPTIONS) { |s|
96
- @items.each do |item|
97
- s.url do |u|
98
- u.loc item.target
99
- u.lastmod item.updated.utc.iso8601
100
- u.changefreq item.changefreq.to_s
101
- u.priority item.priority.to_s
102
- end
103
- end
104
- }.to_s
91
+ #
92
+ # method - Pick a render engine (:builder, :nokogiri, :string).
93
+ # Default is :string
94
+ #
95
+ def render(method = :string)
96
+ case method
97
+ when :nokogiri
98
+ render_nokogiri
99
+ when :builder
100
+ render_builder
101
+ else
102
+ render_string
103
+ end
105
104
  end
106
105
 
107
106
  # Render XML sitemap into the file
107
+ #
108
108
  def render_to(path, options={})
109
109
  overwrite = options[:overwrite] == true || true
110
110
  compress = options[:gzip] == true || false
@@ -130,6 +130,7 @@ module XmlSitemap
130
130
  protected
131
131
 
132
132
  # Process target path or url
133
+ #
133
134
  def process_target(str)
134
135
  if @root == true
135
136
  url(str =~ /^\// ? str : "/#{str}")
@@ -0,0 +1,70 @@
1
+ module XmlSitemap
2
+ module RenderEngine
3
+ private
4
+
5
+ # Render with Nokogiri gem
6
+ #
7
+ def render_nokogiri
8
+ unless defined? Nokogiri
9
+ raise ArgumentError, "Nokogiri not found!"
10
+ end
11
+ builder = Nokogiri::XML::Builder.new(:encoding => "UTF-8") do |xml|
12
+ xml.urlset(XmlSitemap::MAP_SCHEMA_OPTIONS) { |s|
13
+ @items.each do |item|
14
+ s.url do |u|
15
+ u.loc item.target
16
+ u.lastmod item.lastmod_value
17
+ u.changefreq item.changefreq.to_s
18
+ u.priority item.priority.to_s
19
+ end
20
+ end
21
+ }
22
+ end
23
+ builder.to_xml
24
+ end
25
+
26
+ # Render with Builder gem
27
+ #
28
+ def render_bulder
29
+ xml = Builder::XmlMarkup.new(:indent => 2)
30
+ xml.instruct!(:xml, :version => '1.0', :encoding => 'UTF-8')
31
+ xml.urlset(XmlSitemap::MAP_SCHEMA_OPTIONS) { |s|
32
+ @items.each do |item|
33
+ s.url do |u|
34
+ u.loc item.target
35
+ u.lastmod item.lastmod_value
36
+ u.changefreq item.changefreq.to_s
37
+ u.priority item.priority.to_s
38
+ end
39
+ end
40
+ }.to_s
41
+ end
42
+
43
+ # Render with plain strings
44
+ #
45
+ def render_string
46
+ result = '<?xml version="1.0" encoding="UTF-8"?>' + "\n<urlset"
47
+
48
+ XmlSitemap::MAP_SCHEMA_OPTIONS.each do |key, val|
49
+ result += ' ' + key + '="' + val + '"'
50
+ end
51
+
52
+ result += ">\n"
53
+
54
+ item_results = []
55
+ @items.each do |item|
56
+ item_string = " <url>\n"
57
+ item_string += " <loc>#{CGI::escapeHTML(item.target)}</loc>\n"
58
+ item_string += " <lastmod>#{item.lastmod_value}</lastmod>\n"
59
+ item_string += " <changefreq>#{item.changefreq}</changefreq>\n"
60
+ item_string += " <priority>#{item.priority}</priority>\n"
61
+ item_string += " </url>\n"
62
+
63
+ item_results << item_string
64
+ end
65
+
66
+ result = result + item_results.join("") + "</urlset>\n"
67
+ result
68
+ end
69
+ end
70
+ end
@@ -1,3 +1,3 @@
1
1
  module XmlSitemap
2
- VERSION = '1.1.3'
2
+ VERSION = '1.2.0'
3
3
  end
data/spec/item_spec.rb ADDED
@@ -0,0 +1,8 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'XmlSitemap::Item' do
4
+ it 'should raise ArgumentError if invalid :period value was passed' do
5
+ proc { XmlSitemap::Item.new('hello', :period => :foobar) }.
6
+ should raise_error ArgumentError, "Invalid :period value 'foobar'"
7
+ end
8
+ end
data/spec/map_spec.rb CHANGED
@@ -1,3 +1,4 @@
1
+ require 'benchmark'
1
2
  require 'spec_helper'
2
3
 
3
4
  describe XmlSitemap::Map do
@@ -52,10 +53,43 @@ describe XmlSitemap::Map do
52
53
  map.add('world', :updated => @extra_time).updated.should == Time.gm(2011, 7, 1, 0, 0, 1)
53
54
  end
54
55
 
56
+ it 'should help me test performance' do
57
+ pending "comment this line to run benchmarks, takes roughly 30 seconds"
58
+ map = XmlSitemap::Map.new('foobar.com', :time => @base_time)
59
+ 50000.times do |i|
60
+ map.add("hello#{i}")
61
+ end
62
+
63
+ Benchmark.bm do |x|
64
+ x.report("render(:builder)") { map.render(:builder) }
65
+ x.report("render(:nokogiri)") { map.render(:nokogiri) }
66
+ x.report("render(:string)") { map.render(:string) }
67
+ end
68
+ end
69
+
55
70
  it 'should raise Argument error if no time or date were provided' do
71
+ map = XmlSitemap::Map.new('foobar.com', :time => @base_time)
72
+ proc { map.add('hello', :updated => 5) }.
73
+ should raise_error ArgumentError, "Time, Date, or ISO8601 String required for :updated!"
74
+ end
75
+
76
+ it 'should not raise Argument error if a iso8601 string is provided' do
77
+ map = XmlSitemap::Map.new('foobar.com', :time => @base_time)
78
+ proc { map.add('hello', :updated => "2011-09-12T23:18:49Z") }.
79
+ should_not raise_error
80
+ map.add('world', :updated => @extra_time.utc.iso8601).updated.should == Time.gm(2011, 7, 1, 0, 0, 1).utc.iso8601
81
+ end
82
+
83
+ it 'should not raise Argument error if a string is provided with :validate_time => false' do
84
+ map = XmlSitemap::Map.new('foobar.com', :time => @base_time)
85
+ proc { map.add('hello', :validate_time => false, :updated => 'invalid data') }.
86
+ should_not raise_error
87
+ end
88
+
89
+ it 'should raise Argument error if an invalid string is provided' do
56
90
  map = XmlSitemap::Map.new('foobar.com', :time => @base_time)
57
91
  proc { map.add('hello', :updated => 'invalid data') }.
58
- should raise_error ArgumentError, "Time or Date required for :updated!"
92
+ should raise_error ArgumentError, "String provided to :updated did not match ISO8601 standard!"
59
93
  end
60
94
 
61
95
  it 'should have properly encoded entities' do
@@ -64,11 +98,27 @@ describe XmlSitemap::Map do
64
98
  map.render.should == fixture('encoded_map.xml')
65
99
  end
66
100
 
101
+ it 'should have properly encoded entities using nokogiri render' do
102
+ map = XmlSitemap::Map.new('foobar.com', :time => @base_time)
103
+ map.add('/path?a=b&c=d&e=sample string')
104
+ s = map.render(:nokogiri)
105
+ # ignore ordering of urlset attributes by dropping first two lines
106
+ s.split("\n")[2..-1].join("\n").should == fixture('encoded_map.xml').split("\n")[2..-1].join("\n")
107
+ end
108
+
109
+ it 'should have properly encoded entities using string render' do
110
+ map = XmlSitemap::Map.new('foobar.com', :time => @base_time)
111
+ map.add('/path?a=b&c=d&e=sample string')
112
+ s = map.render(:string)
113
+ # ignore ordering of urlset attributes by dropping first two lines
114
+ s.split("\n")[2..-1].join("\n").should == fixture('encoded_map.xml').split("\n")[2..-1].join("\n")
115
+ end
116
+
67
117
  it 'should not allow more than 50k records' do
68
118
  map = XmlSitemap::Map.new('foobar.com')
69
119
  proc {
70
- 1.upto(50000) { |i| map.add("url#{i}") }
71
- }.should raise_error RuntimeError, 'Only less than 50k records allowed!'
120
+ 1.upto(50001) { |i| map.add("url#{i}") }
121
+ }.should raise_error RuntimeError, 'Only up to 50k records allowed!'
72
122
  end
73
123
 
74
124
  it 'should not allow urls longer than 2048 characters' do
@@ -120,4 +170,4 @@ describe XmlSitemap::Map do
120
170
  File.delete(path) if File.exists?(path)
121
171
  File.delete(path_gzip) if File.exists?(path_gzip)
122
172
  end
123
- end
173
+ end
data/xml-sitemap.gemspec CHANGED
@@ -12,6 +12,7 @@ Gem::Specification.new do |s|
12
12
  s.add_development_dependency 'rake', '~> 0.8'
13
13
  s.add_development_dependency 'rspec', '~> 2.6'
14
14
  s.add_development_dependency 'simplecov', '~> 0.4'
15
+ s.add_development_dependency 'nokogiri', '~> 1.5.0'
15
16
 
16
17
  s.add_runtime_dependency 'builder', '>= 2.0'
17
18
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xml-sitemap
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.3
4
+ version: 1.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-09-08 00:00:00.000000000Z
12
+ date: 2011-09-14 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
- requirement: &2154904800 !ruby/object:Gem::Requirement
16
+ requirement: &2162591460 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.8'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *2154904800
24
+ version_requirements: *2162591460
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rspec
27
- requirement: &2154904300 !ruby/object:Gem::Requirement
27
+ requirement: &2162590440 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '2.6'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *2154904300
35
+ version_requirements: *2162590440
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: simplecov
38
- requirement: &2154903840 !ruby/object:Gem::Requirement
38
+ requirement: &2162589300 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,21 @@ dependencies:
43
43
  version: '0.4'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2154903840
46
+ version_requirements: *2162589300
47
+ - !ruby/object:Gem::Dependency
48
+ name: nokogiri
49
+ requirement: &2162588020 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 1.5.0
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *2162588020
47
58
  - !ruby/object:Gem::Dependency
48
59
  name: builder
49
- requirement: &2154903380 !ruby/object:Gem::Requirement
60
+ requirement: &2162586640 !ruby/object:Gem::Requirement
50
61
  none: false
51
62
  requirements:
52
63
  - - ! '>='
@@ -54,7 +65,7 @@ dependencies:
54
65
  version: '2.0'
55
66
  type: :runtime
56
67
  prerelease: false
57
- version_requirements: *2154903380
68
+ version_requirements: *2162586640
58
69
  description: Provides a wrapper to generate XML sitemaps and sitemap indexes.
59
70
  email:
60
71
  - dan.sosedoff@gmail.com
@@ -70,8 +81,10 @@ files:
70
81
  - Rakefile
71
82
  - lib/xml-sitemap.rb
72
83
  - lib/xml-sitemap/index.rb
84
+ - lib/xml-sitemap/item.rb
73
85
  - lib/xml-sitemap/map.rb
74
86
  - lib/xml-sitemap/options.rb
87
+ - lib/xml-sitemap/render_engine.rb
75
88
  - lib/xml-sitemap/version.rb
76
89
  - spec/fixtures/empty_index.xml
77
90
  - spec/fixtures/encoded_map.xml
@@ -80,6 +93,7 @@ files:
80
93
  - spec/fixtures/saved_map.xml
81
94
  - spec/fixtures/simple_map.xml
82
95
  - spec/index_spec.rb
96
+ - spec/item_spec.rb
83
97
  - spec/map_spec.rb
84
98
  - spec/spec_helper.rb
85
99
  - spec/xmlsitemap_spec.rb
@@ -116,6 +130,7 @@ test_files:
116
130
  - spec/fixtures/saved_map.xml
117
131
  - spec/fixtures/simple_map.xml
118
132
  - spec/index_spec.rb
133
+ - spec/item_spec.rb
119
134
  - spec/map_spec.rb
120
135
  - spec/spec_helper.rb
121
136
  - spec/xmlsitemap_spec.rb