massive_sitemap 2.0.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg
5
+ .rvmrc
data/.travis.yml ADDED
@@ -0,0 +1,3 @@
1
+ rvm:
2
+ - 1.9.2
3
+ script: "bundle exec rake spec"
data/CHANGELOG.md ADDED
@@ -0,0 +1,28 @@
1
+ # Changes
2
+
3
+ ## vx.x.x - ???
4
+
5
+ ## v2.0.x - ???
6
+
7
+ * updated/fixed Ping
8
+ * updated Docu
9
+ * switch to writer chain
10
+ * add BigSitemap API
11
+
12
+ ## v2.0.0 - 13-02-2012
13
+ _initial release_
14
+
15
+ * restructured gem completely based on BigSitemap gem
16
+ * separated logic in two major parts:
17
+ * Builder -> creates content
18
+ * Writer -> stores content
19
+ * added several implementations/specifications of builder/writer
20
+ * added generator for default setup
21
+ * added specs
22
+ * writer overwrite detection
23
+ * added Index generation
24
+ * don't init new writer all the time
25
+ * move inited status to writer
26
+ * move index build into indexer and resource handling/selection into writer
27
+ * manifest handling:
28
+ * moved Amazon S3 integration to [massive_sitemap-writer-s3](https://github.com/rngtng/massive_sitemap-writer-s3)
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in massive_sitemap.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,24 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ massive_sitemap (0.0.1)
5
+
6
+ GEM
7
+ remote: http://rubygems.org/
8
+ specs:
9
+ diff-lcs (1.1.3)
10
+ rspec (2.8.0)
11
+ rspec-core (~> 2.8.0)
12
+ rspec-expectations (~> 2.8.0)
13
+ rspec-mocks (~> 2.8.0)
14
+ rspec-core (2.8.0)
15
+ rspec-expectations (2.8.0)
16
+ diff-lcs (~> 1.1.2)
17
+ rspec-mocks (2.8.0)
18
+
19
+ PLATFORMS
20
+ ruby
21
+
22
+ DEPENDENCIES
23
+ massive_sitemap!
24
+ rspec
data/README.md ADDED
@@ -0,0 +1,15 @@
1
+ # MassiveSitemap
2
+
3
+ [![](http://travis-ci.org/rngtng/massive_sitemap.png)](http://travis-ci.org/rngtng/massive_sitemap)
4
+
5
+ Build painfree sitemaps for websites with millions of pages
6
+
7
+ MassiveSitemap is a successor project of [BigSitemap](https://github.com/alexrabarts/big_sitemap), a [Sitemap](http://sitemaps.org) generator for websites with millions of pages.
8
+ It implements various generation strategies, e.g. to split large Sitemaps into multiple files, gzip files to minimize bandwidth usage, or incremental updates. Its API is very similar to that of _BigSitemap_, so it can be set up with just a few lines of code and is compatible with just about any framework.
9
+
10
+ ## Usage
11
+
12
+ * clear structure
13
+ * allows extension (S3)
14
+
15
+ MassiveSitemap - build huge sitemaps painfree. Differential updates keep generation time short and reduce load on the DB. It's heavily inspired by BigSitemap and offers a compatible API.
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require 'bundler/gem_tasks'
2
+
3
+ require 'rspec/core/rake_task'
4
+ RSpec::Core::RakeTask.new(:spec) do |t|
5
+ t.pattern = "./spec/**/*_spec.rb" # don't need this, it's default.
6
+ end
7
+
8
+ task :default => :spec
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 2.0.0.rc1
@@ -0,0 +1,104 @@
module MassiveSitemap
  module Builder

    # Streams a sitemap document (an <urlset> holding one <url> entry per
    # #add call) into a writer.
    #
    # The writer only needs to respond to #inited?, #init!, #print and
    # #close!. The XML header is written lazily, on the first entry, so a
    # builder that never receives an entry produces no output at all.
    class Base
      OPTS = {
        :base_url  => nil,
        :indent_by => 2
      }

      HEADER_NAME = 'urlset'
      HEADER_ATTRIBUTES = {
        'xmlns'              => 'http://www.sitemaps.org/schemas/sitemap/0.9',
        'xmlns:xsi'          => "http://www.w3.org/2001/XMLSchema-instance",
        'xsi:schemaLocation' => "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
      }

      # Characters that must be entity-escaped inside XML text and attribute
      # values, mapped to their replacements.
      XML_ENTITIES = {
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
        "'" => '&apos;'
      }
      XML_SPECIAL_CHARS = /[&<>"']/

      LASTMOD_FORMAT = '%Y-%m-%dT%H:%M:%S+00:00'

      attr_reader :options

      # writer  - object receiving the generated XML.
      # options - Hash overriding OPTS (:base_url, :indent_by).
      # block   - optional; evaluated in the context of the builder (so it can
      #           call #add directly), after which the document is closed.
      def initialize(writer, options = {}, &block)
        @writer      = writer
        @options     = OPTS.merge(options)
        @opened_tags = []

        return unless block

        instance_eval(&block)
        close!
      end

      # Same as .new; kept as the conventional entry point.
      def self.generate(writer, options = {}, &block)
        new(writer, options, &block)
      end

      # Adds one entry for +path+ (joined onto the normalized :base_url).
      # attrs may carry :last_modified, :change_frequency and :priority.
      #
      # Returns nil without writing anything when the file writer refuses to
      # overwrite an existing file; that case is skipped on purpose.
      def add(path, attrs = {})
        add_url! ::File.join(base_url, path), attrs
      rescue MassiveSitemap::Writer::File::FileExistsException
        nil
      end

      # Initializes the writer and emits the XML header, unless the writer is
      # already initialized. A given block is evaluated inside the root tag,
      # which is then closed.
      def init!(&block)
        return if @writer.inited?

        @writer.init!
        header!(&block)
      end

      # Closes the innermost open tag. When that was the root tag the writer
      # is closed too and true is returned; otherwise returns nil.
      # indent - whether to put the closing tag on its own indented line.
      def close!(indent = true)
        name = @opened_tags.pop
        return unless name

        @writer.print line_break if indent
        @writer.print "</#{name}>"
        return unless @opened_tags.empty?

        @writer.close!
        true
      end

      private

      def header!(&block)
        @writer.print '<?xml version="1.0" encoding="UTF-8"?>'
        tag! self.class::HEADER_NAME, self.class::HEADER_ATTRIBUTES, &block
      end

      def add_url!(location, attrs = {})
        init!

        tag! 'url' do
          tag! 'loc', location
          tag! 'lastmod', format_time(attrs[:last_modified]) if attrs[:last_modified]
          # to_s: a Symbol such as :weekly would otherwise open the tag
          # without ever writing its content or closing it.
          tag! 'changefreq', attrs[:change_frequency].to_s if attrs[:change_frequency]
          tag! 'priority', attrs[:priority].to_s if attrs[:priority]
        end
      end

      # Writes <name>. String content is escaped, written and the tag closed
      # on the same line. A Hash passed as content is treated as attributes.
      # With a block, the block is evaluated inside the tag and the tag is
      # closed; without content or block the tag stays open.
      def tag!(name, content = nil, attrs = {}, &block)
        attrs = content if content.is_a?(Hash)
        open!(name, attrs)

        if content.is_a?(String)
          @writer.print escape(content)
          close!(false)
        elsif block
          instance_eval(&block)
          close!
        end
      end

      def open!(name, attrs = {})
        attributes = attrs.map { |attr, value| %Q( #{attr}="#{escape(value)}") }.join
        @writer.print line_break
        @opened_tags << name
        @writer.print "<#{name}#{attributes}>"
      end

      # Newline followed by the indentation for the current nesting depth.
      def line_break
        "\n" + ' ' * (options[:indent_by] * @opened_tags.size)
      end

      # Entity-escapes the XML special characters in value.
      def escape(value)
        value.to_s.gsub(XML_SPECIAL_CHARS, XML_ENTITIES)
      end

      # Formats a time as a UTC timestamp without modifying the caller's
      # object (Time#utc converts the receiver in place).
      def format_time(time)
        utc = time.respond_to?(:getutc) ? time.getutc : time.utc
        utc.strftime(LASTMOD_FORMAT)
      end

      # Normalizes :base_url to "<scheme>://<host>/", defaulting the scheme to
      # http. Returns "" when no base url is configured.
      def base_url
        url = @options[:base_url].to_s
        return "" if url.empty?

        schema, host = url.scan(%r{\A(https?://)?(.+?)/?\z}).flatten
        "#{schema || 'http://'}#{host}/"
      end
    end
  end
end
@@ -0,0 +1,29 @@
require "massive_sitemap/builder/base"

module MassiveSitemap
  module Builder
    # Builds a <sitemapindex> document listing every stream the writer
    # reports through #each, one <sitemap> entry per stream.
    class Index < Base
      HEADER_NAME = 'sitemapindex'
      HEADER_ATTRIBUTES = {
        :xmlns => 'http://www.sitemaps.org/schemas/sitemap/0.9'
      }

      # writer  - must respond to #each, yielding [path, last_modified].
      # options - forwarded to Base (:base_url, :indent_by).
      # The block parameter is accepted for signature compatibility with Base
      # but is not used: the index content comes from the writer alone.
      def initialize(writer, options = {}, &block)
        super(writer, options) do
          writer.each do |path, last_modified|
            add path, :last_modified => last_modified
          end
        end
      end

      # Writes one <sitemap> entry with its <loc> and, when given, <lastmod>.
      def add_url!(location, attrs = {})
        init!

        tag! 'sitemap' do
          tag! 'loc', location
          modified = attrs[:last_modified]
          if modified
            # getutc returns a converted copy; Time#utc would change the
            # object handed in by the writer.
            utc = modified.respond_to?(:getutc) ? modified.getutc : modified.utc
            tag! 'lastmod', utc.strftime('%Y-%m-%dT%H:%M:%S+00:00')
          end
        end
      end
    end
  end
end
@@ -0,0 +1,53 @@
require "massive_sitemap/builder/base"
# The writer must respond to #options, #inited?, #init!(options), #print and
# #close!.

module MassiveSitemap
  module Builder
    # Builder that starts a new sitemap file once :max_per_sitemap urls have
    # been written to the current one.
    class Rotating < Base
      # Allowed values for :max_per_sitemap (both bounds included).
      NUM_URLS = 1..50_000

      # options[:max_per_sitemap] - urls per file, defaults to the upper bound
      #                             of NUM_URLS.
      # Raises ArgumentError when the value lies outside NUM_URLS.
      def initialize(writer, options = {}, &block)
        @max_urls  = options[:max_per_sitemap] || NUM_URLS.last
        @rotations = 0
        @urls      = 0

        unless NUM_URLS.member?(@max_urls)
          raise ArgumentError, %Q(":max_per_sitemap" must be between #{NUM_URLS.first} and #{NUM_URLS.last} (inclusive))
        end

        super
      end

      # On rotation, close current file, and reopen a new one
      # with same file name but -<counter> appended
      def init!(&block)
        return if @writer.inited?

        @urls = 0
        filename = filename_with_rotation(@writer.options[:filename], @rotations)
        # Counted before the writer is initialized, so a file the writer
        # refuses to overwrite is skipped on the next attempt.
        @rotations += 1
        @writer.init! :filename => filename
        header!(&block)
      end

      # Closes the current document first when it already holds the maximum
      # number of urls; the entry then goes into a freshly initialized one.
      def add_url!(location, attrs = {})
        close! if @urls >= @max_urls
        super
        @urls += 1
      end

      private

      # Replaces any existing "-<n>" suffix of filename with "-<rotation>";
      # rotation 0 (or nil) yields the plain name.
      def filename_with_rotation(filename, rotation = nil)
        base, _, ext = split_filename(filename)
        suffix = rotation.to_i > 0 ? "-#{rotation}" : nil
        [base, suffix, ext].join
      end

      # Splits "name-3.xml.gz" into ["name", "-3", ".xml.gz"]; missing parts
      # are nil (the name part is "" when absent).
      def split_filename(filename)
        filename.to_s.scan(/\A([^.]*?)(-[0-9]+)?(\..+)?\z/).flatten
      end
    end
  end
end
@@ -0,0 +1,12 @@
require "massive_sitemap/builder/base"

module MassiveSitemap
  module Builder
    module_function

    # Shortcut so that MassiveSitemap::Builder.new(...) returns a
    # Builder::Base; all arguments and the block are passed through.
    def new(writer, options = {}, &block)
      Base.new(writer, options, &block)
    end
  end
end
@@ -0,0 +1,21 @@
module MassiveSitemap
  # Announces a sitemap to search engines by requesting their ping URL.
  class Ping
    # Ping URL templates; %s is replaced by the escaped sitemap URI.
    PING = {
      :google => 'http://www.google.com/webmasters/tools/ping?sitemap=%s',
      :bing   => 'http://www.bing.com/webmaster/ping.aspx?siteMap=%s',
      :ask    => 'http://submissions.ask.com/ping?sitemap=%s'
    }

    # Requests the ping URL of every given engine.
    #
    # sitemap_uri - String URI of the sitemap (or sitemap index).
    # engines     - one engine or a list of engines. Each is either a key of
    #               PING (e.g. :google) or a URL template String containing
    #               %s. Nothing is requested for the default empty list.
    #
    # Returns the list of engines. Raises KeyError for a Symbol that is not a
    # key of PING; network errors from Net::HTTP are not rescued.
    def self.ping_search_engines(sitemap_uri, engines = [])
      # Loaded lazily so the networking libraries are only pulled in when a
      # ping is actually requested.
      require 'net/http'
      require 'uri'
      require 'cgi'

      escaped_uri = CGI.escape(sitemap_uri)

      Array(engines).each do |engine|
        template = engine.is_a?(Symbol) ? PING.fetch(engine) : engine
        Net::HTTP.get URI.parse(template % escaped_uri)
      end
    end
  end
end
@@ -0,0 +1,60 @@
# StringIO is the default stream; without this require #open_stream raises
# NameError unless another file happened to load it first.
require 'stringio'

module MassiveSitemap
  module Writer
    # Base writer: collects everything printed into an in-memory StringIO.
    # Subclasses override the "Interface" methods to target other storage.
    class Base
      OPTS = {}

      attr_reader :options

      # options - Hash merged over the class' OPTS.
      def initialize(options = {})
        @options = self.class::OPTS.merge(options)
        @stream  = nil
      end

      # Interface

      # Returns the stream to print into; the same StringIO is reused across
      # init! calls.
      def open_stream
        @string ||= StringIO.new
      end

      # Hook called with the current stream when the writer is closed.
      def close_stream(stream)
      end

      # Whether a stream may be opened with the current options.
      def init?
        true
      end

      # The streams an index should list; none for the base writer.
      def streams
        []
      end

      # API

      # Closes any open stream, merges options into the current ones and
      # opens a new stream if init? allows it.
      def init!(options = {})
        close!
        @options.merge!(options)
        @stream = open_stream if init?
      end

      # Closes the current stream, if any.
      def close!
        return unless inited?

        close_stream(@stream)
        @stream = nil
      end

      # Truthy (the stream itself) while a stream is open, nil otherwise.
      def inited?
        @stream
      end

      # Prints string to the open stream; a no-op while no stream is open.
      def print(string)
        @stream.print(string) if inited?
      end

      # Iterates over #streams.
      def each(&block)
        streams.each(&block)
      end
    end

  end
end
@@ -0,0 +1,59 @@
require 'fileutils'
require "massive_sitemap/writer/base"

# Write into File

module MassiveSitemap
  module Writer
    # Writes into "<document_full>/<filename>". Content goes to a ".tmp"
    # sibling first and is moved into place when the stream is closed.
    class File < Base

      # Raised when the target exists and :force_overwrite is not set.
      class FileExistsException < IOError; end

      OPTS = {
        :document_full   => '.',
        :force_overwrite => false,
        :filename        => "sitemap.xml",
        :index_filename  => "sitemap_index.xml",
      }

      def open_stream
        ::File.open(tmp_filename, 'w:ASCII-8BIT')
      end

      def close_stream(stream)
        stream.close
        # Move from tmp file into actual file
        ::File.delete(filename) if ::File.exist?(filename)
        ::File.rename(tmp_filename, filename)
      end

      # Raises FileExistsException when the target file exists and
      # :force_overwrite is not set; returns true otherwise.
      def init?
        if !options[:force_overwrite] && ::File.exist?(filename)
          raise FileExistsException, "Can not create file: #{filename} exists"
        end
        true
      end

      # [basename, mtime] for every sitemap file in :document_full whose path
      # does not contain the index file name.
      def streams
        sitemap_paths = files.reject { |path| path.include?(options[:index_filename]) }
        sitemap_paths.map { |path| [::File.basename(path), ::File.stat(path).mtime] }
      end

      private

      def filename
        ::File.join options[:document_full], options[:filename]
      end

      def tmp_filename
        filename + ".tmp"
      end

      def files
        Dir[::File.join(options[:document_full], "*.xml")]
      end
    end

  end
end
@@ -0,0 +1,24 @@
require 'zlib'

require "massive_sitemap/writer/file"
# Write into GZipped File

module MassiveSitemap
  module Writer

    # File writer whose output is gzip-compressed and stored under the
    # regular file name with ".gz" appended.
    class GzipFile < File
      # Wraps the file opened by the parent writer in a gzip stream.
      def open_stream
        raw_file = super
        ::Zlib::GzipWriter.new(raw_file)
      end

      private

      def filename
        "#{super}.gz"
      end

      def files
        pattern = ::File.join(options[:document_full], "*.xml.gz")
        Dir.glob(pattern)
      end
    end
  end
end
@@ -0,0 +1,31 @@
require 'zlib'

require "massive_sitemap/writer/file"
# Create Lock before writing to file

module MassiveSitemap
  module Writer

    # File writer that holds a lock file while a stream is open, so that a
    # second generator run fails instead of writing concurrently.
    class LockingFile < File
      LOCK_FILE = 'generator.lock'

      # Takes the lock, then opens the stream. The lock is released again if
      # the stream cannot be opened.
      def open_stream
        lock!
        begin
          super
        rescue
          unlock!
          raise
        end
      end

      # Closes the stream and releases the lock, even when closing fails.
      def close_stream(stream)
        super
      ensure
        unlock!
      end

      # Raises Errno::EACCES while the lock file exists.
      def init?
        raise Errno::EACCES if ::File.exist?(LOCK_FILE)
        super
      end

      private

      # Creates the lock file. CREAT|EXCL makes creation fail when the file
      # already exists; the block form closes the handle right away.
      def lock!
        ::File.open(LOCK_FILE, ::File::WRONLY | ::File::CREAT | ::File::EXCL) { }
      rescue Errno::EEXIST
        # Same error init? raises for an existing lock.
        raise Errno::EACCES, LOCK_FILE
      end

      def unlock!
        FileUtils.rm_f(LOCK_FILE)
      end
    end

  end
end
@@ -0,0 +1,29 @@
require 'stringio'
require "massive_sitemap/writer/base"

# Write into String
# Perfect for testing purposes
module MassiveSitemap
  module Writer

    # Writer that keeps everything in memory and can be compared against
    # plain strings.
    class String < Base

      # The same StringIO is reused for every init!, so successive documents
      # are appended to one another.
      def open_stream
        @string ||= StringIO.new
      end

      # Everything written so far; "" before the first stream was opened.
      def to_s
        @string ? @string.string : ""
      end

      def ==(other_string)
        to_s == other_string
      end

      def include?(other_string)
        to_s.include?(other_string)
      end
    end

  end
end
@@ -0,0 +1,52 @@
require 'fileutils'
require 'massive_sitemap/writer/file'
require 'massive_sitemap/writer/gzip_file'
require 'massive_sitemap/builder/rotating'
require 'massive_sitemap/builder/index'

# Page at -> <base_url>
# http://example.de/dir/

# Index at
# http://sitemap.example.de/index-dir/

# Save at -> <document_full>
# /root/dir/ -> <document_root>/<document_path>

module MassiveSitemap
  DEFAULTS = {
    # global
    :index_base_url  => nil,
    :gzip            => false,
    :writer          => MassiveSitemap::Writer::File,

    # writer
    :document_full   => '.',
    :force_overwrite => false,
    :filename        => "sitemap.xml",
    :index_filename  => "sitemap_index.xml",

    # builder
    :base_url        => nil,
    :indent_by       => 2,
  }

  # Generates the sitemap file(s) and then the sitemap index.
  #
  # options - Hash overriding DEFAULTS; :base_url is required. :gzip => true
  #           selects the gzip writer regardless of :writer.
  # block   - evaluated by the rotating builder; call `add` in it for every
  #           page.
  #
  # Returns the index builder. Raises ArgumentError without :base_url.
  def generate(options = {}, &block)
    # Kept in locals rather than module-level instance variables, so calls
    # do not share state.
    options = DEFAULTS.merge(options)

    unless options[:base_url]
      raise ArgumentError, 'you must specify ":base_url" string'
    end
    options[:index_base_url] ||= options[:base_url]

    # mkdir_p also creates missing parent directories.
    FileUtils.mkdir_p(options[:document_full]) unless ::File.exist?(options[:document_full])

    options[:writer] = MassiveSitemap::Writer::GzipFile if options[:gzip]

    writer = options[:writer].new(options)
    Builder::Rotating.generate(writer, options, &block)

    # The same writer is reused for the index, which is always rewritten.
    writer.options.merge!(:filename => options[:index_filename], :force_overwrite => true)
    Builder::Index.generate(writer, options.merge(:base_url => options[:index_base_url]))
  end
  module_function :generate
end
@@ -0,0 +1,21 @@
# -*- encoding: utf-8 -*-
$:.push File.expand_path("../lib", __FILE__)

Gem::Specification.new do |s|
  s.name        = "massive_sitemap"
  # Resolved relative to this file, so the spec loads from any working
  # directory.
  s.version     = File.read(File.expand_path("../VERSION", __FILE__)).to_s.strip
  s.authors     = ["Tobias Bielohlawek"]
  s.email       = ["tobi@soundcloud.com"]
  s.homepage    = "http://github.com/rngtng/massive_sitemap"
  s.summary     = %q{Build painfree sitemaps for websites with millions of pages}
  s.description = %q{MassiveSitemap - build huge sitemaps painfree. Differential updates keeps generation time short and reduces load on DB. It's heavealy inspired by BigSitemaps and offers compatiable API}

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Each entry is "name" or "name version-requirement".
  %w(rake rspec).each do |dependency|
    s.add_development_dependency(*dependency.split(' '))
  end
end
@@ -0,0 +1,149 @@
1
+ require "spec_helper"
2
+
3
+ require "massive_sitemap/builder"
4
+ require "massive_sitemap/writer/string"
5
+
6
+ describe MassiveSitemap::Builder::Base do
7
+ let(:header) { %Q(<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">) }
8
+ let(:writer) { MassiveSitemap::Writer::String.new }
9
+ let(:builder) { MassiveSitemap::Builder.new(writer) }
10
+
11
+ describe "#arguments" do
12
+ it 'fail if no writer given' do
13
+ expect do
14
+ MassiveSitemap::Builder.new
15
+ end.to raise_error(ArgumentError)
16
+ end
17
+ end
18
+
19
+ context "no content added" do
20
+ it 'empty per default' do
21
+ builder
22
+
23
+ writer.should == ""
24
+ end
25
+
26
+ it 'generate basic skeleton' do
27
+ builder.init!
28
+ writer.should == header
29
+ end
30
+
31
+ it 'generate basic skeleton on double init' do
32
+ builder.init!
33
+ builder.init!
34
+ writer.should == header
35
+ end
36
+
37
+ it 'generate nothing when not inited' do
38
+ builder.close!
39
+ writer.should == ""
40
+ end
41
+
42
+ it "same result on double close" do
43
+ builder.close!
44
+ builder.close!
45
+ writer.should == ""
46
+ end
47
+
48
+ it "same result on double close" do
49
+ builder.init!
50
+ builder.close!
51
+ builder.close!
52
+ writer.should == %Q(#{header}\n</urlset>)
53
+ end
54
+
55
+ it "same result on double close" do
56
+ builder.init! do
57
+ add "test"
58
+ end
59
+ writer.should == %Q(#{header}\n <url>\n <loc>/test</loc>\n </url>\n</urlset>)
60
+ end
61
+ end
62
+
63
+ context "adding content" do
64
+ it 'seq: generate one url' do
65
+ builder.add 'test'
66
+ builder.close!
67
+ writer.should == %Q(#{header}\n <url>\n <loc>/test</loc>\n </url>\n</urlset>)
68
+ end
69
+ end
70
+
71
+ context "as block" do
72
+ it 'generate basic skeleton' do
73
+ MassiveSitemap::Builder.new(writer) {}
74
+ writer.should == ""
75
+ end
76
+
77
+ it 'generate one url' do
78
+ MassiveSitemap::Builder.new(writer) do
79
+ add 'test'
80
+ end
81
+ writer.should == %Q(#{header}\n <url>\n <loc>/test</loc>\n </url>\n</urlset>)
82
+ end
83
+
84
+ it 'generate one url with init!' do
85
+ MassiveSitemap::Builder.new(writer) do
86
+ init!
87
+ add 'test'
88
+ end
89
+ writer.should == %Q(#{header}\n <url>\n <loc>/test</loc>\n </url>\n</urlset>)
90
+ end
91
+
92
+ it 'generate one url with init! block' do
93
+ MassiveSitemap::Builder.new(writer) do
94
+ init! do
95
+ add 'test'
96
+ end
97
+ end
98
+ writer.should == %Q(#{header}\n <url>\n <loc>/test</loc>\n </url>\n</urlset>)
99
+ end
100
+
101
+ it 'generate one url with close!' do
102
+ MassiveSitemap::Builder.new(writer) do
103
+ add 'test'
104
+ close!
105
+ end
106
+ writer.should == %Q(#{header}\n <url>\n <loc>/test</loc>\n </url>\n</urlset>)
107
+ end
108
+
109
+ it 'generate one url, no indent' do
110
+ MassiveSitemap::Builder.new(writer, :indent_by => 0) do
111
+ add_url! 'test'
112
+ end
113
+ writer.should == %Q(#{header}\n<url>\n<loc>test</loc>\n</url>\n</urlset>)
114
+ end
115
+
116
+ it 'generate two url' do
117
+ MassiveSitemap::Builder.new(writer) do
118
+ add_url! 'test'
119
+ add_url! 'test2'
120
+ end
121
+ writer.should == %Q(#{header}\n <url>\n <loc>test</loc>\n </url>\n <url>\n <loc>test2</loc>\n </url>\n</urlset>)
122
+ end
123
+
124
+ it 'generate one url with attrs' do
125
+ MassiveSitemap::Builder.new(writer, :indent_by => 0) do
126
+ add_url! 'test', :change_frequency => 'weekly', :priority => 0.8
127
+ end
128
+ writer.should include("<loc>test</loc>\n<changefreq>weekly</changefreq>\n<priority>0.8</priority>")
129
+ end
130
+ end
131
+
132
+ describe ".base_url" do
133
+ URLS = %w(
134
+ http://test.de/
135
+ test.de/
136
+ test.de
137
+ )
138
+
139
+ URLS.each do |url|
140
+ it "transforms to valid url" do
141
+ MassiveSitemap::Builder.new(writer, :base_url => url).send(:base_url).should == "http://test.de/"
142
+ end
143
+ end
144
+
145
+ it "transforms to valid url with https" do
146
+ MassiveSitemap::Builder.new(writer, :base_url => "https://test.de/").send(:base_url).should == "https://test.de/"
147
+ end
148
+ end
149
+ end
@@ -0,0 +1,26 @@
1
+ require "spec_helper"
2
+
3
+ require "massive_sitemap/builder/index"
4
+ require "massive_sitemap/writer/string"
5
+
6
+ describe MassiveSitemap::Builder::Index do
7
+ INDEX_HEADER = %Q(<?xml version="1.0" encoding="UTF-8"?>\n<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n<sitemap>)
8
+
9
+ let(:writer) { MassiveSitemap::Writer::String.new }
10
+
11
+ before do
12
+ writer.stub!(:streams).and_return(['test'])
13
+ end
14
+
15
+ it 'generates one url' do
16
+ MassiveSitemap::Builder::Index.new(writer, :indent_by => 0)
17
+
18
+ writer.should == %Q(#{INDEX_HEADER}\n<loc>/test</loc>\n</sitemap>\n</sitemapindex>)
19
+ end
20
+
21
+ it 'include base_url' do
22
+ MassiveSitemap::Builder::Index.new(writer, :base_url => "test.de", :indent_by => 0)
23
+ writer.should include("<loc>http://test.de/test</loc>")
24
+ end
25
+
26
+ end
@@ -0,0 +1,133 @@
1
+ require "spec_helper"
2
+
3
+ require "massive_sitemap/builder/rotating"
4
+ require "massive_sitemap/writer/string"
5
+
6
+ describe MassiveSitemap::Builder::Rotating do
7
+ let(:header) { %Q(<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">) }
8
+ let(:writer) { MassiveSitemap::Writer::String.new }
9
+ let(:builder) { MassiveSitemap::Builder::Rotating.new(writer) }
10
+
11
+ it 'raises error when max_per_sitemap > MAX_URLS' do
12
+ expect do
13
+ MassiveSitemap::Builder::Rotating.new(writer, :max_per_sitemap => MassiveSitemap::Builder::Rotating::NUM_URLS.max + 1)
14
+ end.to raise_error(ArgumentError)
15
+ end
16
+
17
+ it 'generates one url' do
18
+ MassiveSitemap::Builder::Rotating.new(writer) do
19
+ add_url! 'test'
20
+ end
21
+ writer.should == %Q(#{header}\n <url>\n <loc>test</loc>\n </url>\n</urlset>)
22
+ end
23
+
24
+ it 'generates two url' do
25
+ MassiveSitemap::Builder::Rotating.new(writer, :max_per_sitemap => 1) do
26
+ add_url! 'test'
27
+ add_url! 'test2'
28
+ end
29
+ writer.should == %Q(#{header}\n <url>\n <loc>test</loc>\n </url>\n</urlset>#{header}\n <url>\n <loc>test2</loc>\n </url>\n</urlset>)
30
+ end
31
+
32
+ context "with file" do
33
+ let(:filename) { 'sitemap.xml' }
34
+ let(:filename2) { 'sitemap-1.xml' }
35
+ let(:writer) { MassiveSitemap::Writer::File.new }
36
+
37
+ after do
38
+ FileUtils.rm(filename) rescue nil
39
+ FileUtils.rm(filename2) rescue nil
40
+ end
41
+
42
+ it 'generates two url' do
43
+ expect do
44
+ expect do
45
+ MassiveSitemap::Builder::Rotating.new(writer, :max_per_sitemap => 1) do
46
+ add 'test'
47
+ add 'test2'
48
+ end
49
+ end.to change { File.exists?(filename) }.to(true)
50
+ end.to change { File.exists?(filename2) }.to(true)
51
+ end
52
+
53
+ it 'generates two url when file exists' do
54
+ File.open(filename, 'w') {}
55
+ expect do
56
+ expect do
57
+ MassiveSitemap::Builder::Rotating.new(writer, :max_per_sitemap => 1) do
58
+ begin
59
+ add 'test'
60
+ rescue MassiveSitemap::Writer::File::FileExistsException => e
61
+ end
62
+ add 'test2'
63
+ end
64
+ end.to_not change { File.exists?(filename) }.to(true)
65
+ end.to change { File.exists?(filename2) }.to(true)
66
+ end
67
+ end
68
+
69
+ describe "#filename_with_rotation" do
70
+ context "keeps filename" do
71
+ it "rotation is zero" do
72
+ builder.send(:filename_with_rotation, "sitemap.xml").should == "sitemap.xml"
73
+ end
74
+
75
+ it "rotation is zero" do
76
+ builder.send(:filename_with_rotation, "sitemap2.xml").should == "sitemap2.xml"
77
+ end
78
+
79
+ it "rotation is zero" do
80
+ builder.send(:filename_with_rotation, "sitemap.xml", nil).should == "sitemap.xml"
81
+ end
82
+
83
+ it "rotation is nil" do
84
+ builder.send(:filename_with_rotation, "sitemap.xml", 0).should == "sitemap.xml"
85
+ end
86
+ end
87
+
88
+ context "rotation is 1" do
89
+ it "add prefix" do
90
+ builder.send(:filename_with_rotation, "sitemap.xml", 1).should == "sitemap-1.xml"
91
+ end
92
+
93
+ it "rotation is zero" do
94
+ builder.send(:filename_with_rotation, "sitemap-1.xml", 1).should == "sitemap-1.xml"
95
+ end
96
+
97
+ it "rotation is zero" do
98
+ builder.send(:filename_with_rotation, "sitemap-user.xml", 1).should == "sitemap-user-1.xml"
99
+ end
100
+ end
101
+ end
102
+
103
+ describe "#split_filename" do
104
+ FILENAMES = {
105
+ nil => ["", nil, nil],
106
+ ".xml" => ["", nil, ".xml"],
107
+ ".xml.gz" => ["", nil, ".xml.gz"],
108
+ "sitemap" => ["sitemap", nil, nil],
109
+ "sitemap.xml" => ["sitemap", nil, ".xml"],
110
+ "sitemap.xml.gz" => ["sitemap", nil, ".xml.gz"],
111
+ "-1.xml" => ["", "-1", ".xml"],
112
+ "-1.xml.gz" => ["", "-1", ".xml.gz"],
113
+ "sitemap-1" => ["sitemap", "-1", nil],
114
+ "sitemap-1.xml" => ["sitemap", "-1", ".xml"],
115
+ "sitemap-1.xml.gz" => ["sitemap", "-1", ".xml.gz"],
116
+ "-user-1.xml" => ["-user", "-1", ".xml"],
117
+ "-user-1.xml.gz" => ["-user", "-1", ".xml.gz"],
118
+ "sitemap-user-1" => ["sitemap-user", "-1", nil],
119
+ "sitemap-user-1.xml" => ["sitemap-user", "-1", ".xml"],
120
+ "sitemap-user-1.xml.gz" => ["sitemap-user", "-1", ".xml.gz"],
121
+ "sitemap1" => ["sitemap1", nil, nil],
122
+ "sitemap1.xml" => ["sitemap1", nil, ".xml"],
123
+ "sitemap1.xml.gz" => ["sitemap1", nil, ".xml.gz"],
124
+ }
125
+
126
+ FILENAMES.each do |filename, expected|
127
+ it "splits filename #{filename} into #{expected.join(' ')}" do
128
+ builder.send(:split_filename, filename).should == expected
129
+ end
130
+ end
131
+ end
132
+
133
+ end
@@ -0,0 +1,158 @@
1
+ require "spec_helper"
2
+
3
+ require "massive_sitemap/writer/gzip_file"
4
+
5
+ describe MassiveSitemap do
6
+ let(:index_filename) { 'sitemap_index.xml' }
7
+ let(:filename) { 'sitemap.xml' }
8
+ let(:filename2) { 'sitemap2.xml' }
9
+
10
+ def output(file = filename)
11
+ `cat '#{file}'`
12
+ end
13
+
14
+ def gz_filename(file = filename)
15
+ "#{file}.gz"
16
+ end
17
+
18
+ after do
19
+ FileUtils.rm(index_filename) rescue nil
20
+ FileUtils.rm(filename) rescue nil
21
+ FileUtils.rm(filename2) rescue nil
22
+ end
23
+
24
+ describe "#initalize" do
25
+ it 'fail if no base_url given' do
26
+ expect do
27
+ MassiveSitemap.generate
28
+ end.to raise_error(ArgumentError)
29
+ end
30
+
31
+ it "does not create empty sitemap file" do
32
+ expect do
33
+ MassiveSitemap.generate(:base_url => 'test.de/')
34
+ end.to_not change { ::File.exists?(filename) }
35
+ end
36
+
37
+ context "custom writer" do
38
+ after do
39
+ FileUtils.rm(gz_filename(index_filename)) rescue nil
40
+ FileUtils.rm(gz_filename) rescue nil
41
+ end
42
+
43
+ it 'takes gzips writer' do
44
+ expect do
45
+ MassiveSitemap.generate(:base_url => 'test.de/', :gzip => true) do
46
+ add "dummy"
47
+ end
48
+ end.to change { ::File.exists?(gz_filename) }.to(true)
49
+ end
50
+
51
+ it 'takes custom writer' do
52
+ expect do
53
+ MassiveSitemap.generate(:base_url => 'test.de/', :writer => MassiveSitemap::Writer::GzipFile) do
54
+ add "dummy"
55
+ end
56
+ end.to change { ::File.exists?(gz_filename) }.to(true)
57
+ end
58
+ end
59
+ end
60
+
61
+ describe "#generate" do
62
+ it 'adds url' do
63
+ MassiveSitemap.generate(:base_url => 'test.de') do
64
+ add "track/name"
65
+ end
66
+ output.should include("<loc>http://test.de/track/name</loc>")
67
+ end
68
+
69
+ it 'adds url with root slash' do
70
+ MassiveSitemap.generate(:base_url => 'test.de/') do
71
+ add "/track/name"
72
+ end
73
+ output.should include("<loc>http://test.de/track/name</loc>")
74
+ end
75
+
76
+ it "doesn't fail for existing file" do
77
+ File.open(filename, 'w') {}
78
+ expect do
79
+ MassiveSitemap.generate(:base_url => 'test.de/') do
80
+ add "/track/name"
81
+ end
82
+ end.to_not change { File.stat(filename).mtime }
83
+ end
84
+
85
+ context 'nested generation' do
86
+ it 'adds url of nested builder' do
87
+ MassiveSitemap.generate(:base_url => 'test.de/') do
88
+ writer = @writer.class.new(@options.merge(:filename => 'sitemap2.xml'))
89
+ MassiveSitemap::Builder::Rotating.new(writer, @options) do
90
+ add "/set/name"
91
+ end
92
+ end
93
+ output(filename2).should include("<loc>http://test.de/set/name</loc>")
94
+ end
95
+
96
+ it 'executes block altough first sitemap exists' do
97
+ File.open(filename, 'w') {}
98
+ MassiveSitemap.generate(:base_url => 'test.de/') do
99
+ writer = @writer.class.new(@options.merge(:filename => 'sitemap2.xml'))
100
+ MassiveSitemap::Builder::Rotating.new(writer, @options) do
101
+ add "/set/name"
102
+ end
103
+ end
104
+ output(filename2).should include("<loc>http://test.de/set/name</loc>")
105
+ end
106
+ end
107
+
108
+ end
109
+
110
+ describe "#generate_index" do
111
+ let(:lastmod) { File.stat(index_filename).mtime.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') }
112
+
113
+ it "does not create empty files" do
114
+ MassiveSitemap.generate(:base_url => 'test.de/')
115
+ ::File.exists?(index_filename).should be_false
116
+ end
117
+
118
+ it 'includes urls' do
119
+ MassiveSitemap.generate(:base_url => 'test.de/', :indent_by => 0) do
120
+ add "dummy"
121
+ end
122
+
123
+ output(index_filename).should include("<sitemap>\n<loc>http://test.de/sitemap.xml</loc>\n<lastmod>#{lastmod}</lastmod>\n</sitemap>")
124
+ end
125
+
126
+ it 'includes index base url' do
127
+ MassiveSitemap.generate(:base_url => 'test.de/', :index_base_url => 'index.de/') do
128
+ add "dummy"
129
+ end
130
+
131
+ output(index_filename).should include("<loc>http://index.de/sitemap.xml</loc>")
132
+ end
133
+
134
+ it 'overwrites existing one' do
135
+ File.open(index_filename, 'w') {}
136
+ MassiveSitemap.generate(:base_url => 'test.de/', :index_base_url => 'index.de/') do
137
+ add "dummy"
138
+ end
139
+
140
+ output(index_filename).should include("<loc>http://index.de/sitemap.xml</loc>")
141
+ end
142
+
143
+ context "gziped" do
144
+ after do
145
+ FileUtils.rm(gz_filename(index_filename)) rescue nil
146
+ FileUtils.rm(gz_filename) rescue nil
147
+ end
148
+
149
+ it 'creates sitemap file' do
150
+ expect do
151
+ MassiveSitemap.generate(:base_url => 'test.de/', :writer => MassiveSitemap::Writer::GzipFile) do
152
+ add "dummy"
153
+ end
154
+ end.to change { ::File.exists?(gz_filename(index_filename)) }.to(true)
155
+ end
156
+ end
157
+ end
158
+ end
@@ -0,0 +1,2 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+ require "massive_sitemap"
@@ -0,0 +1,104 @@
1
+ require "spec_helper"
2
+
3
+ require "massive_sitemap/writer/file"
4
+
5
+ describe MassiveSitemap::Writer::File do
6
+ let(:filename) { 'sitemap.xml' }
7
+ let(:filename2) { 'sitemap-1.xml' }
8
+ let(:writer) { MassiveSitemap::Writer::File.new.tap { |w| w.init! } }
9
+
10
+ after do
11
+ FileUtils.rm(filename) rescue nil
12
+ FileUtils.rm(filename2) rescue nil
13
+ end
14
+
15
+ describe "document_full" do
16
+ let(:folder) { "test" }
17
+
18
+ before do
19
+ Dir.mkdir(folder) unless ::File.exists?(folder)
20
+ end
21
+
22
+ after do
23
+ FileUtils.rm_rf(folder) rescue nil
24
+ end
25
+
26
+ it 'appends document_full' do
27
+ expect do
28
+ MassiveSitemap::Writer::File.new(:document_full => folder).tap do |w|
29
+ w.init!
30
+ w.close!
31
+ end
32
+ end.to change { File.exists?("test/#{filename}") }.to(true)
33
+ end
34
+
35
+ it 'appends document_full' do
36
+ expect do
37
+ MassiveSitemap::Writer::File.new(:document_full => "#{folder}/").tap do |w|
38
+ w.init!
39
+ w.close!
40
+ end
41
+ end.to change { File.exists?("test/#{filename}") }.to(true)
42
+ end
43
+ end
44
+
45
+ it 'create file' do
46
+ expect do
47
+ writer.close!
48
+ end.to change { File.exists?(filename) }.to(true)
49
+ end
50
+
51
+ it 'create second file on rotation' do
52
+ expect do
53
+ expect do
54
+ writer.close!
55
+ end.to change { File.exists?(filename) }.to(true)
56
+ writer.init!(:filename => filename2)
57
+ writer.close!
58
+ end.to change { File.exists?(filename2) }.to(true)
59
+ end
60
+
61
+ it 'write into file' do
62
+ writer.print 'test'
63
+ writer.close!
64
+ `cat '#{filename}'`.should == "test"
65
+ end
66
+
67
+ it 'init new file closes current' do
68
+ writer.print 'test'
69
+ writer.init!(:filename => filename2)
70
+ `cat '#{filename}'`.should == "test"
71
+ end
72
+
73
+ it 'write into second file' do
74
+ writer.print 'test'
75
+ writer.init!(:filename => filename2)
76
+ writer.print 'test2'
77
+ writer.close!
78
+ `cat '#{filename2}'`.should == "test2"
79
+ end
80
+
81
+ context "opening write file" do
82
+ before do
83
+ File.open(filename, 'w') {}
84
+ end
85
+
86
+ after do
87
+ FileUtils.rm(filename) rescue nil
88
+ end
89
+
90
+ it 'raises when file exits' do
91
+ writer = MassiveSitemap::Writer::File.new
92
+ expect do
93
+ writer.init!
94
+ end.to raise_error(MassiveSitemap::Writer::File::FileExistsException)
95
+ end
96
+
97
+ it 'raises when file exits' do
98
+ writer = MassiveSitemap::Writer::File.new(:force_overwrite => true)
99
+ expect do
100
+ writer.init!
101
+ end.to_not raise_error(MassiveSitemap::Writer::File::FileExistsException)
102
+ end
103
+ end
104
+ end
@@ -0,0 +1,20 @@
1
+ require "massive_sitemap/writer/gzip_file"
2
+
3
+ describe MassiveSitemap::Writer::GzipFile do
4
+ let(:filename) { 'sitemap.xml' }
5
+ let(:tmp_filename) { "#{filename}.tmp" }
6
+ let(:gz_filename) { "#{filename}.gz" }
7
+ let(:writer) { MassiveSitemap::Writer::GzipFile.new.tap { |w| w.init! } }
8
+
9
+ after do
10
+ FileUtils.rm(filename) rescue nil
11
+ FileUtils.rm(tmp_filename) rescue nil
12
+ FileUtils.rm(gz_filename) rescue nil
13
+ end
14
+
15
+ it 'creates gzip file' do
16
+ expect do
17
+ writer.close!
18
+ end.to change { File.exists?(gz_filename) }.from(false).to(true)
19
+ end
20
+ end
@@ -0,0 +1,34 @@
1
+ require "massive_sitemap/writer/locking_file"
2
+
3
+ describe MassiveSitemap::Writer::LockingFile do
4
+ let(:filename) { 'sitemap.xml' }
5
+ let(:tmp_filename) { "#{filename}.tmp" }
6
+ let(:lock_file) { MassiveSitemap::Writer::LockingFile::LOCK_FILE }
7
+ let(:writer) { MassiveSitemap::Writer::LockingFile.new.tap { |w| w.init! } }
8
+
9
+ after do
10
+ FileUtils.rm(filename) rescue nil
11
+ FileUtils.rm(tmp_filename) rescue nil
12
+ FileUtils.rm(lock_file) rescue nil
13
+ end
14
+
15
+ it 'creates lockfile' do
16
+ expect do
17
+ writer
18
+ end.to change { File.exists?(lock_file) }.to(true)
19
+ end
20
+
21
+ it 'deletes lockfile' do
22
+ writer
23
+ expect do
24
+ writer.close!
25
+ end.to change { File.exists?(lock_file) }.to(false)
26
+ end
27
+
28
+ it 'fails if lockfile exists' do
29
+ File.open(lock_file, 'w') {}
30
+ expect do
31
+ writer
32
+ end.to raise_error
33
+ end
34
+ end
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: massive_sitemap
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.0.0.rc1
5
+ prerelease: 6
6
+ platform: ruby
7
+ authors:
8
+ - Tobias Bielohlawek
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-02-12 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake
16
+ requirement: &70297299930500 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *70297299930500
25
+ - !ruby/object:Gem::Dependency
26
+ name: rspec
27
+ requirement: &70297299930040 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *70297299930040
36
+ description: MassiveSitemap - build huge sitemaps painfree. Differential updates keep
37
+ generation time short and reduce load on DB. It's heavily inspired by BigSitemap
38
+ and offers a compatible API
39
+ email:
40
+ - tobi@soundcloud.com
41
+ executables: []
42
+ extensions: []
43
+ extra_rdoc_files: []
44
+ files:
45
+ - .gitignore
46
+ - .travis.yml
47
+ - CHANGELOG.md
48
+ - Gemfile
49
+ - Gemfile.lock
50
+ - README.md
51
+ - Rakefile
52
+ - VERSION
53
+ - lib/massive_sitemap.rb
54
+ - lib/massive_sitemap/builder.rb
55
+ - lib/massive_sitemap/builder/base.rb
56
+ - lib/massive_sitemap/builder/index.rb
57
+ - lib/massive_sitemap/builder/rotating.rb
58
+ - lib/massive_sitemap/ping.rb
59
+ - lib/massive_sitemap/writer/base.rb
60
+ - lib/massive_sitemap/writer/file.rb
61
+ - lib/massive_sitemap/writer/gzip_file.rb
62
+ - lib/massive_sitemap/writer/locking_file.rb
63
+ - lib/massive_sitemap/writer/string.rb
64
+ - massive_sitemap.gemspec
65
+ - spec/builder/base_spec.rb
66
+ - spec/builder/index_spec.rb
67
+ - spec/builder/rotating_spec.rb
68
+ - spec/massive_sitemap_spec.rb
69
+ - spec/spec_helper.rb
70
+ - spec/writer/file_spec.rb
71
+ - spec/writer/gzip_file_spec.rb
72
+ - spec/writer/locking_file_spec.rb
73
+ homepage: http://github.com/rngtng/massive_sitemap
74
+ licenses: []
75
+ post_install_message:
76
+ rdoc_options: []
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ! '>='
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ required_rubygems_version: !ruby/object:Gem::Requirement
86
+ none: false
87
+ requirements:
88
+ - - ! '>'
89
+ - !ruby/object:Gem::Version
90
+ version: 1.3.1
91
+ requirements: []
92
+ rubyforge_project:
93
+ rubygems_version: 1.8.15
94
+ signing_key:
95
+ specification_version: 3
96
+ summary: Build painfree sitemaps for websites with millions of pages
97
+ test_files:
98
+ - spec/builder/base_spec.rb
99
+ - spec/builder/index_spec.rb
100
+ - spec/builder/rotating_spec.rb
101
+ - spec/massive_sitemap_spec.rb
102
+ - spec/spec_helper.rb
103
+ - spec/writer/file_spec.rb
104
+ - spec/writer/gzip_file_spec.rb
105
+ - spec/writer/locking_file_spec.rb