massive_sitemap 2.0.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +5 -0
- data/.travis.yml +3 -0
- data/CHANGELOG.md +28 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +24 -0
- data/README.md +15 -0
- data/Rakefile +8 -0
- data/VERSION +1 -0
- data/lib/massive_sitemap/builder/base.rb +104 -0
- data/lib/massive_sitemap/builder/index.rb +29 -0
- data/lib/massive_sitemap/builder/rotating.rb +53 -0
- data/lib/massive_sitemap/builder.rb +12 -0
- data/lib/massive_sitemap/ping.rb +21 -0
- data/lib/massive_sitemap/writer/base.rb +60 -0
- data/lib/massive_sitemap/writer/file.rb +59 -0
- data/lib/massive_sitemap/writer/gzip_file.rb +24 -0
- data/lib/massive_sitemap/writer/locking_file.rb +31 -0
- data/lib/massive_sitemap/writer/string.rb +29 -0
- data/lib/massive_sitemap.rb +52 -0
- data/massive_sitemap.gemspec +21 -0
- data/spec/builder/base_spec.rb +149 -0
- data/spec/builder/index_spec.rb +26 -0
- data/spec/builder/rotating_spec.rb +133 -0
- data/spec/massive_sitemap_spec.rb +158 -0
- data/spec/spec_helper.rb +2 -0
- data/spec/writer/file_spec.rb +104 -0
- data/spec/writer/gzip_file_spec.rb +20 -0
- data/spec/writer/locking_file_spec.rb +34 -0
- metadata +105 -0
data/.travis.yml
ADDED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# Changes
|
2
|
+
|
3
|
+
## vx.x.x - ???
|
4
|
+
|
5
|
+
## v2.0.x - ???
|
6
|
+
|
7
|
+
* updated/fixed Ping
|
8
|
+
* updated documentation
|
9
|
+
* switch to writer chain
|
10
|
+
* add BigSitemap API
|
11
|
+
|
12
|
+
## v2.0.0 - 13-02-2012
|
13
|
+
_initial release_
|
14
|
+
|
15
|
+
* restructured gem completely based on BigSitemap gem
|
16
|
+
* separated logic into two major parts:
|
17
|
+
* Builder -> creates content
|
18
|
+
* Writer -> stores content
|
19
|
+
* added several implementations/specifications of builder/writer
|
20
|
+
* added generator for default setup
|
21
|
+
* added specs
|
22
|
+
* writer overwrite detection
|
23
|
+
* added Index generation
|
24
|
+
* don't init new writer all the time
|
25
|
+
* move inited status to writer
|
26
|
+
* move index build into indexer and resource handling/selection into writer
|
27
|
+
* manifest handling:
|
28
|
+
* moved Amazon S3 integration to [massive_sitemap-writer-s3](https://github.com/rngtng/massive_sitemap-writer-s3)
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
massive_sitemap (0.0.1)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: http://rubygems.org/
|
8
|
+
specs:
|
9
|
+
diff-lcs (1.1.3)
|
10
|
+
rspec (2.8.0)
|
11
|
+
rspec-core (~> 2.8.0)
|
12
|
+
rspec-expectations (~> 2.8.0)
|
13
|
+
rspec-mocks (~> 2.8.0)
|
14
|
+
rspec-core (2.8.0)
|
15
|
+
rspec-expectations (2.8.0)
|
16
|
+
diff-lcs (~> 1.1.2)
|
17
|
+
rspec-mocks (2.8.0)
|
18
|
+
|
19
|
+
PLATFORMS
|
20
|
+
ruby
|
21
|
+
|
22
|
+
DEPENDENCIES
|
23
|
+
massive_sitemap!
|
24
|
+
rspec
|
data/README.md
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# MassiveSitemap
|
2
|
+
|
3
|
+
[![](http://travis-ci.org/rngtng/massive_sitemap.png)](http://travis-ci.org/rngtng/massive_sitemap)
|
4
|
+
|
5
|
+
Build painfree sitemaps for websites with millions of pages
|
6
|
+
|
7
|
+
MassiveSitemap is a successor project of [BigSitemap](https://github.com/alexrabarts/big_sitemap), a [Sitemap](http://sitemaps.org) generator for websites with millions of pages.
|
8
|
+
It implements various generation strategies, e.g. splitting large Sitemaps into multiple files, gzipping files to minimize bandwidth usage, or incremental updates. Its API is very similar to that of _BigSitemap_, so it can be set up with just a few lines of code and is compatible with just about any framework.
|
9
|
+
|
10
|
+
## Usage
|
11
|
+
|
12
|
+
* clear structure
|
13
|
+
* allows extension (S3)
|
14
|
+
|
15
|
+
MassiveSitemap - build huge sitemaps painfree. Differential updates keep generation time short and reduce load on the DB. It's heavily inspired by BigSitemap and offers a compatible API.
|
data/Rakefile
ADDED
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.0.0.rc1
|
@@ -0,0 +1,104 @@
|
|
1
|
+
module MassiveSitemap
  module Builder

    # Streams a sitemap XML document into a writer, one tag at a time.
    #
    # The writer only needs to respond to +inited?+, +init!+, +print+ and
    # +close!+. Subclasses customise the root element through HEADER_NAME and
    # HEADER_ATTRIBUTES and may override +init!+ / +add_url!+.
    class Base
      OPTS = {
        :base_url  => nil,
        :indent_by => 2
      }

      HEADER_NAME       = 'urlset'
      HEADER_ATTRIBUTES = {
        'xmlns'              => 'http://www.sitemaps.org/schemas/sitemap/0.9',
        'xmlns:xsi'          => "http://www.w3.org/2001/XMLSchema-instance",
        'xsi:schemaLocation' => "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
      }

      attr_reader :options

      # writer  - object receiving the generated XML.
      # options - Hash merged over OPTS (:base_url, :indent_by).
      # block   - optional; evaluated in the context of the builder, after
      #           which the innermost open tag is closed.
      def initialize(writer, options = {}, &block)
        @writer      = writer
        @options     = OPTS.merge(options)
        @opened_tags = []

        if block
          instance_eval(&block)
          close!
        end
      end

      # Convenience constructor; identical to +new+.
      def self.generate(writer, options = {}, &block)
        self.new(writer, options, &block)
      end

      # Adds one entry for +path+ (joined onto the normalised base URL).
      #
      # attrs may contain :last_modified (responds to #utc),
      # :change_frequency and :priority.
      #
      # Returns nil when the writer refuses to start because the target file
      # already exists; the entry is deliberately skipped in that case.
      def add(path, attrs = {})
        add_url! File.join(base_url, path), attrs
      rescue MassiveSitemap::Writer::File::FileExistsException
        nil
      end

      # Initialises the writer and emits the XML header, unless the writer is
      # already initialised. A given block is evaluated inside the root tag,
      # which is then closed.
      def init!(&block)
        unless @writer.inited?
          @writer.init!
          header!(&block)
        end
      end

      # Closes the innermost open tag. When that was the last open tag the
      # writer is closed too and true is returned; otherwise returns nil.
      #
      # indent - when true, the closing tag is put on its own indented line.
      def close!(indent = true)
        if name = @opened_tags.pop
          @writer.print "\n" + ' ' * options[:indent_by] * @opened_tags.size if indent
          @writer.print "</#{name}>"
          if @opened_tags.size == 0
            @writer.close!
            true
          end
        end
      end

      private

      # Emits the XML declaration followed by the (still open) root tag.
      def header!(&block)
        @writer.print '<?xml version="1.0" encoding="UTF-8"?>'
        tag! self.class::HEADER_NAME, self.class::HEADER_ATTRIBUTES, &block
      end

      # Writes a complete <url> element for +location+.
      def add_url!(location, attrs = {})
        init!

        tag! 'url' do
          tag! 'loc', location
          tag! 'lastmod', attrs[:last_modified].utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if attrs[:last_modified]
          # to_s: tag! only writes (and closes) String content, so a Symbol
          # such as :weekly would otherwise leave the tag open.
          tag! 'changefreq', attrs[:change_frequency].to_s if attrs[:change_frequency]
          tag! 'priority', attrs[:priority].to_s if attrs[:priority]
        end
      end

      # Opens tag +name+.
      #
      # content - a String is written as (escaped) text and the tag is closed
      #           on the same line; a Hash is treated as the attribute list.
      # block   - evaluated inside the tag, which is closed afterwards.
      #
      # Without String content and without a block the tag stays open.
      def tag!(name, content = nil, attrs = {}, &block)
        attrs = content if content.is_a? Hash
        open!(name, attrs)
        if content.is_a? String
          # '&' must be written as an entity to keep the XML well-formed.
          @writer.print content.gsub('&', '&amp;')
          close!(false)
        else
          if block
            instance_eval(&block)
            close!
          end
        end
      end

      # Prints the indented opening tag and records it as open.
      def open!(name, attrs = {})
        attrs = attrs.map { |attr, value| %Q( #{attr}="#{value}") }.join('')
        @writer.print "\n" + ' ' * options[:indent_by] * @opened_tags.size
        @opened_tags << name
        @writer.print "<#{name}#{attrs}>"
      end

      # Normalises options[:base_url] to "<scheme>host/", defaulting the
      # scheme to http://. Returns "" when no usable base URL is configured.
      def base_url
        url = @options[:base_url]
        return "" unless url.is_a?(::String)

        schema, host = url.scan(/^(https?:\/\/)?(.+?)\/?$/).flatten
        "#{schema || 'http://'}#{host}/"
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require "massive_sitemap/builder/base"
|
2
|
+
|
3
|
+
module MassiveSitemap
  module Builder
    # Builds a <sitemapindex> document with one <sitemap> entry for every
    # (path, last_modified) pair yielded by the writer's #each.
    class Index < Base
      HEADER_NAME = 'sitemapindex'
      HEADER_ATTRIBUTES = {
        :xmlns => 'http://www.sitemaps.org/schemas/sitemap/0.9'
      }

      # The block argument is accepted for signature compatibility with Base
      # but is not used; the index content comes from the writer alone.
      def initialize(writer, options = {}, &block)
        list_streams = proc do
          writer.each { |path, last_modified| add path, :last_modified => last_modified }
        end
        super(writer, options, &list_streams)
      end

      # Writes a <sitemap> element for +location+, with <lastmod> when
      # attrs[:last_modified] is given.
      def add_url!(location, attrs = {})
        init!

        modified_at = attrs[:last_modified]
        tag! 'sitemap' do
          tag! 'loc', location
          tag! 'lastmod', modified_at.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if modified_at
        end
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require "massive_sitemap/builder/base"
|
2
|
+
# writer only has print and puts as interface
|
3
|
+
|
4
|
+
module MassiveSitemap
  module Builder
    # Builder that starts a new sitemap file once the current one holds
    # :max_per_sitemap URLs.
    class Rotating < Base
      # Allowed values for :max_per_sitemap (both ends included).
      NUM_URLS = 1..50_000

      # options[:max_per_sitemap] - URLs per file, defaults to NUM_URLS.max.
      #
      # Raises ArgumentError when the value lies outside NUM_URLS.
      def initialize(writer, options = {}, &block)
        @max_urls  = options[:max_per_sitemap] || NUM_URLS.max
        @rotations = 0
        @urls      = 0

        unless NUM_URLS.member?(@max_urls)
          raise ArgumentError, %Q(":max_per_sitemap" must be between #{NUM_URLS.min} and #{NUM_URLS.max}, inclusive)
        end

        super
      end

      # On rotation, close current file, and reopen a new one
      # with same file name but -<counter> appended
      def init!(&block)
        unless @writer.inited?
          @urls = 0
          filename = filename_with_rotation(@writer.options[:filename], @rotations)
          # Counted before the writer is initialised, so a failing init! still
          # moves on to the next file name on the following attempt.
          @rotations += 1
          @writer.init! :filename => filename
          header!(&block)
        end
      end

      # Adds a URL, first closing the current document when it is full so that
      # the inherited init! opens the next file.
      def add_url!(location, attrs = {})
        if @urls >= @max_urls
          close!
        end
        super
        @urls += 1
      end

      private

      # Returns +filename+ with any existing "-<n>" counter replaced by
      # "-<rotation>"; a rotation of nil or 0 yields the plain file name.
      def filename_with_rotation(filename, rotation = nil)
        filename, _, ext = split_filename(filename)
        rotation = (rotation.to_i > 0) ? "-#{rotation}" : nil
        [filename, rotation, ext].join
      end

      # Splits a file name into [basename, "-<counter>" or nil, extension or nil].
      def split_filename(filename)
        filename.to_s.scan(/^([^.]*?)(-[0-9]+)?(\..+)?$/).flatten
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'net/http'
require 'uri'
require 'cgi'

module MassiveSitemap
  # Notifies search engines about a (new or updated) sitemap.
  class Ping
    # Ping URL templates; %s is replaced by the escaped sitemap URI.
    PING = {
      :google => 'http://www.google.com/webmasters/tools/ping?sitemap=%s',
      :bing   => 'http://www.bing.com/webmaster/ping.aspx?siteMap=%s',
      :ask    => 'http://submissions.ask.com/ping?sitemap=%s'
    }

    # Issues an HTTP GET to every given engine.
    #
    # sitemap_uri - public URL of the sitemap (escaped here).
    # engines     - one or many engines; each is either a key of PING
    #               (e.g. :google) or a URL template String containing %s.
    #               Defaults to none, i.e. nothing is pinged.
    #
    # Returns the list of engines that were pinged.
    def self.ping_search_engines(sitemap_uri, engines = [])
      Array(engines).each do |engine|
        Net::HTTP.get URI.parse(url_for(engine, sitemap_uri))
      end
    end

    # Builds the ping URL of one engine for +sitemap_uri+.
    #
    # Raises ArgumentError for a Symbol that is not a key of PING.
    def self.url_for(engine, sitemap_uri)
      template =
        if engine.is_a?(Symbol)
          PING.fetch(engine) { raise ArgumentError, "unknown search engine: #{engine.inspect}" }
        else
          engine
        end
      template % CGI::escape(sitemap_uri)
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
|
2
|
+
require 'stringio' # open_stream below instantiates StringIO

module MassiveSitemap
  module Writer
    # Common writer behaviour: manages one output stream that is opened by
    # init! and released by close!. Subclasses override the "Interface"
    # methods to target a concrete medium.
    class Base
      OPTS = {}

      attr_reader :options

      # options - Hash merged over the (sub)class's OPTS.
      def initialize(options = {})
        @options = self.class::OPTS.merge(options)
        @stream  = nil
      end

      # Interface

      # Returns the stream to print into. Here: one memoized in-memory
      # StringIO that is reused across init! calls.
      def open_stream
        @string ||= StringIO.new
      end

      # Releases +stream+. Nothing to do for the in-memory stream.
      def close_stream(stream)
      end

      # Whether a stream may be opened with the current options.
      def init?
        true
      end

      # Entries handed to #each; none by default.
      def streams
        []
      end

      # API

      # Closes any current stream, merges +options+ into the writer's options
      # and opens a new stream if init? allows it.
      def init!(options = {})
        close!
        @options.merge!(options)
        if init?
          @stream = open_stream
        end
      end

      # Closes the current stream, if any.
      def close!
        if inited?
          close_stream(@stream)
          @stream = nil
        end
      end

      # Truthy (the stream itself) while a stream is open, nil otherwise.
      def inited?
        @stream
      end

      # Prints +string+ to the open stream; does nothing when not inited.
      def print(string)
        @stream.print(string) if inited?
      end

      # Iterates over #streams.
      def each(&block)
        streams.each(&block)
      end
    end

  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require "massive_sitemap/writer/base"
|
3
|
+
|
4
|
+
# Write into File
|
5
|
+
|
6
|
+
module MassiveSitemap
  module Writer
    # Writes into "<document_full>/<filename>". Content goes to a ".tmp"
    # sibling first and is moved into place when the stream is closed.
    class File < Base

      # Raised by init? when the target exists and overwriting is not forced.
      class FileExistsException < IOError; end

      OPTS = {
        :document_full   => '.',
        :force_overwrite => false,
        :filename        => "sitemap.xml",
        :index_filename  => "sitemap_index.xml",
      }

      # Opens the temporary file for binary writing.
      def open_stream
        ::File.open(tmp_filename, 'w:ASCII-8BIT')
      end

      # Closes +stream+ and replaces the target file with the temporary one.
      def close_stream(stream)
        stream.close
        # Move from tmp_file into actual file
        ::File.delete(filename) if ::File.exist?(filename)
        ::File.rename(tmp_filename, filename)
      end

      # Returns true when the target may be written.
      #
      # Raises FileExistsException if it already exists and
      # options[:force_overwrite] is not set.
      def init?
        if !options[:force_overwrite] && ::File.exist?(filename)
          raise FileExistsException, "Can not create file: #{filename} exists"
        end
        true
      end

      # [basename, mtime] for every file matched by #files, leaving out paths
      # that contain the index file name.
      def streams
        files.map do |path|
          next if path.include?(options[:index_filename])
          [::File.basename(path), ::File.stat(path).mtime]
        end.compact
      end

      private

      # Full path of the target file.
      def filename
        ::File.join options[:document_full], options[:filename]
      end

      # Path written to while the stream is open.
      def tmp_filename
        filename + ".tmp"
      end

      # All "*.xml" files inside the document directory.
      def files
        Dir[::File.join(options[:document_full], "*.xml")]
      end
    end

  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'zlib'
|
2
|
+
|
3
|
+
require "massive_sitemap/writer/file"
|
4
|
+
# Write into GZipped File
|
5
|
+
|
6
|
+
module MassiveSitemap
  module Writer

    # File writer whose stream is wrapped in a Zlib::GzipWriter and whose
    # file names carry an additional ".gz" suffix.
    class GzipFile < File
      # Wraps the file opened by the parent class in a gzip stream.
      def open_stream
        raw_file = super
        ::Zlib::GzipWriter.new(raw_file)
      end

      private

      # Parent's target path with ".gz" appended.
      def filename
        "#{super}.gz"
      end

      # All "*.xml.gz" files inside the document directory.
      def files
        pattern = ::File.join(options[:document_full], "*.xml.gz")
        Dir[pattern]
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'zlib'
|
2
|
+
|
3
|
+
require "massive_sitemap/writer/file"
|
4
|
+
# Create Lock before writing to file
|
5
|
+
|
6
|
+
module MassiveSitemap
  module Writer

    # File writer guarded by a lock file: the lock is created when a stream
    # is opened and removed again when the stream is closed.
    class LockingFile < File
      LOCK_FILE = 'generator.lock'

      # Takes the lock, then opens the stream. The lock is given back if
      # opening the stream fails.
      def open_stream
        acquire_lock!
        begin
          super
        rescue StandardError
          release_lock!
          raise
        end
      end

      # Closes the stream and always releases the lock, even if closing fails.
      def close_stream(stream)
        super
      ensure
        release_lock!
      end

      # Raises Errno::EACCES while the lock file exists; otherwise defers to
      # the parent's check.
      def init?
        if ::File.exist?(LOCK_FILE)
          raise Errno::EACCES
        end
        super
      end

      private

      # Creates the lock file exclusively (CREAT|EXCL fails if it is already
      # there) and closes the handle right away; only its existence matters.
      # A lost race is reported with the same error init? uses.
      def acquire_lock!
        ::File.open(LOCK_FILE, ::File::WRONLY | ::File::CREAT | ::File::EXCL) {}
      rescue Errno::EEXIST
        raise Errno::EACCES
      end

      # Removes the lock file; a missing file is not an error.
      def release_lock!
        FileUtils.rm_f(LOCK_FILE)
      end
    end

  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
require "massive_sitemap/writer/base"
|
3
|
+
|
4
|
+
# Write into String
|
5
|
+
# Perfect for testing purposes
|
6
|
+
module MassiveSitemap
  module Writer

    # Writer that collects all output in memory and can be compared with
    # plain strings.
    class String < Base

      # One memoized StringIO, reused by every init!, so successive
      # documents are appended to the same buffer.
      def open_stream
        @string ||= StringIO.new
      end

      # Everything written so far; "" before the first stream was opened.
      def to_s
        @string ? @string.string : ""
      end

      # Compares the collected output with +other_string+.
      def ==(other_string)
        to_s == other_string
      end

      # Whether the collected output contains +other_string+.
      def include?(other_string)
        to_s.include?(other_string)
      end
    end

  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'massive_sitemap/writer/file'
|
2
|
+
require 'massive_sitemap/writer/gzip_file'
|
3
|
+
require 'massive_sitemap/builder/rotating'
|
4
|
+
require 'massive_sitemap/builder/index'
|
5
|
+
|
6
|
+
# Page at -> <base_url>
|
7
|
+
# http://example.de/dir/
|
8
|
+
|
9
|
+
# Index at
|
10
|
+
# http://sitemap.example.de/index-dir/
|
11
|
+
|
12
|
+
# Save at -> <document_full>
|
13
|
+
# /root/dir/ -> <document_root>/<document_path>
|
14
|
+
|
15
|
+
module MassiveSitemap
  DEFAULTS = {
    # global
    :index_base_url  => nil,
    :gzip            => false,
    :writer          => MassiveSitemap::Writer::File,

    # writer
    :document_full   => '.',
    :force_overwrite => false,
    :filename        => "sitemap.xml",
    :index_filename  => "sitemap_index.xml",

    # builder
    :base_url        => nil,
    :indent_by       => 2,
  }

  # Generates the sitemap file(s) and afterwards the sitemap index.
  #
  # options - Hash merged over DEFAULTS; :base_url is mandatory,
  #           :index_base_url falls back to :base_url, and :gzip => true
  #           selects Writer::GzipFile regardless of :writer.
  # block   - handed to Builder::Rotating; typically calls +add+.
  #
  # Returns the Builder::Index instance.
  # Raises ArgumentError when :base_url is missing.
  def generate(options = {}, &block)
    # Locals rather than module-level instance variables, so calls do not
    # share state with each other.
    options = DEFAULTS.merge options

    unless options[:base_url]
      raise ArgumentError, 'you must specify ":base_url" string'
    end
    options[:index_base_url] ||= options[:base_url]

    Dir.mkdir(options[:document_full]) unless ::File.exist?(options[:document_full])

    options[:writer] = MassiveSitemap::Writer::GzipFile if options[:gzip]

    writer = options[:writer].new options
    Builder::Rotating.generate(writer, options, &block)

    # Reuse the writer for the index, which is always regenerated.
    writer.options.merge!(:filename => options[:index_filename], :force_overwrite => true)
    Builder::Index.generate(writer, options.merge(:base_url => options[:index_base_url]))
  end
  module_function :generate
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)

Gem::Specification.new do |s|
  s.name        = "massive_sitemap"
  # Resolved relative to this file so the gemspec loads from any working directory.
  s.version     = File.read(File.expand_path("../VERSION", __FILE__)).to_s.strip
  s.authors     = ["Tobias Bielohlawek"]
  s.email       = ["tobi@soundcloud.com"]
  s.homepage    = "http://github.com/rngtng/massive_sitemap"
  s.summary     = %q{Build painfree sitemaps for websites with millions of pages}
  s.description = %q{MassiveSitemap - build huge sitemaps painfree. Differential updates keeps generation time short and reduces load on DB. It's heavealy inspired by BigSitemaps and offers compatiable API}

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Each entry is "name [version requirement]".
  %w(rake rspec).each do |gem|
    s.add_development_dependency(*gem.split(' '))
  end
end
|
@@ -0,0 +1,149 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
require "massive_sitemap/builder"
|
4
|
+
require "massive_sitemap/writer/string"
|
5
|
+
|
6
|
+
# Specs for the basic builder. Expected documents use the default
# indentation of two spaces per nesting level unless :indent_by is given.
describe MassiveSitemap::Builder::Base do
  let(:header) { %Q(<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">) }
  let(:writer) { MassiveSitemap::Writer::String.new }
  let(:builder) { MassiveSitemap::Builder.new(writer) }

  describe "#arguments" do
    it 'fail if no writer given' do
      expect do
        MassiveSitemap::Builder.new
      end.to raise_error(ArgumentError)
    end
  end

  context "no content added" do
    it 'empty per default' do
      builder

      writer.should == ""
    end

    it 'generate basic skeleton' do
      builder.init!
      writer.should == header
    end

    it 'generate basic skeleton on double init' do
      builder.init!
      builder.init!
      writer.should == header
    end

    it 'generate nothing when not inited' do
      builder.close!
      writer.should == ""
    end

    it "generate nothing on double close when not inited" do
      builder.close!
      builder.close!
      writer.should == ""
    end

    it "same result on double close" do
      builder.init!
      builder.close!
      builder.close!
      writer.should == %Q(#{header}\n</urlset>)
    end

    it "close skeleton after init! with block" do
      builder.init! do
        add "test"
      end
      writer.should == %Q(#{header}\n  <url>\n    <loc>/test</loc>\n  </url>\n</urlset>)
    end
  end

  context "adding content" do
    it 'seq: generate one url' do
      builder.add 'test'
      builder.close!
      writer.should == %Q(#{header}\n  <url>\n    <loc>/test</loc>\n  </url>\n</urlset>)
    end
  end

  context "as block" do
    it 'generate basic skeleton' do
      MassiveSitemap::Builder.new(writer) {}
      writer.should == ""
    end

    it 'generate one url' do
      MassiveSitemap::Builder.new(writer) do
        add 'test'
      end
      writer.should == %Q(#{header}\n  <url>\n    <loc>/test</loc>\n  </url>\n</urlset>)
    end

    it 'generate one url with init!' do
      MassiveSitemap::Builder.new(writer) do
        init!
        add 'test'
      end
      writer.should == %Q(#{header}\n  <url>\n    <loc>/test</loc>\n  </url>\n</urlset>)
    end

    it 'generate one url with init! block' do
      MassiveSitemap::Builder.new(writer) do
        init! do
          add 'test'
        end
      end
      writer.should == %Q(#{header}\n  <url>\n    <loc>/test</loc>\n  </url>\n</urlset>)
    end

    it 'generate one url with close!' do
      MassiveSitemap::Builder.new(writer) do
        add 'test'
        close!
      end
      writer.should == %Q(#{header}\n  <url>\n    <loc>/test</loc>\n  </url>\n</urlset>)
    end

    it 'generate one url, no indent' do
      MassiveSitemap::Builder.new(writer, :indent_by => 0) do
        add_url! 'test'
      end
      writer.should == %Q(#{header}\n<url>\n<loc>test</loc>\n</url>\n</urlset>)
    end

    it 'generate two url' do
      MassiveSitemap::Builder.new(writer) do
        add_url! 'test'
        add_url! 'test2'
      end
      writer.should == %Q(#{header}\n  <url>\n    <loc>test</loc>\n  </url>\n  <url>\n    <loc>test2</loc>\n  </url>\n</urlset>)
    end

    it 'generate one url with attrs' do
      MassiveSitemap::Builder.new(writer, :indent_by => 0) do
        add_url! 'test', :change_frequency => 'weekly', :priority => 0.8
      end
      writer.should include("<loc>test</loc>\n<changefreq>weekly</changefreq>\n<priority>0.8</priority>")
    end
  end

  describe ".base_url" do
    # Local variable instead of a constant: constants assigned inside a
    # describe block end up in the global namespace.
    urls = %w(
      http://test.de/
      test.de/
      test.de
    )

    urls.each do |url|
      it "transforms #{url} to valid url" do
        MassiveSitemap::Builder.new(writer, :base_url => url).send(:base_url).should == "http://test.de/"
      end
    end

    it "transforms to valid url with https" do
      MassiveSitemap::Builder.new(writer, :base_url => "https://test.de/").send(:base_url).should == "https://test.de/"
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
require "massive_sitemap/builder/index"
|
4
|
+
require "massive_sitemap/writer/string"
|
5
|
+
|
6
|
+
# Specs for the index builder, fed by a String writer whose #streams is
# stubbed to report a single entry.
describe MassiveSitemap::Builder::Index do
  # let instead of a constant: constants assigned inside a describe block
  # end up in the global namespace.
  let(:index_header) { %Q(<?xml version="1.0" encoding="UTF-8"?>\n<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n<sitemap>) }

  let(:writer) { MassiveSitemap::Writer::String.new }

  before do
    writer.stub!(:streams).and_return(['test'])
  end

  it 'generates one url' do
    MassiveSitemap::Builder::Index.new(writer, :indent_by => 0)

    writer.should == %Q(#{index_header}\n<loc>/test</loc>\n</sitemap>\n</sitemapindex>)
  end

  it 'include base_url' do
    MassiveSitemap::Builder::Index.new(writer, :base_url => "test.de", :indent_by => 0)
    writer.should include("<loc>http://test.de/test</loc>")
  end

end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
require "massive_sitemap/builder/rotating"
|
4
|
+
require "massive_sitemap/writer/string"
|
5
|
+
|
6
|
+
# Specs for the rotating builder. Expected documents use the default
# indentation of two spaces per nesting level.
describe MassiveSitemap::Builder::Rotating do
  let(:header) { %Q(<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">) }
  let(:writer) { MassiveSitemap::Writer::String.new }
  let(:builder) { MassiveSitemap::Builder::Rotating.new(writer) }

  it 'raises error when max_per_sitemap > NUM_URLS.max' do
    expect do
      MassiveSitemap::Builder::Rotating.new(writer, :max_per_sitemap => MassiveSitemap::Builder::Rotating::NUM_URLS.max + 1)
    end.to raise_error(ArgumentError)
  end

  it 'generates one url' do
    MassiveSitemap::Builder::Rotating.new(writer) do
      add_url! 'test'
    end
    writer.should == %Q(#{header}\n  <url>\n    <loc>test</loc>\n  </url>\n</urlset>)
  end

  it 'generates two url' do
    MassiveSitemap::Builder::Rotating.new(writer, :max_per_sitemap => 1) do
      add_url! 'test'
      add_url! 'test2'
    end
    writer.should == %Q(#{header}\n  <url>\n    <loc>test</loc>\n  </url>\n</urlset>#{header}\n  <url>\n    <loc>test2</loc>\n  </url>\n</urlset>)
  end

  context "with file" do
    let(:filename) { 'sitemap.xml' }
    let(:filename2) { 'sitemap-1.xml' }
    let(:writer) { MassiveSitemap::Writer::File.new }

    after do
      FileUtils.rm(filename) rescue nil
      FileUtils.rm(filename2) rescue nil
    end

    it 'generates two url' do
      expect do
        expect do
          MassiveSitemap::Builder::Rotating.new(writer, :max_per_sitemap => 1) do
            add 'test'
            add 'test2'
          end
        end.to change { File.exist?(filename) }.to(true)
      end.to change { File.exist?(filename2) }.to(true)
    end

    it 'generates two url when file exists' do
      File.open(filename, 'w') {}
      expect do
        expect do
          MassiveSitemap::Builder::Rotating.new(writer, :max_per_sitemap => 1) do
            begin
              add 'test'
            rescue MassiveSitemap::Writer::File::FileExistsException
            end
            add 'test2'
          end
        end.to_not change { File.exist?(filename) }
      end.to change { File.exist?(filename2) }.to(true)
    end
  end

  describe "#filename_with_rotation" do
    context "keeps filename" do
      it "rotation is omitted" do
        builder.send(:filename_with_rotation, "sitemap.xml").should == "sitemap.xml"
      end

      it "rotation is omitted and name ends with a digit" do
        builder.send(:filename_with_rotation, "sitemap2.xml").should == "sitemap2.xml"
      end

      it "rotation is nil" do
        builder.send(:filename_with_rotation, "sitemap.xml", nil).should == "sitemap.xml"
      end

      it "rotation is zero" do
        builder.send(:filename_with_rotation, "sitemap.xml", 0).should == "sitemap.xml"
      end
    end

    context "rotation is 1" do
      it "adds suffix" do
        builder.send(:filename_with_rotation, "sitemap.xml", 1).should == "sitemap-1.xml"
      end

      it "replaces existing numeric suffix" do
        builder.send(:filename_with_rotation, "sitemap-1.xml", 1).should == "sitemap-1.xml"
      end

      it "keeps non-numeric suffix" do
        builder.send(:filename_with_rotation, "sitemap-user.xml", 1).should == "sitemap-user-1.xml"
      end
    end
  end

  describe "#split_filename" do
    # Local variable instead of a constant: constants assigned inside a
    # describe block end up in the global namespace.
    filenames = {
      nil                     => ["", nil, nil],
      ".xml"                  => ["", nil, ".xml"],
      ".xml.gz"               => ["", nil, ".xml.gz"],
      "sitemap"               => ["sitemap", nil, nil],
      "sitemap.xml"           => ["sitemap", nil, ".xml"],
      "sitemap.xml.gz"        => ["sitemap", nil, ".xml.gz"],
      "-1.xml"                => ["", "-1", ".xml"],
      "-1.xml.gz"             => ["", "-1", ".xml.gz"],
      "sitemap-1"             => ["sitemap", "-1", nil],
      "sitemap-1.xml"         => ["sitemap", "-1", ".xml"],
      "sitemap-1.xml.gz"      => ["sitemap", "-1", ".xml.gz"],
      "-user-1.xml"           => ["-user", "-1", ".xml"],
      "-user-1.xml.gz"        => ["-user", "-1", ".xml.gz"],
      "sitemap-user-1"        => ["sitemap-user", "-1", nil],
      "sitemap-user-1.xml"    => ["sitemap-user", "-1", ".xml"],
      "sitemap-user-1.xml.gz" => ["sitemap-user", "-1", ".xml.gz"],
      "sitemap1"              => ["sitemap1", nil, nil],
      "sitemap1.xml"          => ["sitemap1", nil, ".xml"],
      "sitemap1.xml.gz"       => ["sitemap1", nil, ".xml.gz"],
    }

    filenames.each do |filename, expected|
      it "splits filename #{filename} into #{expected.join(' ')}" do
        builder.send(:split_filename, filename).should == expected
      end
    end
  end

end
|
@@ -0,0 +1,158 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
require "massive_sitemap/writer/gzip_file"
|
4
|
+
|
5
|
+
# End-to-end specs for MassiveSitemap.generate; they write into the current
# working directory and clean up the generated files afterwards.
describe MassiveSitemap do
  let(:index_filename) { 'sitemap_index.xml' }
  let(:filename) { 'sitemap.xml' }
  let(:filename2) { 'sitemap2.xml' }

  # Content of a generated file, read directly instead of through a shell.
  def output(file = filename)
    ::File.read(file)
  end

  def gz_filename(file = filename)
    "#{file}.gz"
  end

  after do
    FileUtils.rm(index_filename) rescue nil
    FileUtils.rm(filename) rescue nil
    FileUtils.rm(filename2) rescue nil
  end

  describe "#initalize" do
    it 'fail if no base_url given' do
      expect do
        MassiveSitemap.generate
      end.to raise_error(ArgumentError)
    end

    it "does not create empty sitemap file" do
      expect do
        MassiveSitemap.generate(:base_url => 'test.de/')
      end.to_not change { ::File.exist?(filename) }
    end

    context "custom writer" do
      after do
        FileUtils.rm(gz_filename(index_filename)) rescue nil
        FileUtils.rm(gz_filename) rescue nil
      end

      it 'takes gzips writer' do
        expect do
          MassiveSitemap.generate(:base_url => 'test.de/', :gzip => true) do
            add "dummy"
          end
        end.to change { ::File.exist?(gz_filename) }.to(true)
      end

      it 'takes custom writer' do
        expect do
          MassiveSitemap.generate(:base_url => 'test.de/', :writer => MassiveSitemap::Writer::GzipFile) do
            add "dummy"
          end
        end.to change { ::File.exist?(gz_filename) }.to(true)
      end
    end
  end

  describe "#generate" do
    it 'adds url' do
      MassiveSitemap.generate(:base_url => 'test.de') do
        add "track/name"
      end
      output.should include("<loc>http://test.de/track/name</loc>")
    end

    it 'adds url with root slash' do
      MassiveSitemap.generate(:base_url => 'test.de/') do
        add "/track/name"
      end
      output.should include("<loc>http://test.de/track/name</loc>")
    end

    it "doesn't fail for existing file" do
      File.open(filename, 'w') {}
      expect do
        MassiveSitemap.generate(:base_url => 'test.de/') do
          add "/track/name"
        end
      end.to_not change { File.stat(filename).mtime }
    end

    context 'nested generation' do
      # The block runs in the context of the builder, so @writer and
      # @options below are the builder's instance variables.
      it 'adds url of nested builder' do
        MassiveSitemap.generate(:base_url => 'test.de/') do
          writer = @writer.class.new(@options.merge(:filename => 'sitemap2.xml'))
          MassiveSitemap::Builder::Rotating.new(writer, @options) do
            add "/set/name"
          end
        end
        output(filename2).should include("<loc>http://test.de/set/name</loc>")
      end

      it 'executes block altough first sitemap exists' do
        File.open(filename, 'w') {}
        MassiveSitemap.generate(:base_url => 'test.de/') do
          writer = @writer.class.new(@options.merge(:filename => 'sitemap2.xml'))
          MassiveSitemap::Builder::Rotating.new(writer, @options) do
            add "/set/name"
          end
        end
        output(filename2).should include("<loc>http://test.de/set/name</loc>")
      end
    end

  end

  describe "#generate_index" do
    let(:lastmod) { File.stat(index_filename).mtime.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') }

    it "does not create empty files" do
      MassiveSitemap.generate(:base_url => 'test.de/')
      ::File.exist?(index_filename).should be_false
    end

    it 'includes urls' do
      MassiveSitemap.generate(:base_url => 'test.de/', :indent_by => 0) do
        add "dummy"
      end

      output(index_filename).should include("<sitemap>\n<loc>http://test.de/sitemap.xml</loc>\n<lastmod>#{lastmod}</lastmod>\n</sitemap>")
    end

    it 'includes index base url' do
      MassiveSitemap.generate(:base_url => 'test.de/', :index_base_url => 'index.de/') do
        add "dummy"
      end

      output(index_filename).should include("<loc>http://index.de/sitemap.xml</loc>")
    end

    it 'overwrites existing one' do
      File.open(index_filename, 'w') {}
      MassiveSitemap.generate(:base_url => 'test.de/', :index_base_url => 'index.de/') do
        add "dummy"
      end

      output(index_filename).should include("<loc>http://index.de/sitemap.xml</loc>")
    end

    context "gziped" do
      after do
        FileUtils.rm(gz_filename(index_filename)) rescue nil
        FileUtils.rm(gz_filename) rescue nil
      end

      it 'creates sitemap file' do
        expect do
          MassiveSitemap.generate(:base_url => 'test.de/', :writer => MassiveSitemap::Writer::GzipFile) do
            add "dummy"
          end
        end.to change { ::File.exist?(gz_filename(index_filename)) }.to(true)
      end
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
require "massive_sitemap/writer/file"
|
4
|
+
|
5
|
+
describe MassiveSitemap::Writer::File do
|
6
|
+
let(:filename) { 'sitemap.xml' }
|
7
|
+
let(:filename2) { 'sitemap-1.xml' }
|
8
|
+
let(:writer) { MassiveSitemap::Writer::File.new.tap { |w| w.init! } }
|
9
|
+
|
10
|
+
after do
|
11
|
+
FileUtils.rm(filename) rescue nil
|
12
|
+
FileUtils.rm(filename2) rescue nil
|
13
|
+
end
|
14
|
+
|
15
|
+
describe "document_full" do
|
16
|
+
let(:folder) { "test" }
|
17
|
+
|
18
|
+
before do
|
19
|
+
Dir.mkdir(folder) unless ::File.exists?(folder)
|
20
|
+
end
|
21
|
+
|
22
|
+
after do
|
23
|
+
FileUtils.rm_rf(folder) rescue nil
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'appends document_full' do
|
27
|
+
expect do
|
28
|
+
MassiveSitemap::Writer::File.new(:document_full => folder).tap do |w|
|
29
|
+
w.init!
|
30
|
+
w.close!
|
31
|
+
end
|
32
|
+
end.to change { File.exists?("test/#{filename}") }.to(true)
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'appends document_full' do
|
36
|
+
expect do
|
37
|
+
MassiveSitemap::Writer::File.new(:document_full => "#{folder}/").tap do |w|
|
38
|
+
w.init!
|
39
|
+
w.close!
|
40
|
+
end
|
41
|
+
end.to change { File.exists?("test/#{filename}") }.to(true)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
it 'create file' do
|
46
|
+
expect do
|
47
|
+
writer.close!
|
48
|
+
end.to change { File.exists?(filename) }.to(true)
|
49
|
+
end
|
50
|
+
|
51
|
+
it 'create second file on rotation' do
|
52
|
+
expect do
|
53
|
+
expect do
|
54
|
+
writer.close!
|
55
|
+
end.to change { File.exists?(filename) }.to(true)
|
56
|
+
writer.init!(:filename => filename2)
|
57
|
+
writer.close!
|
58
|
+
end.to change { File.exists?(filename2) }.to(true)
|
59
|
+
end
|
60
|
+
|
61
|
+
it 'write into file' do
|
62
|
+
writer.print 'test'
|
63
|
+
writer.close!
|
64
|
+
`cat '#{filename}'`.should == "test"
|
65
|
+
end
|
66
|
+
|
67
|
+
it 'init new file closes current' do
|
68
|
+
writer.print 'test'
|
69
|
+
writer.init!(:filename => filename2)
|
70
|
+
`cat '#{filename}'`.should == "test"
|
71
|
+
end
|
72
|
+
|
73
|
+
it 'write into second file' do
|
74
|
+
writer.print 'test'
|
75
|
+
writer.init!(:filename => filename2)
|
76
|
+
writer.print 'test2'
|
77
|
+
writer.close!
|
78
|
+
`cat '#{filename2}'`.should == "test2"
|
79
|
+
end
|
80
|
+
|
81
|
+
context "opening write file" do
|
82
|
+
before do
|
83
|
+
File.open(filename, 'w') {}
|
84
|
+
end
|
85
|
+
|
86
|
+
after do
|
87
|
+
FileUtils.rm(filename) rescue nil
|
88
|
+
end
|
89
|
+
|
90
|
+
it 'raises when file exits' do
|
91
|
+
writer = MassiveSitemap::Writer::File.new
|
92
|
+
expect do
|
93
|
+
writer.init!
|
94
|
+
end.to raise_error(MassiveSitemap::Writer::File::FileExistsException)
|
95
|
+
end
|
96
|
+
|
97
|
+
it 'raises when file exits' do
|
98
|
+
writer = MassiveSitemap::Writer::File.new(:force_overwrite => true)
|
99
|
+
expect do
|
100
|
+
writer.init!
|
101
|
+
end.to_not raise_error(MassiveSitemap::Writer::File::FileExistsException)
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require "massive_sitemap/writer/gzip_file"
|
2
|
+
|
3
|
+
describe MassiveSitemap::Writer::GzipFile do
|
4
|
+
let(:filename) { 'sitemap.xml' }
|
5
|
+
let(:tmp_filename) { "#{filename}.tmp" }
|
6
|
+
let(:gz_filename) { "#{filename}.gz" }
|
7
|
+
let(:writer) { MassiveSitemap::Writer::GzipFile.new.tap { |w| w.init! } }
|
8
|
+
|
9
|
+
after do
|
10
|
+
FileUtils.rm(filename) rescue nil
|
11
|
+
FileUtils.rm(tmp_filename) rescue nil
|
12
|
+
FileUtils.rm(gz_filename) rescue nil
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'creates gzip file' do
|
16
|
+
expect do
|
17
|
+
writer.close!
|
18
|
+
end.to change { File.exists?(gz_filename) }.from(false).to(true)
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require "massive_sitemap/writer/locking_file"
|
2
|
+
|
3
|
+
describe MassiveSitemap::Writer::LockingFile do
|
4
|
+
let(:filename) { 'sitemap.xml' }
|
5
|
+
let(:tmp_filename) { "#{filename}.tmp" }
|
6
|
+
let(:lock_file) { MassiveSitemap::Writer::LockingFile::LOCK_FILE }
|
7
|
+
let(:writer) { MassiveSitemap::Writer::LockingFile.new.tap { |w| w.init! } }
|
8
|
+
|
9
|
+
after do
|
10
|
+
FileUtils.rm(filename) rescue nil
|
11
|
+
FileUtils.rm(tmp_filename) rescue nil
|
12
|
+
FileUtils.rm(lock_file) rescue nil
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'creates lockfile' do
|
16
|
+
expect do
|
17
|
+
writer
|
18
|
+
end.to change { File.exists?(lock_file) }.to(true)
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'deletes lockfile' do
|
22
|
+
writer
|
23
|
+
expect do
|
24
|
+
writer.close!
|
25
|
+
end.to change { File.exists?(lock_file) }.to(false)
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'fails if lockfile exists' do
|
29
|
+
File.open(lock_file, 'w') {}
|
30
|
+
expect do
|
31
|
+
writer
|
32
|
+
end.to raise_error
|
33
|
+
end
|
34
|
+
end
|
metadata
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: massive_sitemap
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 2.0.0.rc1
|
5
|
+
prerelease: 6
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Tobias Bielohlawek
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-02-12 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rake
|
16
|
+
requirement: &70297299930500 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70297299930500
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rspec
|
27
|
+
requirement: &70297299930040 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70297299930040
|
36
|
+
description: MassiveSitemap - build huge sitemaps painfree. Differential updates keep
|
37
|
+
generation time short and reduces load on DB. It's heavily inspired by BigSitemap
|
38
|
+
and offers a compatible API
|
39
|
+
email:
|
40
|
+
- tobi@soundcloud.com
|
41
|
+
executables: []
|
42
|
+
extensions: []
|
43
|
+
extra_rdoc_files: []
|
44
|
+
files:
|
45
|
+
- .gitignore
|
46
|
+
- .travis.yml
|
47
|
+
- CHANGELOG.md
|
48
|
+
- Gemfile
|
49
|
+
- Gemfile.lock
|
50
|
+
- README.md
|
51
|
+
- Rakefile
|
52
|
+
- VERSION
|
53
|
+
- lib/massive_sitemap.rb
|
54
|
+
- lib/massive_sitemap/builder.rb
|
55
|
+
- lib/massive_sitemap/builder/base.rb
|
56
|
+
- lib/massive_sitemap/builder/index.rb
|
57
|
+
- lib/massive_sitemap/builder/rotating.rb
|
58
|
+
- lib/massive_sitemap/ping.rb
|
59
|
+
- lib/massive_sitemap/writer/base.rb
|
60
|
+
- lib/massive_sitemap/writer/file.rb
|
61
|
+
- lib/massive_sitemap/writer/gzip_file.rb
|
62
|
+
- lib/massive_sitemap/writer/locking_file.rb
|
63
|
+
- lib/massive_sitemap/writer/string.rb
|
64
|
+
- massive_sitemap.gemspec
|
65
|
+
- spec/builder/base_spec.rb
|
66
|
+
- spec/builder/index_spec.rb
|
67
|
+
- spec/builder/rotating_spec.rb
|
68
|
+
- spec/massive_sitemap_spec.rb
|
69
|
+
- spec/spec_helper.rb
|
70
|
+
- spec/writer/file_spec.rb
|
71
|
+
- spec/writer/gzip_file_spec.rb
|
72
|
+
- spec/writer/locking_file_spec.rb
|
73
|
+
homepage: http://github.com/rngtng/massive_sitemap
|
74
|
+
licenses: []
|
75
|
+
post_install_message:
|
76
|
+
rdoc_options: []
|
77
|
+
require_paths:
|
78
|
+
- lib
|
79
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
80
|
+
none: false
|
81
|
+
requirements:
|
82
|
+
- - ! '>='
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
86
|
+
none: false
|
87
|
+
requirements:
|
88
|
+
- - ! '>'
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: 1.3.1
|
91
|
+
requirements: []
|
92
|
+
rubyforge_project:
|
93
|
+
rubygems_version: 1.8.15
|
94
|
+
signing_key:
|
95
|
+
specification_version: 3
|
96
|
+
summary: Build painfree sitemaps for websites with millions of pages
|
97
|
+
test_files:
|
98
|
+
- spec/builder/base_spec.rb
|
99
|
+
- spec/builder/index_spec.rb
|
100
|
+
- spec/builder/rotating_spec.rb
|
101
|
+
- spec/massive_sitemap_spec.rb
|
102
|
+
- spec/spec_helper.rb
|
103
|
+
- spec/writer/file_spec.rb
|
104
|
+
- spec/writer/gzip_file_spec.rb
|
105
|
+
- spec/writer/locking_file_spec.rb
|