massive_sitemap 2.0.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +5 -0
- data/.travis.yml +3 -0
- data/CHANGELOG.md +28 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +24 -0
- data/README.md +15 -0
- data/Rakefile +8 -0
- data/VERSION +1 -0
- data/lib/massive_sitemap/builder/base.rb +104 -0
- data/lib/massive_sitemap/builder/index.rb +29 -0
- data/lib/massive_sitemap/builder/rotating.rb +53 -0
- data/lib/massive_sitemap/builder.rb +12 -0
- data/lib/massive_sitemap/ping.rb +21 -0
- data/lib/massive_sitemap/writer/base.rb +60 -0
- data/lib/massive_sitemap/writer/file.rb +59 -0
- data/lib/massive_sitemap/writer/gzip_file.rb +24 -0
- data/lib/massive_sitemap/writer/locking_file.rb +31 -0
- data/lib/massive_sitemap/writer/string.rb +29 -0
- data/lib/massive_sitemap.rb +52 -0
- data/massive_sitemap.gemspec +21 -0
- data/spec/builder/base_spec.rb +149 -0
- data/spec/builder/index_spec.rb +26 -0
- data/spec/builder/rotating_spec.rb +133 -0
- data/spec/massive_sitemap_spec.rb +158 -0
- data/spec/spec_helper.rb +2 -0
- data/spec/writer/file_spec.rb +104 -0
- data/spec/writer/gzip_file_spec.rb +20 -0
- data/spec/writer/locking_file_spec.rb +34 -0
- metadata +105 -0
data/.travis.yml
ADDED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# Changes
|
2
|
+
|
3
|
+
## vx.x.x - ???
|
4
|
+
|
5
|
+
## v2.0.x - ???
|
6
|
+
|
7
|
+
* updated/fixed Ping
|
8
|
+
* updated Docu
|
9
|
+
* switch to writer chain
|
10
|
+
* add BigSitemap API
|
11
|
+
|
12
|
+
## v2.0.0 - 13-02-2012
|
13
|
+
_initial release_
|
14
|
+
|
15
|
+
* restructured gem completely based on BigSitemap gem
|
16
|
+
* separated logic in two major parts:
|
17
|
+
* Builder -> creates content
|
18
|
+
* Writer -> stores content
|
19
|
+
* added several implementations/specifications of builder/writer
|
20
|
+
* added generator for default setup
|
21
|
+
* added specs
|
22
|
+
* writer overwrite detection
|
23
|
+
* added Index generation
|
24
|
+
* don't init new writer all the time
|
25
|
+
* move inited status to writer
|
26
|
+
* move index build into indexer and resource handling/selection into writer
|
27
|
+
* manifest handling:
|
28
|
+
* moved Amazon S3 integration to [massive_sitemap-writer-s3](https://github.com/rngtng/massive_sitemap-writer-s3)
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
massive_sitemap (0.0.1)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: http://rubygems.org/
|
8
|
+
specs:
|
9
|
+
diff-lcs (1.1.3)
|
10
|
+
rspec (2.8.0)
|
11
|
+
rspec-core (~> 2.8.0)
|
12
|
+
rspec-expectations (~> 2.8.0)
|
13
|
+
rspec-mocks (~> 2.8.0)
|
14
|
+
rspec-core (2.8.0)
|
15
|
+
rspec-expectations (2.8.0)
|
16
|
+
diff-lcs (~> 1.1.2)
|
17
|
+
rspec-mocks (2.8.0)
|
18
|
+
|
19
|
+
PLATFORMS
|
20
|
+
ruby
|
21
|
+
|
22
|
+
DEPENDENCIES
|
23
|
+
massive_sitemap!
|
24
|
+
rspec
|
data/README.md
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# MassiveSitemap
|
2
|
+
|
3
|
+
[Build Status](http://travis-ci.org/rngtng/massive_sitemap)
|
4
|
+
|
5
|
+
Build painfree sitemaps for websites with millions of pages
|
6
|
+
|
7
|
+
MassiveSitemap is a successor project of [BigSitemap](https://github.com/alexrabarts/big_sitemap), a [Sitemap](http://sitemaps.org) generator for websites with millions of pages.
|
8
|
+
It implements various generation strategies, e.g. splitting large Sitemaps into multiple files, gzipping files to minimize bandwidth usage, or incremental updates. Its API is very similar to that of _BigSitemap_, so it can be set up with just a few lines of code and is compatible with just about any framework.
|
9
|
+
|
10
|
+
## Usage
|
11
|
+
|
12
|
+
* clear structure
|
13
|
+
* allows extension (S3)
|
14
|
+
|
15
|
+
MassiveSitemap - build huge sitemaps painfree. Differential updates keep generation time short and reduce load on the DB. It's heavily inspired by BigSitemap and offers a compatible API.
|
data/Rakefile
ADDED
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.0.0.rc1
|
@@ -0,0 +1,104 @@
|
|
1
|
+
module MassiveSitemap
  module Builder

    # Builds a sitemap XML document by printing tags to a writer object.
    #
    # The writer must respond to +inited?+, +init!+, +print+ and +close!+.
    # Nothing is printed until the first URL is added (or +init!+ is called
    # explicitly), so a builder that never receives a URL prints nothing.
    #
    # Subclasses change the root element by overriding HEADER_NAME and
    # HEADER_ATTRIBUTES, and the per-entry markup by overriding +add_url!+.
    class Base
      # Default options:
      #   :base_url  - host (optionally with scheme) prepended to every path
      #   :indent_by - number of spaces per nesting level
      OPTS = {
        :base_url  => nil,
        :indent_by => 2
      }

      HEADER_NAME       = 'urlset'
      HEADER_ATTRIBUTES = {
        'xmlns'              => 'http://www.sitemaps.org/schemas/sitemap/0.9',
        'xmlns:xsi'          => "http://www.w3.org/2001/XMLSchema-instance",
        'xsi:schemaLocation' => "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
      }

      attr_reader :options

      # writer  - object receiving the generated XML
      # options - overrides for OPTS
      # block   - optional; evaluated in the context of the builder (so +add+
      #           can be called without a receiver), after which the
      #           document is closed.
      def initialize(writer, options = {}, &block)
        @writer      = writer
        @options     = OPTS.merge(options)
        @opened_tags = []

        if block
          instance_eval(&block)
          close!
        end
      end

      # Same as +new+; returns the builder.
      def self.generate(writer, options = {}, &block)
        self.new(writer, options, &block)
      end

      # Adds +path+ (joined onto the normalized base URL) as an entry.
      #
      # attrs may contain :last_modified, :change_frequency and :priority.
      # A FileExistsException raised while writing is deliberately swallowed:
      # the entry is skipped and nil is returned.
      def add(path, attrs = {})
        add_url! File.join(base_url, path), attrs
      rescue MassiveSitemap::Writer::File::FileExistsException
        nil
      end

      # Initializes the writer and prints the XML declaration plus the root
      # tag, unless the writer is already initialized. When a block is given
      # it is evaluated inside the root tag, which is closed afterwards.
      def init!(&block)
        unless @writer.inited?
          @writer.init!
          header!(&block)
        end
      end

      # Closes the most recently opened tag. With +indent+ the closing tag is
      # put on its own, indented line. Closing the last open tag also closes
      # the writer and returns true; otherwise nil is returned. Does nothing
      # when no tag is open.
      def close!(indent = true)
        name = @opened_tags.pop
        return unless name

        @writer.print "\n" + ' ' * options[:indent_by] * @opened_tags.size if indent
        @writer.print "</#{name}>"
        return unless @opened_tags.size == 0

        @writer.close!
        true
      end

      private

      # Prints the XML declaration and opens the root element.
      def header!(&block)
        @writer.print '<?xml version="1.0" encoding="UTF-8"?>'
        tag! self.class::HEADER_NAME, self.class::HEADER_ATTRIBUTES, &block
      end

      # Prints one <url> entry for +location+ with the optional child tags.
      def add_url!(location, attrs = {})
        init!

        tag! 'url' do
          tag! 'loc', location
          tag! 'lastmod', attrs[:last_modified].utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if attrs[:last_modified]
          tag! 'changefreq', attrs[:change_frequency] if attrs[:change_frequency]
          tag! 'priority', attrs[:priority].to_s if attrs[:priority]
        end
      end

      # Opens tag +name+.
      #
      # content - a String is printed as the tag's text and the tag is closed
      #           on the same line; a Hash is treated as the attributes.
      # block   - evaluated in the builder's context to produce child tags,
      #           after which the tag is closed.
      # Without String content or a block the tag is left open.
      def tag!(name, content = nil, attrs = {}, &block)
        attrs = content if content.is_a? Hash
        open!(name, attrs)
        if content.is_a? String
          # A raw "&" is not allowed in XML text; content is expected to be
          # unescaped, so every ampersand is turned into an entity.
          @writer.print content.gsub('&', '&amp;')
          close!(false)
        elsif block
          instance_eval(&block)
          close!
        end
      end

      # Prints the opening tag on a new, indented line and remembers it so
      # that +close!+ can emit the matching closing tag.
      def open!(name, attrs = {})
        attrs = attrs.map { |attr, value| %Q( #{attr}="#{value}") }.join('')
        @writer.print "\n" + ' ' * options[:indent_by] * @opened_tags.size
        @opened_tags << name
        @writer.print "<#{name}#{attrs}>"
      end

      # Normalizes the :base_url option to "<scheme>host/", defaulting the
      # scheme to http://. Returns "" when the option cannot be parsed
      # (e.g. it is nil).
      def base_url
        schema, host = @options[:base_url].scan(/^(https?:\/\/)?(.+?)\/?$/).flatten
        "#{schema || 'http://'}#{host}/"
      rescue StandardError
        ""
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require "massive_sitemap/builder/base"
|
2
|
+
|
3
|
+
module MassiveSitemap
  module Builder

    # Builds a <sitemapindex> document that lists every entry the writer
    # yields from +each+.
    class Index < Base
      HEADER_NAME       = 'sitemapindex'
      HEADER_ATTRIBUTES = {
        :xmlns => 'http://www.sitemaps.org/schemas/sitemap/0.9'
      }

      # Generates the whole index right away: each (path, last_modified)
      # pair yielded by the writer becomes one entry. A block passed by the
      # caller is not used.
      def initialize(writer, options = {}, &block)
        fill_index = proc do
          writer.each do |sitemap_path, modified_at|
            add sitemap_path, :last_modified => modified_at
          end
        end

        super(writer, options, &fill_index)
      end

      # Prints one <sitemap> entry; <lastmod> is only printed when
      # attrs[:last_modified] is set.
      def add_url!(location, attrs = {})
        init!

        modified_at = attrs[:last_modified]
        tag! 'sitemap' do
          tag! 'loc', location
          tag! 'lastmod', modified_at.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if modified_at
        end
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require "massive_sitemap/builder/base"
|
2
|
+
# writer only has print and puts as interface
|
3
|
+
|
4
|
+
module MassiveSitemap
  module Builder

    # Builder that starts a new sitemap file once the current one holds
    # :max_per_sitemap URLs.
    class Rotating < Base
      # Allowed values for :max_per_sitemap (both ends included).
      NUM_URLS = 1..50_000

      # options[:max_per_sitemap] - URLs per file, defaults to NUM_URLS.max
      #
      # Raises ArgumentError when :max_per_sitemap is outside NUM_URLS.
      def initialize(writer, options = {}, &block)
        @max_urls  = options[:max_per_sitemap] || NUM_URLS.max
        @rotations = 0
        @urls      = 0

        unless NUM_URLS.member?(@max_urls)
          raise ArgumentError, %Q(":max_per_sitemap" must be between #{NUM_URLS.min} and #{NUM_URLS.max} (inclusive))
        end

        super
      end

      # On rotation, close the current file and reopen a new one
      # with the same file name but -<counter> appended.
      #
      # The rotation counter is increased before the writer is initialized,
      # so if the writer raises for this filename the next attempt moves on
      # to the next counter value.
      def init!(&block)
        unless @writer.inited?
          @urls = 0
          filename = filename_with_rotation(@writer.options[:filename], @rotations)
          @rotations += 1
          @writer.init! :filename => filename
          header!(&block)
        end
      end

      # Adds one URL, closing the current document first when it is full.
      # The URL counter is only increased when the URL was actually written.
      def add_url!(location, attrs = {})
        close! if @urls >= @max_urls
        super
        @urls += 1
      end

      private

      # Returns +filename+ with any existing "-<n>" counter replaced by
      # "-<rotation>"; a rotation of 0 or nil yields no counter at all.
      def filename_with_rotation(filename, rotation = nil)
        filename, _, ext = split_filename(filename)
        rotation = (rotation.to_i > 0) ? "-#{rotation}" : nil
        [filename, rotation, ext].join
      end

      # Splits +filename+ into [name, "-<n>" counter or nil, extension or nil].
      def split_filename(filename)
        filename.to_s.scan(/^([^.]*?)(-[0-9]+)?(\..+)?$/).flatten
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module MassiveSitemap
  # Notifies search engines about a sitemap via HTTP GET.
  class Ping
    # Ping URL templates; "%s" is replaced by the escaped sitemap URI.
    PING = {
      :google => 'http://www.google.com/webmasters/tools/ping?sitemap=%s',
      :bing   => 'http://www.bing.com/webmaster/ping.aspx?siteMap=%s',
      :ask    => 'http://submissions.ask.com/ping?sitemap=%s'
    }

    # Requests the ping URL of every given engine.
    #
    # sitemap_uri - URI of the sitemap (or index); it is CGI-escaped here
    # engines     - one or many entries, each either a key of PING
    #               (e.g. :google) or a URL template String containing "%s".
    #               Defaults to none, in which case no request is made.
    #
    # Returns the list of engines that were pinged.
    def self.ping_search_engines(sitemap_uri, engines = [])
      # Loaded lazily so that only callers that actually ping pay for it.
      require 'net/http'
      require 'uri'
      require 'cgi'

      sitemap_uri = CGI::escape(sitemap_uri)

      Array(engines).each do |engine|
        # Known keys resolve to their template; anything else is taken as-is.
        engine_url = PING.fetch(engine, engine)
        Net::HTTP.get URI.parse(engine_url % sitemap_uri)
      end
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
|
2
|
+
# open_stream below creates a StringIO, so the library has to be loaded here.
require 'stringio'

module MassiveSitemap
  module Writer
    # Base class of all writers. A writer owns one output stream at a time:
    # +init!+ opens it, +print+ writes to it and +close!+ closes it.
    #
    # Subclasses customize the behaviour through the "Interface" methods and
    # their own OPTS constant.
    class Base
      OPTS = {}

      attr_reader :options

      # options - merged over the OPTS of the concrete class
      def initialize(options = {})
        @options = self.class::OPTS.merge(options)
        @stream  = nil
      end

      # Interface

      # Returns the stream to print to. Here: one in-memory StringIO that is
      # created on first use and reused by every later call.
      def open_stream
        @string ||= StringIO.new
      end

      # Hook called by +close!+ with the current stream; nothing to do here.
      def close_stream(stream)
      end

      # Whether a stream may be opened with the current options.
      def init?
        true
      end

      # Entries yielded by +each+; none here.
      def streams
        []
      end

      # API

      # Closes the current stream (if any), merges +options+ into the
      # writer's options and opens a new stream when +init?+ allows it.
      def init!(options = {})
        close!
        @options.merge!(options)
        @stream = open_stream if init?
      end

      # Closes the current stream; does nothing when none is open.
      def close!
        return unless inited?

        close_stream(@stream)
        @stream = nil
      end

      # Truthy while a stream is open.
      def inited?
        @stream
      end

      # Prints +string+ to the open stream; silently ignored when no stream
      # is open.
      def print(string)
        @stream.print(string) if inited?
      end

      # Yields every entry of +streams+.
      def each(&block)
        streams.each(&block)
      end
    end

  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require "massive_sitemap/writer/base"
|
3
|
+
|
4
|
+
# Write into File
|
5
|
+
|
6
|
+
module MassiveSitemap
  module Writer
    # Writes into a file. Content goes to "<target>.tmp" first and is moved
    # to the target path when the stream is closed.
    class File < Base

      # Raised by +init?+ when the target exists and must not be overwritten.
      class FileExistsException < IOError; end

      # Default options:
      #   :document_full   - directory the files are written to
      #   :force_overwrite - allow replacing an existing target file
      #   :filename        - name of the target file
      #   :index_filename  - name of the index file (left out of +streams+)
      OPTS = {
        :document_full    => '.',
        :force_overwrite  => false,
        :filename         => "sitemap.xml",
        :index_filename   => "sitemap_index.xml",
      }

      # Opens the temporary file for writing.
      def open_stream
        ::File.open(tmp_filename, 'w:ASCII-8BIT')
      end

      # Closes +stream+ and moves the temporary file to the target path,
      # replacing a file that is already there.
      def close_stream(stream)
        stream.close
        # Move from tmp_file into actual file
        ::File.delete(filename) if ::File.exist?(filename)
        ::File.rename(tmp_filename, filename)
      end

      # Returns true when the target may be written.
      #
      # Raises FileExistsException when the target exists and
      # :force_overwrite is not set.
      def init?
        if !options[:force_overwrite] && ::File.exist?(filename)
          raise FileExistsException, "Can not create file: #{filename} exists"
        end
        true
      end

      # Returns [basename, mtime] for every sitemap file in the target
      # directory, skipping paths that contain the index filename.
      def streams
        files.map do |path|
          next if path.include?(options[:index_filename])
          [::File.basename(path), ::File.stat(path).mtime]
        end.compact
      end

      private

      # Path of the target file.
      def filename
        ::File.join options[:document_full], options[:filename]
      end

      # Path of the temporary file written before the final move.
      def tmp_filename
        filename + ".tmp"
      end

      # All "*.xml" files in the target directory.
      def files
        Dir[::File.join(options[:document_full], "*.xml")]
      end
    end

  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'zlib'
|
2
|
+
|
3
|
+
require "massive_sitemap/writer/file"
|
4
|
+
# Write into GZipped File
|
5
|
+
|
6
|
+
module MassiveSitemap
  module Writer

    # File writer whose output is gzip-compressed.
    class GzipFile < File
      # Wraps the stream opened by the parent class in a gzip writer.
      def open_stream
        raw_stream = super
        ::Zlib::GzipWriter.new(raw_stream)
      end

      private

      # Target path of the parent class with ".gz" appended.
      def filename
        "#{super}.gz"
      end

      # All "*.xml.gz" files in the target directory.
      def files
        pattern = ::File.join(options[:document_full], "*.xml.gz")
        Dir[pattern]
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'zlib'
|
2
|
+
|
3
|
+
require "massive_sitemap/writer/file"
|
4
|
+
# Create Lock before writing to file
|
5
|
+
|
6
|
+
module MassiveSitemap
  module Writer

    # File writer guarded by a lock file: the lock is created before the
    # stream is opened and removed when the stream is closed.
    class LockingFile < File
      # Path of the lock file (relative to the current working directory).
      LOCK_FILE = 'generator.lock'

      # Takes the lock, then opens the stream. The lock is given back when
      # opening the stream fails.
      #
      # Raises Errno::EACCES when the lock file already exists.
      def open_stream
        lock!
        begin
          super
        rescue StandardError
          unlock!
          raise
        end
      end

      # Closes the stream and removes the lock, even when closing fails.
      def close_stream(stream)
        super
      ensure
        unlock!
      end

      # Raises Errno::EACCES while the lock file exists; otherwise defers to
      # the parent class.
      def init?
        if ::File.exist?(LOCK_FILE)
          raise Errno::EACCES
        end
        super
      end

      private

      # Creates the lock file. CREAT|EXCL makes the creation fail when the
      # file is already there, and the block form closes the handle at once.
      def lock!
        ::File.open(LOCK_FILE, ::File::WRONLY | ::File::CREAT | ::File::EXCL) { }
      rescue Errno::EEXIST
        # Report a held lock with the same error class as init?.
        raise Errno::EACCES, LOCK_FILE
      end

      # Removes the lock file.
      def unlock!
        FileUtils.rm(LOCK_FILE)
      end
    end

  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
require "massive_sitemap/writer/base"
|
3
|
+
|
4
|
+
# Write into String
|
5
|
+
# Perfect for testing purposes
|
6
|
+
module MassiveSitemap
  module Writer

    # Writer that collects everything in memory; it can be compared with a
    # plain String through +==+ and +include?+.
    class String < Base

      # Returns the in-memory stream, creating it on first use. The same
      # stream is reused by every later call.
      def open_stream
        @string ||= StringIO.new
      end

      # Everything written so far; "" when no stream was ever opened.
      def to_s
        @string ? @string.string : ""
      end

      # True when the written content equals +other_string+.
      def ==(other_string)
        to_s == other_string
      end

      # True when the written content contains +other_string+.
      def include?(other_string)
        to_s.include?(other_string)
      end
    end

  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'massive_sitemap/writer/file'
|
2
|
+
require 'massive_sitemap/writer/gzip_file'
|
3
|
+
require 'massive_sitemap/builder/rotating'
|
4
|
+
require 'massive_sitemap/builder/index'
|
5
|
+
|
6
|
+
# Page at -> <base_url>
|
7
|
+
# http://example.de/dir/
|
8
|
+
|
9
|
+
# Index at
|
10
|
+
# http://sitemap.example.de/index-dir/
|
11
|
+
|
12
|
+
# Save at -> <document_full>
|
13
|
+
# /root/dir/ -> <document_root>/<document_path>
|
14
|
+
|
15
|
+
module MassiveSitemap
  DEFAULTS = {
    # global
    :index_base_url   => nil,
    :gzip             => false,
    :writer           => MassiveSitemap::Writer::File,

    # writer
    :document_full    => '.',
    :force_overwrite  => false,
    :filename         => "sitemap.xml",
    :index_filename   => "sitemap_index.xml",

    # builder
    :base_url         => nil,
    :indent_by        => 2,
  }

  # Generates the sitemap files and afterwards the sitemap index.
  #
  # options - overrides for DEFAULTS; :base_url is mandatory,
  #           :index_base_url falls back to :base_url, and :gzip => true
  #           selects the GzipFile writer regardless of :writer.
  # block   - handed to the rotating builder, where the URLs are added.
  #
  # Returns the index builder.
  # Raises ArgumentError when :base_url is missing.
  def generate(options = {}, &block)
    @options = DEFAULTS.merge options

    unless @options[:base_url]
      raise ArgumentError, 'you must specify ":base_url" string'
    end
    @options[:index_base_url] ||= @options[:base_url]

    Dir.mkdir(@options[:document_full]) unless ::File.exist?(@options[:document_full])

    @options[:writer] = MassiveSitemap::Writer::GzipFile if @options[:gzip]

    @writer = @options[:writer].new @options
    Builder::Rotating.generate(@writer, @options, &block)

    # The same writer is reused for the index, which is always overwritten.
    @writer.options.merge!(:filename => @options[:index_filename], :force_overwrite => true)
    Builder::Index.generate(@writer, @options.merge(:base_url => @options[:index_base_url]))
  end
  module_function :generate
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
|
4
|
+
Gem::Specification.new do |s|
  s.name        = "massive_sitemap"
  # Resolve VERSION relative to this file so the spec also loads when the
  # current directory is not the gem root.
  s.version     = File.read(File.expand_path("../VERSION", __FILE__)).to_s.strip
  s.authors     = ["Tobias Bielohlawek"]
  s.email       = ["tobi@soundcloud.com"]
  s.homepage    = "http://github.com/rngtng/massive_sitemap"
  s.summary     = %q{Build painfree sitemaps for websites with millions of pages}
  s.description = %q{MassiveSitemap - build huge sitemaps painfree. Differential updates keeps generation time short and reduces load on DB. It's heavealy inspired by BigSitemaps and offers compatiable API}

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Each entry is "name [version requirement]".
  %w(rake rspec).each do |gem|
    s.add_development_dependency(*gem.split(' '))
  end
end
|
@@ -0,0 +1,149 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
require "massive_sitemap/builder"
|
4
|
+
require "massive_sitemap/writer/string"
|
5
|
+
|
6
|
+
describe MassiveSitemap::Builder::Base do
|
7
|
+
let(:header) { %Q(<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">) }
|
8
|
+
let(:writer) { MassiveSitemap::Writer::String.new }
|
9
|
+
let(:builder) { MassiveSitemap::Builder.new(writer) }
|
10
|
+
|
11
|
+
describe "#arguments" do
|
12
|
+
it 'fail if no writer given' do
|
13
|
+
expect do
|
14
|
+
MassiveSitemap::Builder.new
|
15
|
+
end.to raise_error(ArgumentError)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
context "no content added" do
|
20
|
+
it 'empty per default' do
|
21
|
+
builder
|
22
|
+
|
23
|
+
writer.should == ""
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'generate basic skeleton' do
|
27
|
+
builder.init!
|
28
|
+
writer.should == header
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'generate basic skeleton on double init' do
|
32
|
+
builder.init!
|
33
|
+
builder.init!
|
34
|
+
writer.should == header
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'generate nothing when not inited' do
|
38
|
+
builder.close!
|
39
|
+
writer.should == ""
|
40
|
+
end
|
41
|
+
|
42
|
+
it "same result on double close" do
|
43
|
+
builder.close!
|
44
|
+
builder.close!
|
45
|
+
writer.should == ""
|
46
|
+
end
|
47
|
+
|
48
|
+
it "same result on double close" do
|
49
|
+
builder.init!
|
50
|
+
builder.close!
|
51
|
+
builder.close!
|
52
|
+
writer.should == %Q(#{header}\n</urlset>)
|
53
|
+
end
|
54
|
+
|
55
|
+
it "same result on double close" do
|
56
|
+
builder.init! do
|
57
|
+
add "test"
|
58
|
+
end
|
59
|
+
writer.should == %Q(#{header}\n <url>\n <loc>/test</loc>\n </url>\n</urlset>)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
context "adding content" do
|
64
|
+
it 'seq: generate one url' do
|
65
|
+
builder.add 'test'
|
66
|
+
builder.close!
|
67
|
+
writer.should == %Q(#{header}\n <url>\n <loc>/test</loc>\n </url>\n</urlset>)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
context "as block" do
|
72
|
+
it 'generate basic skeleton' do
|
73
|
+
MassiveSitemap::Builder.new(writer) {}
|
74
|
+
writer.should == ""
|
75
|
+
end
|
76
|
+
|
77
|
+
it 'generate one url' do
|
78
|
+
MassiveSitemap::Builder.new(writer) do
|
79
|
+
add 'test'
|
80
|
+
end
|
81
|
+
writer.should == %Q(#{header}\n <url>\n <loc>/test</loc>\n </url>\n</urlset>)
|
82
|
+
end
|
83
|
+
|
84
|
+
it 'generate one url with init!' do
|
85
|
+
MassiveSitemap::Builder.new(writer) do
|
86
|
+
init!
|
87
|
+
add 'test'
|
88
|
+
end
|
89
|
+
writer.should == %Q(#{header}\n <url>\n <loc>/test</loc>\n </url>\n</urlset>)
|
90
|
+
end
|
91
|
+
|
92
|
+
it 'generate one url with init! block' do
|
93
|
+
MassiveSitemap::Builder.new(writer) do
|
94
|
+
init! do
|
95
|
+
add 'test'
|
96
|
+
end
|
97
|
+
end
|
98
|
+
writer.should == %Q(#{header}\n <url>\n <loc>/test</loc>\n </url>\n</urlset>)
|
99
|
+
end
|
100
|
+
|
101
|
+
it 'generate one url with close!' do
|
102
|
+
MassiveSitemap::Builder.new(writer) do
|
103
|
+
add 'test'
|
104
|
+
close!
|
105
|
+
end
|
106
|
+
writer.should == %Q(#{header}\n <url>\n <loc>/test</loc>\n </url>\n</urlset>)
|
107
|
+
end
|
108
|
+
|
109
|
+
it 'generate one url, no indent' do
|
110
|
+
MassiveSitemap::Builder.new(writer, :indent_by => 0) do
|
111
|
+
add_url! 'test'
|
112
|
+
end
|
113
|
+
writer.should == %Q(#{header}\n<url>\n<loc>test</loc>\n</url>\n</urlset>)
|
114
|
+
end
|
115
|
+
|
116
|
+
it 'generate two url' do
|
117
|
+
MassiveSitemap::Builder.new(writer) do
|
118
|
+
add_url! 'test'
|
119
|
+
add_url! 'test2'
|
120
|
+
end
|
121
|
+
writer.should == %Q(#{header}\n <url>\n <loc>test</loc>\n </url>\n <url>\n <loc>test2</loc>\n </url>\n</urlset>)
|
122
|
+
end
|
123
|
+
|
124
|
+
it 'generate one url with attrs' do
|
125
|
+
MassiveSitemap::Builder.new(writer, :indent_by => 0) do
|
126
|
+
add_url! 'test', :change_frequency => 'weekly', :priority => 0.8
|
127
|
+
end
|
128
|
+
writer.should include("<loc>test</loc>\n<changefreq>weekly</changefreq>\n<priority>0.8</priority>")
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
describe ".base_url" do
|
133
|
+
URLS = %w(
|
134
|
+
http://test.de/
|
135
|
+
test.de/
|
136
|
+
test.de
|
137
|
+
)
|
138
|
+
|
139
|
+
URLS.each do |url|
|
140
|
+
it "transforms to valid url" do
|
141
|
+
MassiveSitemap::Builder.new(writer, :base_url => url).send(:base_url).should == "http://test.de/"
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
it "transforms to valid url with https" do
|
146
|
+
MassiveSitemap::Builder.new(writer, :base_url => "https://test.de/").send(:base_url).should == "https://test.de/"
|
147
|
+
end
|
148
|
+
end
|
149
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
require "massive_sitemap/builder/index"
|
4
|
+
require "massive_sitemap/writer/string"
|
5
|
+
|
6
|
+
describe MassiveSitemap::Builder::Index do
|
7
|
+
INDEX_HEADER = %Q(<?xml version="1.0" encoding="UTF-8"?>\n<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n<sitemap>)
|
8
|
+
|
9
|
+
let(:writer) { MassiveSitemap::Writer::String.new }
|
10
|
+
|
11
|
+
before do
|
12
|
+
writer.stub!(:streams).and_return(['test'])
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'generates one url' do
|
16
|
+
MassiveSitemap::Builder::Index.new(writer, :indent_by => 0)
|
17
|
+
|
18
|
+
writer.should == %Q(#{INDEX_HEADER}\n<loc>/test</loc>\n</sitemap>\n</sitemapindex>)
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'include base_url' do
|
22
|
+
MassiveSitemap::Builder::Index.new(writer, :base_url => "test.de", :indent_by => 0)
|
23
|
+
writer.should include("<loc>http://test.de/test</loc>")
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
require "massive_sitemap/builder/rotating"
|
4
|
+
require "massive_sitemap/writer/string"
|
5
|
+
|
6
|
+
describe MassiveSitemap::Builder::Rotating do
|
7
|
+
let(:header) { %Q(<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">) }
|
8
|
+
let(:writer) { MassiveSitemap::Writer::String.new }
|
9
|
+
let(:builder) { MassiveSitemap::Builder::Rotating.new(writer) }
|
10
|
+
|
11
|
+
it 'raises error when max_per_sitemap > MAX_URLS' do
|
12
|
+
expect do
|
13
|
+
MassiveSitemap::Builder::Rotating.new(writer, :max_per_sitemap => MassiveSitemap::Builder::Rotating::NUM_URLS.max + 1)
|
14
|
+
end.to raise_error(ArgumentError)
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'generates one url' do
|
18
|
+
MassiveSitemap::Builder::Rotating.new(writer) do
|
19
|
+
add_url! 'test'
|
20
|
+
end
|
21
|
+
writer.should == %Q(#{header}\n <url>\n <loc>test</loc>\n </url>\n</urlset>)
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'generates two url' do
|
25
|
+
MassiveSitemap::Builder::Rotating.new(writer, :max_per_sitemap => 1) do
|
26
|
+
add_url! 'test'
|
27
|
+
add_url! 'test2'
|
28
|
+
end
|
29
|
+
writer.should == %Q(#{header}\n <url>\n <loc>test</loc>\n </url>\n</urlset>#{header}\n <url>\n <loc>test2</loc>\n </url>\n</urlset>)
|
30
|
+
end
|
31
|
+
|
32
|
+
context "with file" do
|
33
|
+
let(:filename) { 'sitemap.xml' }
|
34
|
+
let(:filename2) { 'sitemap-1.xml' }
|
35
|
+
let(:writer) { MassiveSitemap::Writer::File.new }
|
36
|
+
|
37
|
+
after do
|
38
|
+
FileUtils.rm(filename) rescue nil
|
39
|
+
FileUtils.rm(filename2) rescue nil
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'generates two url' do
|
43
|
+
expect do
|
44
|
+
expect do
|
45
|
+
MassiveSitemap::Builder::Rotating.new(writer, :max_per_sitemap => 1) do
|
46
|
+
add 'test'
|
47
|
+
add 'test2'
|
48
|
+
end
|
49
|
+
end.to change { File.exists?(filename) }.to(true)
|
50
|
+
end.to change { File.exists?(filename2) }.to(true)
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'generates two url when file exists' do
|
54
|
+
File.open(filename, 'w') {}
|
55
|
+
expect do
|
56
|
+
expect do
|
57
|
+
MassiveSitemap::Builder::Rotating.new(writer, :max_per_sitemap => 1) do
|
58
|
+
begin
|
59
|
+
add 'test'
|
60
|
+
rescue MassiveSitemap::Writer::File::FileExistsException => e
|
61
|
+
end
|
62
|
+
add 'test2'
|
63
|
+
end
|
64
|
+
end.to_not change { File.exists?(filename) }.to(true)
|
65
|
+
end.to change { File.exists?(filename2) }.to(true)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
describe "#filename_with_rotation" do
|
70
|
+
context "keeps filename" do
|
71
|
+
it "rotation is zero" do
|
72
|
+
builder.send(:filename_with_rotation, "sitemap.xml").should == "sitemap.xml"
|
73
|
+
end
|
74
|
+
|
75
|
+
it "rotation is zero" do
|
76
|
+
builder.send(:filename_with_rotation, "sitemap2.xml").should == "sitemap2.xml"
|
77
|
+
end
|
78
|
+
|
79
|
+
it "rotation is zero" do
|
80
|
+
builder.send(:filename_with_rotation, "sitemap.xml", nil).should == "sitemap.xml"
|
81
|
+
end
|
82
|
+
|
83
|
+
it "rotation is nil" do
|
84
|
+
builder.send(:filename_with_rotation, "sitemap.xml", 0).should == "sitemap.xml"
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
context "rotation is 1" do
|
89
|
+
it "add prefix" do
|
90
|
+
builder.send(:filename_with_rotation, "sitemap.xml", 1).should == "sitemap-1.xml"
|
91
|
+
end
|
92
|
+
|
93
|
+
it "rotation is zero" do
|
94
|
+
builder.send(:filename_with_rotation, "sitemap-1.xml", 1).should == "sitemap-1.xml"
|
95
|
+
end
|
96
|
+
|
97
|
+
it "rotation is zero" do
|
98
|
+
builder.send(:filename_with_rotation, "sitemap-user.xml", 1).should == "sitemap-user-1.xml"
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
describe "#split_filename" do
|
104
|
+
FILENAMES = {
|
105
|
+
nil => ["", nil, nil],
|
106
|
+
".xml" => ["", nil, ".xml"],
|
107
|
+
".xml.gz" => ["", nil, ".xml.gz"],
|
108
|
+
"sitemap" => ["sitemap", nil, nil],
|
109
|
+
"sitemap.xml" => ["sitemap", nil, ".xml"],
|
110
|
+
"sitemap.xml.gz" => ["sitemap", nil, ".xml.gz"],
|
111
|
+
"-1.xml" => ["", "-1", ".xml"],
|
112
|
+
"-1.xml.gz" => ["", "-1", ".xml.gz"],
|
113
|
+
"sitemap-1" => ["sitemap", "-1", nil],
|
114
|
+
"sitemap-1.xml" => ["sitemap", "-1", ".xml"],
|
115
|
+
"sitemap-1.xml.gz" => ["sitemap", "-1", ".xml.gz"],
|
116
|
+
"-user-1.xml" => ["-user", "-1", ".xml"],
|
117
|
+
"-user-1.xml.gz" => ["-user", "-1", ".xml.gz"],
|
118
|
+
"sitemap-user-1" => ["sitemap-user", "-1", nil],
|
119
|
+
"sitemap-user-1.xml" => ["sitemap-user", "-1", ".xml"],
|
120
|
+
"sitemap-user-1.xml.gz" => ["sitemap-user", "-1", ".xml.gz"],
|
121
|
+
"sitemap1" => ["sitemap1", nil, nil],
|
122
|
+
"sitemap1.xml" => ["sitemap1", nil, ".xml"],
|
123
|
+
"sitemap1.xml.gz" => ["sitemap1", nil, ".xml.gz"],
|
124
|
+
}
|
125
|
+
|
126
|
+
FILENAMES.each do |filename, expected|
|
127
|
+
it "splits filename #{filename} into #{expected.join(' ')}" do
|
128
|
+
builder.send(:split_filename, filename).should == expected
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
end
|
@@ -0,0 +1,158 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
require "massive_sitemap/writer/gzip_file"
|
4
|
+
|
5
|
+
describe MassiveSitemap do
|
6
|
+
let(:index_filename) { 'sitemap_index.xml' }
|
7
|
+
let(:filename) { 'sitemap.xml' }
|
8
|
+
let(:filename2) { 'sitemap2.xml' }
|
9
|
+
|
10
|
+
def output(file = filename)
|
11
|
+
`cat '#{file}'`
|
12
|
+
end
|
13
|
+
|
14
|
+
def gz_filename(file = filename)
|
15
|
+
"#{file}.gz"
|
16
|
+
end
|
17
|
+
|
18
|
+
after do
|
19
|
+
FileUtils.rm(index_filename) rescue nil
|
20
|
+
FileUtils.rm(filename) rescue nil
|
21
|
+
FileUtils.rm(filename2) rescue nil
|
22
|
+
end
|
23
|
+
|
24
|
+
describe "#initalize" do
|
25
|
+
it 'fail if no base_url given' do
|
26
|
+
expect do
|
27
|
+
MassiveSitemap.generate
|
28
|
+
end.to raise_error(ArgumentError)
|
29
|
+
end
|
30
|
+
|
31
|
+
it "does not create empty sitemap file" do
|
32
|
+
expect do
|
33
|
+
MassiveSitemap.generate(:base_url => 'test.de/')
|
34
|
+
end.to_not change { ::File.exists?(filename) }
|
35
|
+
end
|
36
|
+
|
37
|
+
context "custom writer" do
|
38
|
+
after do
|
39
|
+
FileUtils.rm(gz_filename(index_filename)) rescue nil
|
40
|
+
FileUtils.rm(gz_filename) rescue nil
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'takes gzips writer' do
|
44
|
+
expect do
|
45
|
+
MassiveSitemap.generate(:base_url => 'test.de/', :gzip => true) do
|
46
|
+
add "dummy"
|
47
|
+
end
|
48
|
+
end.to change { ::File.exists?(gz_filename) }.to(true)
|
49
|
+
end
|
50
|
+
|
51
|
+
it 'takes custom writer' do
|
52
|
+
expect do
|
53
|
+
MassiveSitemap.generate(:base_url => 'test.de/', :writer => MassiveSitemap::Writer::GzipFile) do
|
54
|
+
add "dummy"
|
55
|
+
end
|
56
|
+
end.to change { ::File.exists?(gz_filename) }.to(true)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
describe "#generate" do
|
62
|
+
it 'adds url' do
|
63
|
+
MassiveSitemap.generate(:base_url => 'test.de') do
|
64
|
+
add "track/name"
|
65
|
+
end
|
66
|
+
output.should include("<loc>http://test.de/track/name</loc>")
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'adds url with root slash' do
|
70
|
+
MassiveSitemap.generate(:base_url => 'test.de/') do
|
71
|
+
add "/track/name"
|
72
|
+
end
|
73
|
+
output.should include("<loc>http://test.de/track/name</loc>")
|
74
|
+
end
|
75
|
+
|
76
|
+
it "doesn't fail for existing file" do
|
77
|
+
File.open(filename, 'w') {}
|
78
|
+
expect do
|
79
|
+
MassiveSitemap.generate(:base_url => 'test.de/') do
|
80
|
+
add "/track/name"
|
81
|
+
end
|
82
|
+
end.to_not change { File.stat(filename).mtime }
|
83
|
+
end
|
84
|
+
|
85
|
+
context 'nested generation' do
|
86
|
+
it 'adds url of nested builder' do
|
87
|
+
MassiveSitemap.generate(:base_url => 'test.de/') do
|
88
|
+
writer = @writer.class.new(@options.merge(:filename => 'sitemap2.xml'))
|
89
|
+
MassiveSitemap::Builder::Rotating.new(writer, @options) do
|
90
|
+
add "/set/name"
|
91
|
+
end
|
92
|
+
end
|
93
|
+
output(filename2).should include("<loc>http://test.de/set/name</loc>")
|
94
|
+
end
|
95
|
+
|
96
|
+
it 'executes block altough first sitemap exists' do
|
97
|
+
File.open(filename, 'w') {}
|
98
|
+
MassiveSitemap.generate(:base_url => 'test.de/') do
|
99
|
+
writer = @writer.class.new(@options.merge(:filename => 'sitemap2.xml'))
|
100
|
+
MassiveSitemap::Builder::Rotating.new(writer, @options) do
|
101
|
+
add "/set/name"
|
102
|
+
end
|
103
|
+
end
|
104
|
+
output(filename2).should include("<loc>http://test.de/set/name</loc>")
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
end
|
109
|
+
|
110
|
+
describe "#generate_index" do
|
111
|
+
let(:lastmod) { File.stat(index_filename).mtime.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') }
|
112
|
+
|
113
|
+
it "does not create empty files" do
|
114
|
+
MassiveSitemap.generate(:base_url => 'test.de/')
|
115
|
+
::File.exists?(index_filename).should be_false
|
116
|
+
end
|
117
|
+
|
118
|
+
it 'includes urls' do
|
119
|
+
MassiveSitemap.generate(:base_url => 'test.de/', :indent_by => 0) do
|
120
|
+
add "dummy"
|
121
|
+
end
|
122
|
+
|
123
|
+
output(index_filename).should include("<sitemap>\n<loc>http://test.de/sitemap.xml</loc>\n<lastmod>#{lastmod}</lastmod>\n</sitemap>")
|
124
|
+
end
|
125
|
+
|
126
|
+
it 'includes index base url' do
|
127
|
+
MassiveSitemap.generate(:base_url => 'test.de/', :index_base_url => 'index.de/') do
|
128
|
+
add "dummy"
|
129
|
+
end
|
130
|
+
|
131
|
+
output(index_filename).should include("<loc>http://index.de/sitemap.xml</loc>")
|
132
|
+
end
|
133
|
+
|
134
|
+
it 'overwrites existing one' do
|
135
|
+
File.open(index_filename, 'w') {}
|
136
|
+
MassiveSitemap.generate(:base_url => 'test.de/', :index_base_url => 'index.de/') do
|
137
|
+
add "dummy"
|
138
|
+
end
|
139
|
+
|
140
|
+
output(index_filename).should include("<loc>http://index.de/sitemap.xml</loc>")
|
141
|
+
end
|
142
|
+
|
143
|
+
context "gziped" do
|
144
|
+
after do
|
145
|
+
FileUtils.rm(gz_filename(index_filename)) rescue nil
|
146
|
+
FileUtils.rm(gz_filename) rescue nil
|
147
|
+
end
|
148
|
+
|
149
|
+
it 'creates sitemap file' do
|
150
|
+
expect do
|
151
|
+
MassiveSitemap.generate(:base_url => 'test.de/', :writer => MassiveSitemap::Writer::GzipFile) do
|
152
|
+
add "dummy"
|
153
|
+
end
|
154
|
+
end.to change { ::File.exists?(gz_filename(index_filename)) }.to(true)
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
data/spec/spec_helper.rb
ADDED
data/spec/writer/file_spec.rb
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
require "massive_sitemap/writer/file"
|
4
|
+
|
5
|
+
# Specs for MassiveSitemap::Writer::File: file creation, rotation to a
# second file, written content, and behaviour when the target already exists.
describe MassiveSitemap::Writer::File do
  let(:filename)  { 'sitemap.xml' }
  let(:filename2) { 'sitemap-1.xml' }
  # A writer that has already been initialised with its default options.
  let(:writer)    { MassiveSitemap::Writer::File.new.tap { |w| w.init! } }

  after do
    # rm_f is a no-op for missing files, replacing `rm ... rescue nil`.
    FileUtils.rm_f(filename)
    FileUtils.rm_f(filename2)
  end

  describe "document_full" do
    let(:folder) { "test" }

    before do
      # File.exist? instead of File.exists? (removed in Ruby 3.2).
      Dir.mkdir(folder) unless ::File.exist?(folder)
    end

    after do
      FileUtils.rm_rf(folder)
    end

    it 'appends document_full' do
      expect do
        MassiveSitemap::Writer::File.new(:document_full => folder).tap do |w|
          w.init!
          w.close!
        end
      end.to change { File.exist?("test/#{filename}") }.to(true)
    end

    # Same expectation as above, but the folder is given with a trailing
    # slash; the resulting path must be identical.
    it 'appends document_full with trailing slash' do
      expect do
        MassiveSitemap::Writer::File.new(:document_full => "#{folder}/").tap do |w|
          w.init!
          w.close!
        end
      end.to change { File.exist?("test/#{filename}") }.to(true)
    end
  end

  it 'create file' do
    # The target file is expected to appear only once the writer is closed.
    expect do
      writer.close!
    end.to change { File.exist?(filename) }.to(true)
  end

  it 'create second file on rotation' do
    expect do
      expect do
        writer.close!
      end.to change { File.exist?(filename) }.to(true)
      # Re-initialising with another filename rotates to a second file.
      writer.init!(:filename => filename2)
      writer.close!
    end.to change { File.exist?(filename2) }.to(true)
  end

  it 'write into file' do
    writer.print 'test'
    writer.close!
    # Read the file directly instead of shelling out to `cat`.
    File.read(filename).should == "test"
  end

  it 'init new file closes current' do
    writer.print 'test'
    # No explicit close!: init! on the next file must finish the first one.
    writer.init!(:filename => filename2)
    File.read(filename).should == "test"
  end

  it 'write into second file' do
    writer.print 'test'
    writer.init!(:filename => filename2)
    writer.print 'test2'
    writer.close!
    File.read(filename2).should == "test2"
  end

  context "opening write file" do
    before do
      # Pre-create an empty target so init! finds an existing file.
      File.open(filename, 'w') {}
    end

    after do
      FileUtils.rm_f(filename)
    end

    it 'raises when file exists' do
      writer = MassiveSitemap::Writer::File.new
      expect do
        writer.init!
      end.to raise_error(MassiveSitemap::Writer::File::FileExistsException)
    end

    # Previously also named 'raises when file exits', although it asserts
    # the opposite.
    it 'does not raise when file exists and force_overwrite is set' do
      writer = MassiveSitemap::Writer::File.new(:force_overwrite => true)
      # Bare form: RSpec 3 rejects `to_not raise_error(SomeClass)`.
      expect do
        writer.init!
      end.to_not raise_error
    end
  end
end
|
data/spec/writer/gzip_file_spec.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require "massive_sitemap/writer/gzip_file"
|
2
|
+
|
3
|
+
# Specs for MassiveSitemap::Writer::GzipFile: closing the writer must leave
# a "<filename>.gz" file behind.
describe MassiveSitemap::Writer::GzipFile do
  let(:filename)     { 'sitemap.xml' }
  let(:tmp_filename) { "#{filename}.tmp" }
  let(:gz_filename)  { "#{filename}.gz" }
  # A writer that has already been initialised with its default options.
  let(:writer)       { MassiveSitemap::Writer::GzipFile.new.tap { |w| w.init! } }

  after do
    # Remove every file these specs clean up after; rm_f is a no-op for
    # missing files, replacing `rm ... rescue nil`.
    FileUtils.rm_f(filename)
    FileUtils.rm_f(tmp_filename)
    FileUtils.rm_f(gz_filename)
  end

  it 'creates gzip file' do
    expect do
      writer.close!
      # File.exist? instead of File.exists? (removed in Ruby 3.2).
    end.to change { File.exist?(gz_filename) }.from(false).to(true)
  end
end
|
data/spec/writer/locking_file_spec.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require "massive_sitemap/writer/locking_file"
|
2
|
+
|
3
|
+
# Specs for MassiveSitemap::Writer::LockingFile: the lock file must exist
# while the writer is open, disappear on close!, and block a second writer.
describe MassiveSitemap::Writer::LockingFile do
  let(:filename)     { 'sitemap.xml' }
  let(:tmp_filename) { "#{filename}.tmp" }
  let(:lock_file)    { MassiveSitemap::Writer::LockingFile::LOCK_FILE }
  # Lazily built: merely referencing `writer` constructs it and calls init!.
  let(:writer)       { MassiveSitemap::Writer::LockingFile.new.tap { |w| w.init! } }

  after do
    # rm_f is a no-op for missing files, replacing `rm ... rescue nil`.
    FileUtils.rm_f(filename)
    FileUtils.rm_f(tmp_filename)
    FileUtils.rm_f(lock_file)
  end

  it 'creates lockfile' do
    expect do
      writer
      # File.exist? instead of File.exists? (removed in Ruby 3.2).
    end.to change { File.exist?(lock_file) }.to(true)
  end

  it 'deletes lockfile' do
    # Force initialisation first so the lock exists before close! runs.
    writer
    expect do
      writer.close!
    end.to change { File.exist?(lock_file) }.to(false)
  end

  it 'fails if lockfile exists' do
    # Simulate a concurrent run by creating the lock file up front.
    File.open(lock_file, 'w') {}
    # NOTE(review): the concrete exception class is not visible from this
    # spec; narrow raise_error once confirmed against LockingFile#init!.
    expect do
      writer
    end.to raise_error
  end
end
|
metadata
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: massive_sitemap
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 2.0.0.rc1
|
5
|
+
prerelease: 6
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Tobias Bielohlawek
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-02-12 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rake
|
16
|
+
requirement: &70297299930500 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70297299930500
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rspec
|
27
|
+
requirement: &70297299930040 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70297299930040
|
36
|
+
description: MassiveSitemap - build huge sitemaps painfree. Differential updates keep
|
37
|
+
generation time short and reduce load on the DB. It's heavily inspired by BigSitemap
|
38
|
+
and offers a compatible API
|
39
|
+
email:
|
40
|
+
- tobi@soundcloud.com
|
41
|
+
executables: []
|
42
|
+
extensions: []
|
43
|
+
extra_rdoc_files: []
|
44
|
+
files:
|
45
|
+
- .gitignore
|
46
|
+
- .travis.yml
|
47
|
+
- CHANGELOG.md
|
48
|
+
- Gemfile
|
49
|
+
- Gemfile.lock
|
50
|
+
- README.md
|
51
|
+
- Rakefile
|
52
|
+
- VERSION
|
53
|
+
- lib/massive_sitemap.rb
|
54
|
+
- lib/massive_sitemap/builder.rb
|
55
|
+
- lib/massive_sitemap/builder/base.rb
|
56
|
+
- lib/massive_sitemap/builder/index.rb
|
57
|
+
- lib/massive_sitemap/builder/rotating.rb
|
58
|
+
- lib/massive_sitemap/ping.rb
|
59
|
+
- lib/massive_sitemap/writer/base.rb
|
60
|
+
- lib/massive_sitemap/writer/file.rb
|
61
|
+
- lib/massive_sitemap/writer/gzip_file.rb
|
62
|
+
- lib/massive_sitemap/writer/locking_file.rb
|
63
|
+
- lib/massive_sitemap/writer/string.rb
|
64
|
+
- massive_sitemap.gemspec
|
65
|
+
- spec/builder/base_spec.rb
|
66
|
+
- spec/builder/index_spec.rb
|
67
|
+
- spec/builder/rotating_spec.rb
|
68
|
+
- spec/massive_sitemap_spec.rb
|
69
|
+
- spec/spec_helper.rb
|
70
|
+
- spec/writer/file_spec.rb
|
71
|
+
- spec/writer/gzip_file_spec.rb
|
72
|
+
- spec/writer/locking_file_spec.rb
|
73
|
+
homepage: http://github.com/rngtng/massive_sitemap
|
74
|
+
licenses: []
|
75
|
+
post_install_message:
|
76
|
+
rdoc_options: []
|
77
|
+
require_paths:
|
78
|
+
- lib
|
79
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
80
|
+
none: false
|
81
|
+
requirements:
|
82
|
+
- - ! '>='
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
86
|
+
none: false
|
87
|
+
requirements:
|
88
|
+
- - ! '>'
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: 1.3.1
|
91
|
+
requirements: []
|
92
|
+
rubyforge_project:
|
93
|
+
rubygems_version: 1.8.15
|
94
|
+
signing_key:
|
95
|
+
specification_version: 3
|
96
|
+
summary: Build painfree sitemaps for websites with millions of pages
|
97
|
+
test_files:
|
98
|
+
- spec/builder/base_spec.rb
|
99
|
+
- spec/builder/index_spec.rb
|
100
|
+
- spec/builder/rotating_spec.rb
|
101
|
+
- spec/massive_sitemap_spec.rb
|
102
|
+
- spec/spec_helper.rb
|
103
|
+
- spec/writer/file_spec.rb
|
104
|
+
- spec/writer/gzip_file_spec.rb
|
105
|
+
- spec/writer/locking_file_spec.rb
|