massive_sitemap 2.0.0.rc6 → 2.0.0.rc7
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +3 -2
- data/LICENCE +26 -0
- data/README.md +28 -7
- data/VERSION +1 -1
- data/lib/massive_sitemap/builder/base.rb +2 -1
- data/lib/massive_sitemap/builder/index.rb +2 -0
- data/lib/massive_sitemap/builder/rotating.rb +5 -5
- data/lib/massive_sitemap/builder.rb +2 -0
- data/lib/massive_sitemap/lock.rb +3 -2
- data/lib/massive_sitemap/ping.rb +3 -0
- data/lib/massive_sitemap/writer/base.rb +2 -0
- data/lib/massive_sitemap/writer/file.rb +5 -2
- data/lib/massive_sitemap/writer/gzip_file.rb +5 -2
- data/lib/massive_sitemap/writer/string.rb +2 -0
- data/lib/massive_sitemap.rb +19 -15
- data/spec/builder/rotating_spec.rb +4 -0
- data/spec/massive_sitemap_spec.rb +2 -2
- data/spec/writer/file_spec.rb +7 -5
- metadata +7 -6
data/CHANGELOG.md
CHANGED
data/LICENCE
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
Copyright (c) 2012, SoundCloud, Tobias Bielohlawek
|
2
|
+
|
3
|
+
All rights reserved.
|
4
|
+
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
7
|
+
|
8
|
+
- Redistributions of source code must retain the above copyright notice, this
|
9
|
+
list of conditions and the following disclaimer.
|
10
|
+
- Redistributions in binary form must reproduce the above copyright notice,
|
11
|
+
this list of conditions and the following disclaimer in the documentation
|
12
|
+
and/or other materials provided with the distribution.
|
13
|
+
- Neither the name of the SoundCloud nor the names of its contributors may be
|
14
|
+
used to endorse or promote products derived from this software without
|
15
|
+
specific prior written permission.
|
16
|
+
|
17
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
18
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
19
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
20
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
21
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
22
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
23
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
24
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
25
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
26
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
CHANGED
@@ -1,6 +1,4 @@
|
|
1
|
-
# MassiveSitemap
|
2
|
-
|
3
|
-
[![](http://travis-ci.org/rngtng/massive_sitemap.png)](http://travis-ci.org/rngtng/massive_sitemap)
|
1
|
+
# MassiveSitemap [![](http://travis-ci.org/rngtng/massive_sitemap.png)](http://travis-ci.org/rngtng/massive_sitemap)
|
4
2
|
|
5
3
|
Build painfree sitemaps for websites with millions of pages
|
6
4
|
|
@@ -10,13 +8,36 @@ It implements various generation stategies, e.g. to split large Sitemaps into mu
|
|
10
8
|
## Usage
|
11
9
|
|
12
10
|
```ruby
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
11
|
+
require 'massive_sitemap'
|
12
|
+
|
13
|
+
index_url = MassiveSitemap.generate(:url => 'test.de/') do
|
14
|
+
add "dummy"
|
15
|
+
end
|
16
|
+
MassiveSitemap.ping(index_url)
|
17
|
+
|
17
18
|
```
|
18
19
|
|
19
20
|
* clear structure
|
20
21
|
* allows extension (S3)
|
21
22
|
|
22
23
|
MassiveSitemap - build huge sitemaps painfree. Differential updates keep generation time short and reduce load on the DB. It's heavily inspired by BigSitemaps and offers a compatible API.
|
24
|
+
|
25
|
+
## Dependencies
|
26
|
+
|
27
|
+
Obviously depends on an S3 library, namely the [S3 gem](https://github.com/qoobaa/s3)
|
28
|
+
|
29
|
+
|
30
|
+
## Contributing
|
31
|
+
|
32
|
+
We'll check out your contribution if you:
|
33
|
+
|
34
|
+
- Provide a comprehensive suite of tests for your fork.
|
35
|
+
- Have a clear and documented rationale for your changes.
|
36
|
+
- Package these up in a pull request.
|
37
|
+
|
38
|
+
We'll do our best to help you out with any contribution issues you may have.
|
39
|
+
|
40
|
+
|
41
|
+
## License
|
42
|
+
|
43
|
+
The license is included as LICENCE in this directory.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.0.0.
|
1
|
+
2.0.0.rc7
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek
|
2
|
+
|
1
3
|
# MassiveSitemap Builder
|
2
4
|
# The purpose of a builder is to create the XML files, containing the header and all other tags (with attributes).
|
3
5
|
#
|
@@ -91,7 +93,6 @@ module MassiveSitemap
|
|
91
93
|
@writer.print "<#{name}#{attrs}>"
|
92
94
|
end
|
93
95
|
|
94
|
-
private
|
95
96
|
def process(&block)
|
96
97
|
if block
|
97
98
|
instance_eval(&block)
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek
|
2
|
+
|
1
3
|
require "massive_sitemap/builder/base"
|
2
4
|
|
3
5
|
module MassiveSitemap
|
@@ -9,16 +11,14 @@ module MassiveSitemap
|
|
9
11
|
:max_urls => NUM_URLS.max
|
10
12
|
)
|
11
13
|
|
12
|
-
def
|
13
|
-
@urls
|
14
|
-
|
14
|
+
def header!(&block)
|
15
|
+
@urls = 0
|
15
16
|
super
|
16
17
|
end
|
17
18
|
|
18
19
|
def add_url!(location, attrs = {})
|
19
|
-
if @urls >= @options[:max_urls]
|
20
|
+
if @urls.to_i >= @options[:max_urls]
|
20
21
|
close!
|
21
|
-
@urls = 0
|
22
22
|
end
|
23
23
|
super
|
24
24
|
@urls += 1
|
data/lib/massive_sitemap/lock.rb
CHANGED
data/lib/massive_sitemap/ping.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek
|
2
|
+
|
1
3
|
# MassiveSitemap Writer
|
2
4
|
# The purpose of a writer is to store the written data, and to keep the state of existing data.
|
3
5
|
# It offers an API which a builder can talk to, and an interface which other writers have to implement
|
@@ -1,7 +1,10 @@
|
|
1
|
+
# Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek
|
2
|
+
|
1
3
|
require 'fileutils'
|
2
4
|
require 'massive_sitemap/writer/base'
|
3
5
|
|
4
|
-
#
|
6
|
+
# MassiveSitemap Writer File
|
7
|
+
# Extension to base writer for writing into file(s).
|
5
8
|
|
6
9
|
module MassiveSitemap
|
7
10
|
module Writer
|
@@ -27,7 +30,7 @@ module MassiveSitemap
|
|
27
30
|
::File.dirname(tmp_filename).tap do |dir|
|
28
31
|
FileUtils.mkdir_p(dir) unless ::File.exists?(dir)
|
29
32
|
end
|
30
|
-
::File.open(tmp_filename, 'w
|
33
|
+
::File.open(tmp_filename, 'w')
|
31
34
|
end
|
32
35
|
|
33
36
|
def close_stream(file)
|
@@ -1,7 +1,10 @@
|
|
1
|
-
|
1
|
+
# Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek
|
2
2
|
|
3
|
+
require 'zlib'
|
3
4
|
require "massive_sitemap/writer/file"
|
4
|
-
|
5
|
+
|
6
|
+
# MassiveSitemap Writer GzipFile
|
7
|
+
# Extension to file writer for gzip support
|
5
8
|
|
6
9
|
module MassiveSitemap
|
7
10
|
module Writer
|
data/lib/massive_sitemap.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek
|
2
|
+
|
1
3
|
require 'massive_sitemap/writer/file'
|
2
4
|
require 'massive_sitemap/writer/gzip_file'
|
3
5
|
require 'massive_sitemap/builder/rotating'
|
@@ -5,27 +7,29 @@ require 'massive_sitemap/builder/index'
|
|
5
7
|
require 'massive_sitemap/lock'
|
6
8
|
require 'massive_sitemap/ping'
|
7
9
|
|
8
|
-
#
|
9
|
-
#
|
10
|
-
|
11
|
-
#
|
12
|
-
# http://
|
13
|
-
|
14
|
-
#
|
15
|
-
# /
|
10
|
+
# MassiveSitemap
|
11
|
+
# Example Standard setup of a writer, rotating and index builder.
|
12
|
+
# Common parameters:
|
13
|
+
# required:
|
14
|
+
# :url - Url of your website e.g http://example.de/dir/
|
15
|
+
#
|
16
|
+
# optional:
|
17
|
+
# :index_url - Url of your index website e.g http://example.de/sitemap
|
18
|
+
# :root - directory where files get written to e.g. /var/sitemap
|
19
|
+
# :gzip - whether to gzip files or not
|
20
|
+
# :writer - custom writer
|
16
21
|
|
17
22
|
module MassiveSitemap
|
18
23
|
DEFAULTS = {
|
19
24
|
# global
|
20
25
|
:index_url => nil,
|
26
|
+
:index_filename => "sitemap_index.xml",
|
21
27
|
:gzip => false,
|
22
|
-
:writer => MassiveSitemap::Writer::File,
|
23
28
|
|
24
29
|
# writer
|
25
30
|
:root => '.',
|
26
31
|
:force_overwrite => false,
|
27
32
|
:filename => "sitemap.xml",
|
28
|
-
:index_filename => "sitemap_index.xml",
|
29
33
|
|
30
34
|
# builder
|
31
35
|
:url => nil,
|
@@ -37,18 +41,18 @@ module MassiveSitemap
|
|
37
41
|
@options = DEFAULTS.merge options
|
38
42
|
|
39
43
|
unless @options[:url]
|
40
|
-
raise ArgumentError,
|
44
|
+
raise ArgumentError, %Q(":url" not given)
|
41
45
|
end
|
42
46
|
@options[:index_url] ||= @options[:url]
|
43
47
|
|
44
48
|
if @options[:max_urls] && !Builder::Rotating::NUM_URLS.member?(@options[:max_urls])
|
45
|
-
raise ArgumentError, %Q(":max_urls" must be greater than #{NUM_URLS.min} and smaller than #{NUM_URLS.max})
|
49
|
+
raise ArgumentError, %Q(":max_urls" must be greater than #{Builder::Rotating::NUM_URLS.min} and smaller than #{Builder::Rotating::NUM_URLS.max})
|
46
50
|
end
|
47
51
|
|
48
|
-
@
|
52
|
+
@writer = @options.delete(:writer)
|
53
|
+
@writer ||= (@options.delete(:gzip) ? Writer::GzipFile : Writer::File).new
|
49
54
|
|
50
|
-
@writer
|
51
|
-
Builder::Rotating.generate(@writer, @options, &block)
|
55
|
+
Builder::Rotating.generate(@writer.set(@options), @options, &block)
|
52
56
|
|
53
57
|
@writer.set(:filename => @options[:index_filename])
|
54
58
|
Builder::Index.generate(@writer, @options.merge(:url => @options[:index_url]))
|
@@ -57,7 +57,7 @@ describe MassiveSitemap do
|
|
57
57
|
|
58
58
|
it 'takes custom writer' do
|
59
59
|
expect do
|
60
|
-
MassiveSitemap.generate(:url => 'test.de/', :writer => MassiveSitemap::Writer::GzipFile) do
|
60
|
+
MassiveSitemap.generate(:url => 'test.de/', :writer => MassiveSitemap::Writer::GzipFile.new) do
|
61
61
|
add "dummy"
|
62
62
|
end
|
63
63
|
end.to change { ::File.exists?(gz_filename) }.to(true)
|
@@ -157,7 +157,7 @@ describe MassiveSitemap do
|
|
157
157
|
|
158
158
|
it 'creates sitemap file' do
|
159
159
|
expect do
|
160
|
-
MassiveSitemap.generate(:url => 'test.de/', :writer => MassiveSitemap::Writer::GzipFile) do
|
160
|
+
MassiveSitemap.generate(:url => 'test.de/', :writer => MassiveSitemap::Writer::GzipFile.new) do
|
161
161
|
add "dummy"
|
162
162
|
end
|
163
163
|
end.to change { ::File.exists?(gz_filename(index_filename)) }.to(true)
|
data/spec/writer/file_spec.rb
CHANGED
@@ -180,35 +180,37 @@ describe MassiveSitemap::Writer::File do
|
|
180
180
|
|
181
181
|
describe "chaos_monkey_stream_ids" do
|
182
182
|
let(:writer) { MassiveSitemap::Writer::File.new }
|
183
|
+
let(:day) { 60 * 60 * 24}
|
183
184
|
|
184
185
|
context "one file" do
|
186
|
+
|
185
187
|
it { writer.send(:chaos_monkey_stream_ids, %w(sitemap-1.xml), 0).should == [] }
|
186
188
|
it { writer.send(:chaos_monkey_stream_ids, %w(sitemap-1.xml), 1).should == %w(sitemap-1.xml) }
|
187
189
|
|
188
190
|
it "keeps file for 2 days" do
|
189
|
-
Time.stub!(:now).and_return(
|
191
|
+
Time.stub!(:now).and_return(1 * day)
|
190
192
|
writer.send(:chaos_monkey_stream_ids, %w(sitemap-1.xml), 2).should == []
|
191
193
|
end
|
192
194
|
|
193
195
|
it "deletes file on snd day" do
|
194
|
-
Time.stub!(:now).and_return(
|
196
|
+
Time.stub!(:now).and_return(2 * day)
|
195
197
|
writer.send(:chaos_monkey_stream_ids, %w(sitemap-1.xml), 2).should == %w(sitemap-1.xml)
|
196
198
|
end
|
197
199
|
end
|
198
200
|
|
199
201
|
context "many files" do
|
200
202
|
it "keeps file for 2 days" do
|
201
|
-
Time.stub!(:now).and_return(
|
203
|
+
Time.stub!(:now).and_return(1 * day)
|
202
204
|
writer.send(:chaos_monkey_stream_ids, %w(sitemap-1.xml sitemap-2.xml sitemap-3.xml), 2).should == %w(sitemap-2.xml)
|
203
205
|
end
|
204
206
|
|
205
207
|
it "deletes file on 2nd day" do
|
206
|
-
Time.stub!(:now).and_return(
|
208
|
+
Time.stub!(:now).and_return(2 * day)
|
207
209
|
writer.send(:chaos_monkey_stream_ids, %w(sitemap-1.xml sitemap-2.xml sitemap-3.xml), 2).should == %w(sitemap-1.xml sitemap-3.xml)
|
208
210
|
end
|
209
211
|
|
210
212
|
it "deletes file on 3rd day" do
|
211
|
-
Time.stub!(:now).and_return(
|
213
|
+
Time.stub!(:now).and_return(3 * day)
|
212
214
|
writer.send(:chaos_monkey_stream_ids, %w(sitemap-1.xml sitemap-2.xml sitemap-3.xml), 2).should == %w(sitemap-2.xml)
|
213
215
|
end
|
214
216
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: massive_sitemap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.0.
|
4
|
+
version: 2.0.0.rc7
|
5
5
|
prerelease: 6
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-16 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
16
|
-
requirement: &
|
16
|
+
requirement: &70312466352360 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70312466352360
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rspec
|
27
|
-
requirement: &
|
27
|
+
requirement: &70312466351900 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70312466351900
|
36
36
|
description: MassiveSitemap - build huge sitemaps painfree. Differential updates keeps
|
37
37
|
generation time short and reduces load on DB. It's heavealy inspired by BigSitemaps
|
38
38
|
and offers compatiable API
|
@@ -47,6 +47,7 @@ files:
|
|
47
47
|
- CHANGELOG.md
|
48
48
|
- Gemfile
|
49
49
|
- Gemfile.lock
|
50
|
+
- LICENCE
|
50
51
|
- README.md
|
51
52
|
- Rakefile
|
52
53
|
- VERSION
|