massive_sitemap 2.0.0.rc7 → 2.0.0.rc8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +14 -6
- data/VERSION +1 -1
- data/lib/massive_sitemap/writer/file.rb +4 -4
- data/spec/writer/file_spec.rb +6 -1
- metadata +6 -6
data/README.md
CHANGED
|
@@ -3,10 +3,14 @@
|
|
|
3
3
|
Build painfree sitemaps for websites with millions of pages
|
|
4
4
|
|
|
5
5
|
MassiveSitemap is a successor project of [BigSitemap](https://github.com/alexrabarts/big_sitemap), a [Sitemap](http://sitemaps.org) generator for websites with millions of pages.
|
|
6
|
-
It implements various generation stategies, e.g. to split large Sitemaps into multiple files, gzip files to minimize bandwidth usage, or incremental updates.
|
|
6
|
+
It implements various generation strategies, e.g. splitting large Sitemaps into multiple files, gzipping files to minimize bandwidth usage, or incremental updates. Its API is very similar to that of _BigSitemap_; it can be set up with just a few lines of code and is compatible with just about any framework.
|
|
7
|
+
|
|
7
8
|
|
|
8
9
|
## Usage
|
|
9
10
|
|
|
11
|
+
A simple use case that fits most standard scenarios:
|
|
12
|
+
|
|
13
|
+
|
|
10
14
|
```ruby
|
|
11
15
|
require 'massive_sitemap'
|
|
12
16
|
|
|
@@ -17,15 +21,19 @@ MassiveSitemap.ping(index_url)
|
|
|
17
21
|
|
|
18
22
|
```
|
|
19
23
|
|
|
20
|
-
|
|
21
|
-
|
|
24
|
+
## Structure
|
|
25
|
+
|
|
26
|
+
MassiveSitemap is structured in two major parts: `Builder` and `Writer`. Both offer an abstract interface which is tailored to the specific needs.
|
|
27
|
+
|
|
28
|
+
### Builder
|
|
29
|
+
`Builder` keeps all the logic related to the sitemap structure and builds the XML data. `Builder::Index` does the same for the index structure. `Builder::Rotation` is an extension that makes sure no more than 50k URLs are written per file, according to the sitemap specs.
|
|
22
30
|
|
|
23
|
-
MassiveSitemap - build huge sitemaps painfree. Differential updates keeps generation time short and reduces load on DB. It's heavealy inspired by BigSitemaps and offers compatiable API
|
|
24
31
|
|
|
25
|
-
|
|
32
|
+
### Writer
|
|
33
|
+
The `Writer` takes care of the storage. At the top level, that's just a string (`Writer::String`); `Writer::File` stores to files, and `Writer::GzipFile` gzips them as well. `Writer` keeps the state of the files and implements various strategies for updating them.
|
|
26
34
|
|
|
27
|
-
Obviously depends on a S3 library which [S3 gem](https://github.com/qoobaa/s3)
|
|
28
35
|
|
|
36
|
+
Further extension and customization can easily be done, e.g. a `Writer::S3` [extension](https://github.com/rngtng/massive_sitemap-writer-s3) stores the sitemap files to Amazon S3.
|
|
29
37
|
|
|
30
38
|
## Contributing
|
|
31
39
|
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
2.0.0.rc7
|
|
1
|
+
2.0.0.rc8
|
|
@@ -85,10 +85,10 @@ module MassiveSitemap
|
|
|
85
85
|
|
|
86
86
|
def chaos_monkey_stream_ids(stream_id_keys, days)
|
|
87
87
|
return [] if days < 1
|
|
88
|
-
offset = Time.now.to_i / (24 * 60 * 60)
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
end
|
|
88
|
+
offset = 1 + Time.now.to_i / (24 * 60 * 60)
|
|
89
|
+
stream_id_keys.select do |stream_id_key|
|
|
90
|
+
(stream_id_key.scan(/\d+/).first.to_i + offset) % days == 0
|
|
91
|
+
end
|
|
92
92
|
end
|
|
93
93
|
|
|
94
94
|
def delete_stream_ids(to_delete)
|
data/spec/writer/file_spec.rb
CHANGED
|
@@ -179,7 +179,7 @@ describe MassiveSitemap::Writer::File do
|
|
|
179
179
|
end
|
|
180
180
|
|
|
181
181
|
describe "chaos_monkey_stream_ids" do
|
|
182
|
-
let(:writer) {
|
|
182
|
+
let(:writer) { MassiveSitemap::Writer::File.new }
|
|
183
183
|
let(:day) { 60 * 60 * 24}
|
|
184
184
|
|
|
185
185
|
context "one file" do
|
|
@@ -213,6 +213,11 @@ describe MassiveSitemap::Writer::File do
|
|
|
213
213
|
Time.stub!(:now).and_return(3 * day)
|
|
214
214
|
writer.send(:chaos_monkey_stream_ids, %w(sitemap-1.xml sitemap-2.xml sitemap-3.xml), 2).should == %w(sitemap-2.xml)
|
|
215
215
|
end
|
|
216
|
+
|
|
217
|
+
it "deletes file on 3rd day even when new file" do
|
|
218
|
+
Time.stub!(:now).and_return(3 * day)
|
|
219
|
+
writer.send(:chaos_monkey_stream_ids, %w(sitemap_tracks.xml sitemap-1.xml sitemap-2.xml sitemap-3.xml), 2).should == %w(sitemap_tracks.xml sitemap-2.xml)
|
|
220
|
+
end
|
|
216
221
|
end
|
|
217
222
|
end
|
|
218
223
|
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: massive_sitemap
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.0.0.rc7
|
|
4
|
+
version: 2.0.0.rc8
|
|
5
5
|
prerelease: 6
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -9,11 +9,11 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2012-
|
|
12
|
+
date: 2012-03-14 00:00:00.000000000Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: rake
|
|
16
|
-
requirement: &
|
|
16
|
+
requirement: &70360986010320 !ruby/object:Gem::Requirement
|
|
17
17
|
none: false
|
|
18
18
|
requirements:
|
|
19
19
|
- - ! '>='
|
|
@@ -21,10 +21,10 @@ dependencies:
|
|
|
21
21
|
version: '0'
|
|
22
22
|
type: :development
|
|
23
23
|
prerelease: false
|
|
24
|
-
version_requirements: *
|
|
24
|
+
version_requirements: *70360986010320
|
|
25
25
|
- !ruby/object:Gem::Dependency
|
|
26
26
|
name: rspec
|
|
27
|
-
requirement: &
|
|
27
|
+
requirement: &70360986009860 !ruby/object:Gem::Requirement
|
|
28
28
|
none: false
|
|
29
29
|
requirements:
|
|
30
30
|
- - ! '>='
|
|
@@ -32,7 +32,7 @@ dependencies:
|
|
|
32
32
|
version: '0'
|
|
33
33
|
type: :development
|
|
34
34
|
prerelease: false
|
|
35
|
-
version_requirements: *
|
|
35
|
+
version_requirements: *70360986009860
|
|
36
36
|
description: MassiveSitemap - build huge sitemaps painfree. Differential updates keeps
|
|
37
37
|
generation time short and reduces load on DB. It's heavealy inspired by BigSitemaps
|
|
38
38
|
and offers compatiable API
|