simple_sitemap 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +63 -0
- data/Rakefile +2 -0
- data/lib/simple_sitemap.rb +41 -0
- data/lib/simple_sitemap/generators/base.rb +39 -0
- data/lib/simple_sitemap/generators/index.rb +52 -0
- data/lib/simple_sitemap/generators/sitemap.rb +134 -0
- data/lib/simple_sitemap/version.rb +3 -0
- data/lib/simple_sitemap/writers/gzip_writer.rb +18 -0
- data/lib/simple_sitemap/writers/plain_writer.rb +17 -0
- data/simple_sitemap.gemspec +23 -0
- metadata +69 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 ryanlower
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
# SimpleSitemap
|
2
|
+
|
3
|
+
A simple sitemap generator
|
4
|
+
|
5
|
+
## Basic Usage
|
6
|
+
|
7
|
+
### Configure
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
SimpleSitemap.configure do |config|
|
11
|
+
config.local_path = 'tmp/'
|
12
|
+
config.default_path = 'http://yoursite.com'
|
13
|
+
config.sitemap_location = 'http://yoursite.com/sitemap'
|
14
|
+
end
|
15
|
+
```
|
16
|
+
|
17
|
+
### Build your sitemap
|
18
|
+
|
19
|
+
```ruby
|
20
|
+
SimpleSitemap.build do
|
21
|
+
add_path 'home'
|
22
|
+
add_path 'about'
|
23
|
+
sitemap 'ryan' do
|
24
|
+
6.times do |i|
|
25
|
+
add_url i, priority: 0.5
|
26
|
+
end
|
27
|
+
end
|
28
|
+
sitemap 'lower' do
|
29
|
+
5.times do |i|
|
30
|
+
add_url i
|
31
|
+
end
|
32
|
+
end
|
33
|
+
sitemap 'ryan' do
|
34
|
+
6.times do |i|
|
35
|
+
add_path i, priority: 1.0
|
36
|
+
end
|
37
|
+
end
|
38
|
+
add_url 'http://signup.yoursite.com'
|
39
|
+
add_path 'login'
|
40
|
+
end
|
41
|
+
```
|
42
|
+
|
43
|
+
## Hooks
|
44
|
+
|
45
|
+
SimpleSitemap gives you an `after_write` hook for easy access to sitemap files as they are written.
|
46
|
+
|
47
|
+
For example, to upload sitemaps to S3:
|
48
|
+
|
49
|
+
```ruby
|
50
|
+
SimpleSitemap.after_write do |filename|
|
51
|
+
s3 = Fog::Storage.new({
|
52
|
+
provider: 'AWS',
|
53
|
+
aws_access_key_id: 'YOUR_AWS_KEY',
|
54
|
+
aws_secret_access_key: 'YOUR_AWS_SECRET'
|
55
|
+
})
|
56
|
+
bucket = s3.directories.first
|
57
|
+
bucket.files.create(
|
58
|
+
:key => File.basename(filename),
|
59
|
+
:body => open(filename),
|
60
|
+
:public => true
|
61
|
+
)
|
62
|
+
end
|
63
|
+
```
|
data/Rakefile
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
|
2
|
+
require 'ostruct'

require 'simple_sitemap/generators/base'
require 'simple_sitemap/generators/index'
require 'simple_sitemap/generators/sitemap'
require 'simple_sitemap/writers/gzip_writer'
require 'simple_sitemap/writers/plain_writer'

require 'simple_sitemap/version'
|
9
|
+
|
10
|
+
|
11
|
+
# Entry point of the gem: holds the configuration and hooks and drives a
# Generators::Sitemap through a user-supplied DSL block.
module SimpleSitemap

  # Link-count threshold at which the sitemap generator flushes a file.
  MAX_LINKS_PER_FILE = 50000
  # Estimated-size threshold at which the sitemap generator flushes a file.
  MAX_FILE_SIZE = 10*1024*1024 # 10 megabytes

  class << self

    attr_accessor :config, :hooks

    # Replaces the current configuration with a fresh one (gzip enabled,
    # verbose disabled) and yields it so the caller can set further options
    # such as local_path, default_path and sitemap_location.
    #
    # Calling it without a block simply installs the defaults.
    def configure
      @config = default_config
      yield @config if block_given?
    end

    # Evaluates the given block in the context of a new Generators::Sitemap
    # (so add_url / add_path / sitemap are callable without a receiver) and
    # then writes the collected sitemaps.
    #
    # opts is accepted for interface compatibility; it is not read here.
    #
    # Raises ArgumentError when no block is given.
    def build(opts={}, &block)
      raise ArgumentError, 'SimpleSitemap.build requires a block' unless block

      # Fall back to the defaults when configure was never called, instead of
      # handing a nil config to the generator.
      @config ||= default_config

      start_time = Time.now
      generator = Generators::Sitemap.new @config, @hooks
      generator.instance_eval(&block)
      generator.write!
      puts "Time taken: #{Time.now - start_time}" if @config.verbose
    end

    # Registers the block to be called with the path of each file after it
    # has been written. A later registration replaces the earlier one.
    def after_write(&block)
      @hooks ||= {}
      @hooks[:after_write] = block
    end

    private

    # Builds the configuration object that configure starts from.
    def default_config
      defaults = OpenStruct.new
      defaults.gzip = true
      defaults.verbose = false
      defaults
    end

  end

end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module SimpleSitemap

  module Generators

    # Common base of the generators: stores the config and hooks and knows
    # how to hand generated XML to a writer and fire the after_write hook.
    class Base

      attr_writer :config, :hooks

      # config - object responding to local_path and gzip.
      # hooks  - Hash of callbacks (only :after_write is consulted), or nil.
      def initialize(config, hooks)
        @config, @hooks = config, hooks
      end

      private

      # Resolves filename against the configured local_path, writes xml there
      # through the gzip or plain writer depending on config.gzip, then runs
      # the hooks (when any were supplied) with the resolved path.
      def write_file(filename, xml)
        target = File.expand_path(filename, @config.local_path)
        writer_class = @config.gzip ? Writers::GzipWriter : Writers::PlainWriter
        writer_class.new.write(target, xml)
        call_hooks(target) if @hooks
      end

      # Invokes the :after_write callback with path, if one is registered.
      def call_hooks(path)
        callback = @hooks[:after_write]
        callback.call(path) if callback
      end

    end

  end

end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module SimpleSitemap

  module Generators

    # Builds the sitemap index document: a list of sitemap URLs rooted at
    # config.sitemap_location.
    class Index < Base

      attr_accessor :sitemaps

      def initialize(config, hooks)
        super
        @sitemaps = []
      end

      # Appends the URL of the sitemap file called name, joining it to
      # config.sitemap_location with exactly one '/' when the location does
      # not already end in one.
      def add_sitemap(name)
        separator = @config.sitemap_location[-1,1] == '/' ? '' : '/'
        @sitemaps << "#{@config.sitemap_location}#{separator}#{name}"
      end

      # Renders the index and writes it as index.xml, or index.xml.gz when
      # gzip is enabled.
      def write!
        xml = to_xml
        index_filename = @config.gzip ? 'index.xml.gz' : 'index.xml'
        write_file(index_filename, xml)
      end

      # Returns the index as an XML string with one <sitemap><loc> entry per
      # collected URL.
      def to_xml
        document = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
          xml.sitemapindex(xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9') do
            @sitemaps.each do |sitemap_url|
              xml.sitemap { xml.loc sitemap_url }
            end
          end
        end
        document.to_xml
      end

    end

  end

end
|
@@ -0,0 +1,134 @@
|
|
1
|
+
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module SimpleSitemap

  module Generators

    # Collects links into named groups and writes every group out as one or
    # more sitemap files, followed by an index listing all written files.
    #
    # Links added outside of a `sitemap` block go to the default group, which
    # is keyed by nil and written as sitemap_<n>.xml.
    class Sitemap < Base

      attr_accessor :sitemap_name, :sitemap_data

      def initialize(config, hooks)
        super
        @sitemap_data = {}
        enter_sitemap nil
      end

      # Adds url to the current group. opts may carry :changefreq and
      # :priority, which are emitted for the link when present.
      #
      # The group is flushed to a file as soon as it reaches the link-count
      # limit or the estimated size limit.
      def add_url(url, opts={})
        link = { url: url }
        link.merge! opts

        group = @sitemap_data[@sitemap_name]
        group[:links] << link
        group[:size] += 1
        ## TODO, add correct bytesize (this is an overestimate)
        group[:bytesize] += 200

        if group[:size] >= SimpleSitemap::MAX_LINKS_PER_FILE ||
           group[:bytesize] >= SimpleSitemap::MAX_FILE_SIZE
          write @sitemap_name
        end
      end

      # Adds config.default_path joined with path, using exactly one '/'
      # between the two parts.
      #
      # path is converted with to_s, so Integers and Symbols are accepted.
      #
      # Raises RuntimeError when default_path has not been configured.
      def add_path(path, opts={})
        base = @config.default_path
        raise "Can't add a path without configuring default_path" unless base

        # Strip the slash from both sides and re-insert a single one so that
        # 'http://x/' + '/a' does not become 'http://x//a'.
        add_url "#{base.to_s.chomp('/')}/#{path.to_s.sub(%r{\A/}, '')}", opts
      end

      # Runs the block with name as the current group. The previously current
      # group is restored afterwards, even if the block raises, so `sitemap`
      # blocks can be nested.
      def sitemap(name, &block)
        previous = @sitemap_name
        enter_sitemap name
        yield
      ensure
        @sitemap_name = previous
      end

      # Flushes every group that still holds links, then writes the index.
      def write!
        @sitemap_data.keys.each do |name|
          write name unless @sitemap_data[name][:links].empty?
        end
        write_index
      end

      private

      # Makes name the current group, creating its bookkeeping on first use.
      def enter_sitemap(name)
        unless @sitemap_data.has_key? name
          @sitemap_data[name] = { index: 1 }
          reset_sitemap_data name
        end
        @sitemap_name = name
      end

      # Registers every file written so far with a new Index and writes it.
      def write_index
        index = Generators::Index.new @config, @hooks
        @sitemap_data.each do |name, group|
          # :index is the number of the NEXT file, hence the exclusive range.
          (1...group[:index]).each do |i|
            index.add_sitemap sitemap_filename(name, i)
          end
        end
        index.write!
      end

      # Writes the pending links of the group to its next file, clears them
      # and advances the group's file counter.
      def write(name)
        puts "Writing sitemap #{name}#{@sitemap_data[name][:index]}\t [#{@sitemap_data[name][:size]} urls]" if @config.verbose
        xml = to_xml name
        write_file sitemap_filename(name), xml
        reset_sitemap_data name
        @sitemap_data[name][:index] += 1
        enter_sitemap name
      end

      # Clears the pending links and counters of the group while keeping its
      # :index entry.
      def reset_sitemap_data(name)
        @sitemap_data[name].merge!(links: [], size: 0, bytesize: 110)
      end

      # Returns the file name for the group: "<name>_<index>.xml", or
      # "sitemap_<index>.xml" for the default group, with ".gz" appended when
      # gzip is enabled. index defaults to the group's current file counter.
      def sitemap_filename(name, index=nil)
        index ||= @sitemap_data[name][:index]
        filename = "#{name || 'sitemap'}_#{index}.xml"
        filename << '.gz' if @config.gzip
        filename
      end

      # Renders the pending links of the group as a <urlset> XML string.
      def to_xml(name)
        builder = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
          xml.urlset(xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9') do
            @sitemap_data[name][:links].each do |url|
              xml.url do
                xml.loc url[:url]
                # xml.lastmod url[:lastmod].utc if url[:lastmod]
                xml.changefreq url[:changefreq] if url[:changefreq]
                xml.priority url[:priority] if url[:priority]
              end
            end
          end
        end
        builder.to_xml
      end

    end

  end

end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path('../lib', __FILE__)
|
3
|
+
require 'simple_sitemap/version'
|
4
|
+
|
5
|
+
# Gem specification for simple_sitemap.
Gem::Specification.new do |gem|

  gem.name = 'simple_sitemap'
  gem.version = SimpleSitemap::VERSION
  # The bundled LICENSE file is the MIT license.
  gem.license = 'MIT'

  gem.add_dependency 'nokogiri', '~> 1.5.0'

  gem.authors = ['ryanlower']
  gem.email = ['rpjlower@gmail.com']
  gem.description = 'A simple sitemap generator'
  gem.summary = 'Simple sitemap generator'
  gem.homepage = 'https://github.com/academia-edu/simple-sitemap'

  # Split on newlines only: `git ls-files` prints one path per line, and
  # splitting on arbitrary whitespace would break paths containing spaces.
  gem.files = `git ls-files`.split("\n")
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

end
|
metadata
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: simple_sitemap
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- ryanlower
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-03-22 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: nokogiri
|
16
|
+
requirement: &70222280337640 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.5.0
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70222280337640
|
25
|
+
description: A simple sitemap generator
|
26
|
+
email:
|
27
|
+
- rpjlower@gmail.com
|
28
|
+
executables: []
|
29
|
+
extensions: []
|
30
|
+
extra_rdoc_files: []
|
31
|
+
files:
|
32
|
+
- .gitignore
|
33
|
+
- Gemfile
|
34
|
+
- LICENSE
|
35
|
+
- README.md
|
36
|
+
- Rakefile
|
37
|
+
- lib/simple_sitemap.rb
|
38
|
+
- lib/simple_sitemap/generators/base.rb
|
39
|
+
- lib/simple_sitemap/generators/index.rb
|
40
|
+
- lib/simple_sitemap/generators/sitemap.rb
|
41
|
+
- lib/simple_sitemap/version.rb
|
42
|
+
- lib/simple_sitemap/writers/gzip_writer.rb
|
43
|
+
- lib/simple_sitemap/writers/plain_writer.rb
|
44
|
+
- simple_sitemap.gemspec
|
45
|
+
homepage: https://github.com/academia-edu/simple-sitemap
|
46
|
+
licenses: []
|
47
|
+
post_install_message:
|
48
|
+
rdoc_options: []
|
49
|
+
require_paths:
|
50
|
+
- lib
|
51
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ! '>='
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
59
|
+
requirements:
|
60
|
+
- - ! '>='
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
requirements: []
|
64
|
+
rubyforge_project:
|
65
|
+
rubygems_version: 1.8.11
|
66
|
+
signing_key:
|
67
|
+
specification_version: 3
|
68
|
+
summary: Simple sitemap generator
|
69
|
+
test_files: []
|