static_sitemap_tasks 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/Gemfile +6 -0
- data/MIT-LICENSE +22 -0
- data/README.md +59 -0
- data/Rakefile +7 -0
- data/lib/static_sitemap_tasks.rb +150 -0
- data/static_sitemap_tasks.gemspec +25 -0
- metadata +76 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/MIT-LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Originally Authored by Chris Martin on 2007-04-30 (http://chriscodes.com/articles/view/54)
|
2
|
+
Updated by Tom Cocca on 2008-10-10.
|
3
|
+
Updated by Michael Leinartas on 2011-10-12
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
== Generate Sitemap Plugin
|
2
|
+
|
3
|
+
Originally Authored by Chris Martin (http://chriscodes.com/articles/view/54)
|
4
|
+
Updates by Tom Cocca
|
5
|
+
Updates include:
|
6
|
+
- Adding a YAML config file to set URL and Change Frequency
|
7
|
+
|
8
|
+
|
9
|
+
== Generate Sitemap Rake Task
|
10
|
+
|
11
|
+
This is a Rails plugin that consists of a rake task to generate a sitemap.xml file.
|
12
|
+
The task crawls a domain (specified in a YAML config per environment) for all its URLs,
|
13
|
+
then builds the sitemap.xml file in public.
|
14
|
+
|
15
|
+
Since this simply crawls a domain for URLs, this could be used to generate
|
16
|
+
sitemaps for any site, not just a Rails application.
|
17
|
+
|
18
|
+
|
19
|
+
== Requirements
|
20
|
+
|
21
|
+
Both of these are available via RubyGems
|
22
|
+
Hpricot - http://code.whytheluckystiff.net/hpricot/
|
23
|
+
Builder - http://rubyforge.org/projects/builder/
|
24
|
+
|
25
|
+
|
26
|
+
== Configuration/Usage
|
27
|
+
|
28
|
+
On initialization generate_sitemap will copy the generate_sitemap_key.yml file to your
|
29
|
+
RAILS_ROOT/config directory.
|
30
|
+
|
31
|
+
Open this file and specify your domain for the different environments and specify the changefreq for
|
32
|
+
your site.
|
33
|
+
|
34
|
+
For changefreq options see this page: http://www.sitemaps.org/protocol.php
|
35
|
+
I haven't devised a way to set this dynamically, as most pages will vary.
|
36
|
+
|
37
|
+
The plugin will pull the correct domain name for crawling the site by using the RAILS_ENV variable
|
38
|
+
in the rake task.
|
39
|
+
|
40
|
+
Run the task with the command
|
41
|
+
rake plugin:generate_sitemap
|
42
|
+
|
43
|
+
Go to http://localhost:3000/sitemap.xml (or open public/sitemap.xml)
|
44
|
+
|
45
|
+
|
46
|
+
== TODO
|
47
|
+
|
48
|
+
- set changefreq, lastmod, priority dynamically during generation
|
49
|
+
- allow generation of sitemap index files
|
50
|
+
- write tests
|
51
|
+
- allow for exclusions to be specified in an array
|
52
|
+
|
53
|
+
|
54
|
+
== More Info
|
55
|
+
|
56
|
+
http://www.sitemaps.org/protocol.php
|
57
|
+
|
58
|
+
Questions, comments, patches, etc. can be sent to
|
59
|
+
tom dot cocca at gmail dot com
|
data/Rakefile
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'builder'
|
3
|
+
require 'hpricot'
|
4
|
+
require 'uri'
|
5
|
+
|
6
|
+
|
7
|
+
module SitemapGenerator
  # Installs a `generate_sitemap` rake task that walks the HTML files of a
  # static site on disk, follows the <a href> links it finds, and writes a
  # sitemap.xml (optionally gzipped) into the site's public root.
  class Tasks
    include Rake::DSL

    # Matches hrefs that point away from the site: anything carrying a URI
    # scheme (http:, https:, mailto:, javascript:, ...) or a protocol-relative
    # "//host/..." reference.
    OFFSITE_HREF = %r{\A(?:[a-z][a-z0-9+.\-]*:|//)}i

    # Builds a Tasks instance from +options+ and defines its rake task.
    # See #initialize for the recognised options. A :dir option is accepted
    # but ignored; the caller's hash is left untouched.
    def self.install(options = {})
      new(options).install
    end

    # options:
    #   :public_root      - root of files to crawl (default: Dir.pwd)
    #   :change_frequency - value for <changefreq>, omitted when nil;
    #                       see http://www.sitemaps.org/protocol.php#changefreqdef
    #   :base_url         - canonical domain of the published site
    #   :index_files      - file names tried when a link resolves to a directory
    #   :gzip_output      - write sitemap.xml.gz instead of sitemap.xml
    #                       (default: true)
    def initialize(options = {})
      @public_root = options[:public_root] || Dir.pwd
      @change_frequency = options[:change_frequency]
      @base_url = options[:base_url]
      @index_files = options[:index_files] || ['index.html', 'index.htm']
      # `options[:gzip_output] || true` could never be false; only fall back
      # to the default when the option was not supplied.
      gzip = options[:gzip_output]
      @gzip_output = gzip.nil? ? true : gzip
    end

    # Defines the `generate_sitemap` rake task.
    def install
      desc "Generate a sitemap based on the contents of #{@public_root}"
      task 'generate_sitemap' do
        generate_sitemap
      end
    end

    # Crawls the site starting at '/', then writes the sitemap file.
    # Raises ArgumentError when no :base_url was configured, because every
    # <loc> entry is built relative to it.
    def generate_sitemap
      if @base_url.to_s.empty?
        raise ArgumentError, 'a :base_url option is required to generate a sitemap'
      end

      # holds pages to go into the map, and pages already crawled
      @pages = []
      @pages_crawled = []

      # start with the index page
      crawl_for_links('/')

      # @pages grows while it is being walked (each crawl may append newly
      # discovered links), so advance by index until no page is left.
      position = 0
      while position < @pages.length
        page = @pages[position]
        crawl_for_links(page) unless @pages_crawled.include?(page)
        position += 1
      end

      write_sitemap(build_sitemap_xml)
    end

    # Parses the file behind +link_path+ with Hpricot, records the path in
    # @pages_crawled, and appends every new same-site link to @pages.
    # Links on other sites are skipped silently; links that cannot be mapped
    # to a local file produce a warning. The argument is never modified.
    def crawl_for_links(link_path)
      link_path = site_relative(link_path)
      return if link_path =~ OFFSITE_HREF

      file_path = resolve_file_path(File.join(@public_root, link_path))
      if file_path.nil?
        puts "Warning: Unable to resolve #{link_path} to a local file"
        return
      end

      puts "Inspecting #{file_path}...\n"
      doc = parse_html(file_path)
      return unless doc

      @pages_crawled << link_path
      (doc/"a").each do |anchor|
        href = anchor['href']
        next unless href && should_be_included?(href)
        # store the site-relative form so absolute and relative spellings of
        # the same page are recognised as duplicates
        page = site_relative(href)
        @pages << page unless link_exists?(page, @pages)
      end
    end

    # Maps +path+ to an existing file: for a directory the first entry of
    # @index_files found inside it, otherwise the path itself.
    # Returns nil when nothing matches.
    def resolve_file_path(path)
      if File.directory?(path)
        candidates = @index_files.map { |name| File.join(path, name) }
        candidates.find { |candidate| File.exist?(candidate) }
      elsif File.exist?(path)
        path
      end
    end

    # Returns true when +str+ is a link into this site: a relative or
    # root-relative path, or an absolute URL beginning with @base_url.
    # Returns false for in-page anchors ("#..."), nil, and anything with a
    # foreign scheme or host (http/https to other sites, mailto:, ...).
    def should_be_included?(str)
      return false if str.nil? || str.start_with?('#')
      (site_relative(str) =~ OFFSITE_HREF).nil?
    end

    # Returns true when +array+ already holds +str+, ignoring leading and
    # trailing '/' characters ("/a/" equals "a", but "/a/b" differs from "/ab").
    def link_exists?(str, array)
      wanted = trim_slashes(str)
      array.any? { |existing| trim_slashes(existing) == wanted }
    end

    # Removes every '/' character from +str+.
    # Retained for compatibility; link_exists? no longer relies on it because
    # dropping inner slashes made distinct paths compare equal.
    def strip_slashes(str)
      str.delete('/')
    end

    private

    # Renders @pages as a sitemap XML document and returns it as a String.
    def build_sitemap_xml
      xml = Builder::XmlMarkup.new(:indent => 2)
      xml.instruct!
      xml.comment! "Generated on: " + Time.now.to_s
      xml.urlset("xmlns" => "http://www.sitemaps.org/schemas/sitemap/0.9") do
        sitemap_urls.each do |url|
          xml.url do
            xml.loc url
            # TODO - set changefreq dynamically per page
            xml.changefreq @change_frequency unless @change_frequency.nil?
          end
        end
      end
      # target! hands back the buffer without emitting a stray <to_s/> element
      xml.target!
    end

    # Sorted absolute URLs for every collected page. Links that cannot be
    # joined onto @base_url are reported and left out instead of aborting.
    def sitemap_urls
      urls = @pages.sort.map do |link|
        begin
          URI.join(@base_url, link).to_s
        rescue URI::Error => e
          puts "Warning: Skipping #{link}: #{e.message}"
          nil
        end
      end
      urls.compact
    end

    # Writes +xml_string+ to sitemap.xml (or sitemap.xml.gz) in @public_root.
    # Block forms close the file even when writing fails.
    def write_sitemap(xml_string)
      filename = File.join(@public_root, 'sitemap.xml')

      if @gzip_output
        require 'zlib' # loaded lazily: only needed for compressed output
        Zlib::GzipWriter.open("#{filename}.gz") { |gz| gz << xml_string }
      else
        File.open(filename, 'w') { |file| file << xml_string }
      end
    end

    # Parses +file_path+ with Hpricot. Returns nil (after a warning) when the
    # file cannot be read or parsed.
    def parse_html(file_path)
      Hpricot(File.read(file_path))
    rescue StandardError => e
      puts "Warning: Unable to parse #{file_path}: #{e.message}"
      nil
    end

    # Returns +link+ with a leading @base_url removed, or +link+ unchanged
    # when it does not belong to @base_url.
    def site_relative(link)
      base = @base_url.to_s
      return link if base.empty? || !link.start_with?(base)

      rest = link[base.length..-1]
      # "http://example.com" must not claim "http://example.com.evil.org/..."
      return link unless base.end_with?('/') || rest.empty? || rest =~ %r{\A[/?#]}
      rest
    end

    # Removes leading and trailing '/' characters only.
    def trim_slashes(str)
      str.gsub(%r{\A/+|/+\z}, '')
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for static_sitemap_tasks: rake tasks that generate a
# sitemap.xml for a static site.
$:.push File.expand_path("../lib", __FILE__)

Gem::Specification.new do |s|
  s.name        = "static_sitemap_tasks"
  s.version     = "0.1"
  s.platform    = Gem::Platform::RUBY
  s.summary     = 'Rake tasks to manage sitemap.xml generation for static sites'
  s.description = 'Rake tasks to manage sitemap.xml generation for static sites'

  s.required_ruby_version     = ">= 1.8.7"
  s.required_rubygems_version = ">= 1.3.6"

  # Author names as credited in MIT-LICENSE and README.md.
  s.authors     = ["Michael Leinartas", "Tom Cocca", "Chris Martin"]
  s.email       = ["mleinartas@gmail.com"]
  s.homepage    = "https://github.com/mleinart/static_sitemap_tasks"

  # The packaged file lists come from git, so the gem must be built from a
  # git checkout with the git executable available.
  s.files        = `git ls-files`.split("\n")
  s.test_files   = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables  = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_path = 'lib'

  # lib/static_sitemap_tasks.rb requires both libraries at load time, so they
  # have to be installed together with the gem.
  s.add_runtime_dependency "builder"
  s.add_runtime_dependency "hpricot"

  s.add_development_dependency "rake", ">= 0.8.7"
  s.add_development_dependency "bundler", ">= 1.0"
end
|
metadata
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: static_sitemap_tasks
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.1'
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Michael Leinartas
|
9
|
+
- Tim Cocca
|
10
|
+
- Chris Martin
|
11
|
+
autorequire:
|
12
|
+
bindir: bin
|
13
|
+
cert_chain: []
|
14
|
+
date: 2011-10-12 00:00:00.000000000Z
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: rake
|
18
|
+
requirement: &70212813523740 !ruby/object:Gem::Requirement
|
19
|
+
none: false
|
20
|
+
requirements:
|
21
|
+
- - ! '>='
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.8.7
|
24
|
+
type: :development
|
25
|
+
prerelease: false
|
26
|
+
version_requirements: *70212813523740
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: &70212813523280 !ruby/object:Gem::Requirement
|
30
|
+
none: false
|
31
|
+
requirements:
|
32
|
+
- - ! '>='
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '1.0'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: *70212813523280
|
38
|
+
description: Rake tasks to manage sitemap.xml generation for static sites
|
39
|
+
email:
|
40
|
+
- mleinartas@gmail.com
|
41
|
+
executables: []
|
42
|
+
extensions: []
|
43
|
+
extra_rdoc_files: []
|
44
|
+
files:
|
45
|
+
- .gitignore
|
46
|
+
- Gemfile
|
47
|
+
- MIT-LICENSE
|
48
|
+
- README.md
|
49
|
+
- Rakefile
|
50
|
+
- lib/static_sitemap_tasks.rb
|
51
|
+
- static_sitemap_tasks.gemspec
|
52
|
+
homepage: https://github.com/mleinart/static_sitemap_tasks
|
53
|
+
licenses: []
|
54
|
+
post_install_message:
|
55
|
+
rdoc_options: []
|
56
|
+
require_paths:
|
57
|
+
- lib
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
none: false
|
60
|
+
requirements:
|
61
|
+
- - ! '>='
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: 1.8.7
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 1.3.6
|
70
|
+
requirements: []
|
71
|
+
rubyforge_project:
|
72
|
+
rubygems_version: 1.8.10
|
73
|
+
signing_key:
|
74
|
+
specification_version: 3
|
75
|
+
summary: Rake tasks to manage sitemap.xml generation for static sites
|
76
|
+
test_files: []
|