static_sitemap_tasks 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -0
- data/Gemfile +6 -0
- data/MIT-LICENSE +22 -0
- data/README.md +59 -0
- data/Rakefile +7 -0
- data/lib/static_sitemap_tasks.rb +150 -0
- data/static_sitemap_tasks.gemspec +25 -0
- metadata +76 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/MIT-LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Originally Authored by Chris Martin on 2007-04-30 (http://chriscodes.com/articles/view/54)
|
2
|
+
Updated by Tom Cocca on 2008-10-10.
|
3
|
+
Updated by Michael Leinartas on 2011-10-12
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
== Generate Sitemap Plugin
|
2
|
+
|
3
|
+
Originally Authored by Chris Martin (http://chriscodes.com/articles/view/54)
|
4
|
+
Updates by Tom Cocca
|
5
|
+
Updates include:
|
6
|
+
- Adding a YAML config file to set URL and Change Frequency
|
7
|
+
|
8
|
+
|
9
|
+
== Generate Sitemap Rake Task
|
10
|
+
|
11
|
+
This is a Rails plugin that consists of a rake task to generate a sitemap.xml file.
|
12
|
+
The task crawls a domain (specified in a YAML config per environment) for all its URLs,
|
13
|
+
then builds the sitemap.xml file in public.
|
14
|
+
|
15
|
+
Since this simply crawls a domain for URLs, this could be used to generate
|
16
|
+
sitemaps for any site, not just a Rails application.
|
17
|
+
|
18
|
+
|
19
|
+
== Requirements
|
20
|
+
|
21
|
+
Both of these are available via RubyGems
|
22
|
+
Hpricot - http://code.whytheluckystiff.net/hpricot/
|
23
|
+
Builder - http://rubyforge.org/projects/builder/
|
24
|
+
|
25
|
+
|
26
|
+
== Configuration/Usage
|
27
|
+
|
28
|
+
On initialization generate_sitemap will copy the generate_sitemap_key.yml file to your
|
29
|
+
RAILS_ROOT/config directory.
|
30
|
+
|
31
|
+
Open this file and specify your domain for the different environments and specify the changefreq for
|
32
|
+
your site.
|
33
|
+
|
34
|
+
For changefreq options see this page: http://www.sitemaps.org/protocol.php
|
35
|
+
I haven't devised a way to set this dynamically, as most pages will vary.
|
36
|
+
|
37
|
+
The plugin will pull the correct domain name for crawling the site by using the RAILS_ENV variable
|
38
|
+
in the rake task.
|
39
|
+
|
40
|
+
Run the task with the command
|
41
|
+
rake plugin:generate_sitemap
|
42
|
+
|
43
|
+
Go to http://localhost:3000/sitemap.xml (or open public/sitemap.xml)
|
44
|
+
|
45
|
+
|
46
|
+
== TODO
|
47
|
+
|
48
|
+
- set changefreq, lastmod, priority dynamically during generation
|
49
|
+
- allow generation of sitemap index files
|
50
|
+
- write tests
|
51
|
+
- allow for exclusions to be specified in an array
|
52
|
+
|
53
|
+
|
54
|
+
== More Info
|
55
|
+
|
56
|
+
http://www.sitemaps.org/protocol.php
|
57
|
+
|
58
|
+
Questions, comments, patches, etc. can be sent to
|
59
|
+
tom dot cocca at gmail dot com
|
data/Rakefile
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'builder'
|
3
|
+
require 'hpricot'
|
4
|
+
require 'uri'
|
5
|
+
|
6
|
+
|
7
|
+
module SitemapGenerator
|
8
|
+
class Tasks
|
9
|
+
include Rake::DSL
|
10
|
+
|
11
|
+
# Builds a Tasks instance from +options+ and registers its rake task.
#
# options - Hash of settings passed on to Tasks#initialize. A :dir key is
#           accepted but not used; it is dropped before the remaining
#           settings are handed to the constructor.
#
# Returns whatever the instance-level #install returns.
def self.install(options = {})
  # Work on a copy so the caller's hash is left untouched.
  settings = options.dup
  settings.delete(:dir)
  new(settings).install
end
|
15
|
+
|
16
|
+
# Stores the generator settings.
#
# options - Hash of settings:
#           :public_root      - root directory of the files to crawl
#                               (default: current working directory)
#           :change_frequency - value written to each <changefreq> element,
#                               see http://www.sitemaps.org/protocol.php#changefreqdef
#                               (default: nil, element omitted)
#           :base_url         - canonical domain of the published site
#           :index_files      - file names treated as directory index pages
#                               (default: index.html, index.htm)
#           :gzip_output      - write sitemap.xml.gz instead of sitemap.xml
#                               (default: true)
def initialize(options = {})
  @public_root = options[:public_root] || Dir.pwd
  @change_frequency = options[:change_frequency]
  @base_url = options[:base_url]
  @index_files = options[:index_files] || ['index.html', 'index.htm']

  # `options[:gzip_output] || true` could never yield false, which made it
  # impossible to turn compression off. Only a missing/nil value falls back
  # to the default now.
  gzip = options[:gzip_output]
  @gzip_output = gzip.nil? ? true : gzip
end
|
28
|
+
|
29
|
+
# Registers the 'generate_sitemap' rake task; running the task calls
# #generate_sitemap on this instance.
def install
  desc "Generate a sitemap based on the contents of #{@public_root}"
  task('generate_sitemap') { generate_sitemap }
end
|
35
|
+
|
36
|
+
# Crawls the site starting at '/', then writes every discovered page to
# sitemap.xml (or sitemap.xml.gz when @gzip_output is set) under @public_root.
def generate_sitemap
  # @pages holds the links that go into the map, @pages_crawled the ones
  # already inspected.
  @pages = []
  @pages_crawled = []

  # start with index pages
  crawl_for_links('/')

  # crawl_for_links may append to @pages while this loop is running; the loop
  # relies on #each also visiting those newly appended entries.
  @pages.each do |page|
    crawl_for_links(page) unless @pages_crawled.include?(page)
  end

  # create xml for sitemap
  xml = Builder::XmlMarkup.new(:indent => 2)
  xml.instruct!
  xml.comment! "Generated on: " + Time.now.to_s
  xml.urlset("xmlns" => "http://www.sitemaps.org/schemas/sitemap/0.9") do
    @pages.sort.each do |link|
      xml.url do
        xml.loc URI.join(@base_url, link).to_s
        # TODO - set changefreq dynamically per page
        xml.changefreq @change_frequency unless @change_frequency.nil?
      end
    end
  end

  # target! returns the generated markup. Calling to_s on the builder would
  # emit a bogus <to_s/> element that then has to be stripped again.
  xml_string = xml.target!
  filename = File.join(@public_root, 'sitemap.xml')

  # Block forms close the handle even if the write raises.
  if @gzip_output
    require 'zlib'
    Zlib::GzipWriter.open("#{filename}.gz") { |gz| gz << xml_string }
  else
    File.open(filename, 'w') { |file| file << xml_string }
  end
end
|
79
|
+
|
80
|
+
# uses Hpricot to grab links from a URI
|
81
|
+
# adds uri to @pages_crawled
|
82
|
+
# loops each link found
|
83
|
+
# adds link to pages array if it should be included, unless it already exists
|
84
|
+
# link_path - String link to inspect: a site-relative path or an absolute
#             URL. An absolute URL under @base_url is trimmed to its
#             site-relative part IN PLACE, so a caller passing an element of
#             @pages sees the trimmed value afterwards.
#
# Returns nothing meaningful; results are collected in @pages and
# @pages_crawled.
def crawl_for_links(link_path)
  # Only strings that actually start with an http(s) scheme are absolute
  # URLs; a local path such as '/http-basics.html' must still be crawled.
  if link_path =~ %r{\Ahttps?://}i
    return unless link_path.start_with?(@base_url)
    link_path.sub!(@base_url, '')
  end

  file_path = resolve_file_path(File.join(@public_root, link_path))
  if file_path.nil?
    puts "Warning: Unable to resolve #{link_path} to a local file"
    return
  end

  puts "Inspecting #{file_path}...\n"
  doc = begin
    # Block form closes the file handle once Hpricot has consumed it.
    File.open(file_path) { |io| Hpricot(io) }
  rescue StandardError => e
    # Best effort: an unreadable or unparsable page is skipped, not fatal.
    puts "Warning: Unable to parse #{file_path} (#{e.message})"
    nil
  end
  return unless doc

  @pages_crawled << link_path
  (doc / 'a').each do |anchor|
    href = anchor['href']
    next unless href && should_be_included?(href)
    @pages << href unless link_exists?(href, @pages)
  end
end
|
106
|
+
|
107
|
+
# Maps a filesystem path to the file that should be parsed for it.
#
# path - String filesystem path (file or directory).
#
# Returns the path of the first existing entry of @index_files inside +path+
# when +path+ is a directory, +path+ itself when it exists as a
# non-directory, and nil when nothing matches.
def resolve_file_path(path)
  # File.exist? is used throughout: the File.exists? alias was removed in
  # Ruby 3.2.
  if File.directory?(path)
    candidates = @index_files.map { |name| File.join(path, name) }
    candidates.find { |candidate| File.exist?(candidate) }
  elsif File.exist?(path)
    path
  end
end
|
126
|
+
|
127
|
+
# Decides whether a link found on a page belongs in the sitemap.
#
# str - String href value.
#
# Returns true when the link is not a pure fragment ('#...') and either
# starts with @base_url or is site-local, i.e. carries no URI scheme
# ('http:', 'https:', 'mailto:', 'javascript:', ...) and is not
# protocol-relative ('//host/...'). Returns false otherwise.
def should_be_included?(str)
  return false if str.start_with?('#')
  return true if @base_url && str.start_with?(@base_url)

  # Anything with a scheme or a leading '//' points away from the crawled
  # tree. Checking only for 'http://' let https and other external links
  # through.
  !(str =~ %r{\A(?:[a-z][a-z0-9+.\-]*:|//)}i)
end
|
138
|
+
|
139
|
+
# Checks whether +str+ is already present in +array+, ignoring leading and
# trailing slashes ('/about/' and 'about' count as the same link).
#
# str   - String link to look for.
# array - Array of String links already collected.
#
# Returns true or false.
def link_exists?(str, array)
  wanted = strip_slashes(str)
  array.any? { |known| strip_slashes(known) == wanted }
end

# Trims leading and trailing '/' characters from +str+ and returns the result
# as a new string. Interior slashes are kept: removing them as well made
# distinct paths such as '/a/b.html' and '/ab.html' compare equal.
def strip_slashes(str)
  str.gsub(%r{\A/+|/+\z}, '')
end
|
149
|
+
end
|
150
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for static_sitemap_tasks.
Gem::Specification.new do |s|
  s.name        = "static_sitemap_tasks"
  s.version     = "0.1"
  s.platform    = Gem::Platform::RUBY
  s.summary     = 'Rake tasks to manage sitemap.xml generation for static sites'
  s.description = 'Rake tasks to manage sitemap.xml generation for static sites'

  s.required_ruby_version     = ">= 1.8.7"
  s.required_rubygems_version = ">= 1.3.6"

  # "Tom Cocca" matches the spelling used in MIT-LICENSE and README.md.
  s.authors  = ["Michael Leinartas", "Tom Cocca", "Chris Martin"]
  s.email    = ["mleinartas@gmail.com"]
  s.homepage = "https://github.com/mleinart/static_sitemap_tasks"

  # File lists come from git, so the gem must be built from a git checkout.
  s.files        = `git ls-files`.split("\n")
  s.test_files   = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables  = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_path = 'lib'

  # lib/static_sitemap_tasks.rb requires both of these at load time, so they
  # are runtime (not development) dependencies.
  s.add_dependency "builder"
  s.add_dependency "hpricot"

  s.add_development_dependency "rake", ">= 0.8.7"
  s.add_development_dependency "bundler", ">= 1.0"
end
|
metadata
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: static_sitemap_tasks
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.1'
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Michael Leinartas
|
9
|
+
- Tim Cocca
|
10
|
+
- Chris Martin
|
11
|
+
autorequire:
|
12
|
+
bindir: bin
|
13
|
+
cert_chain: []
|
14
|
+
date: 2011-10-12 00:00:00.000000000Z
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: rake
|
18
|
+
requirement: &70212813523740 !ruby/object:Gem::Requirement
|
19
|
+
none: false
|
20
|
+
requirements:
|
21
|
+
- - ! '>='
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.8.7
|
24
|
+
type: :development
|
25
|
+
prerelease: false
|
26
|
+
version_requirements: *70212813523740
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: &70212813523280 !ruby/object:Gem::Requirement
|
30
|
+
none: false
|
31
|
+
requirements:
|
32
|
+
- - ! '>='
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '1.0'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: *70212813523280
|
38
|
+
description: Rake tasks to manage sitemap.xml generation for static sites
|
39
|
+
email:
|
40
|
+
- mleinartas@gmail.com
|
41
|
+
executables: []
|
42
|
+
extensions: []
|
43
|
+
extra_rdoc_files: []
|
44
|
+
files:
|
45
|
+
- .gitignore
|
46
|
+
- Gemfile
|
47
|
+
- MIT-LICENSE
|
48
|
+
- README.md
|
49
|
+
- Rakefile
|
50
|
+
- lib/static_sitemap_tasks.rb
|
51
|
+
- static_sitemap_tasks.gemspec
|
52
|
+
homepage: https://github.com/mleinart/static_sitemap_tasks
|
53
|
+
licenses: []
|
54
|
+
post_install_message:
|
55
|
+
rdoc_options: []
|
56
|
+
require_paths:
|
57
|
+
- lib
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
none: false
|
60
|
+
requirements:
|
61
|
+
- - ! '>='
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: 1.8.7
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 1.3.6
|
70
|
+
requirements: []
|
71
|
+
rubyforge_project:
|
72
|
+
rubygems_version: 1.8.10
|
73
|
+
signing_key:
|
74
|
+
specification_version: 3
|
75
|
+
summary: Rake tasks to manage sitemap.xml generation for static sites
|
76
|
+
test_files: []
|