static_sitemap_tasks 0.1 → 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +9 -0
- data/README.md +24 -48
- data/lib/static_sitemap_tasks.rb +84 -32
- data/static_sitemap_tasks.gemspec +1 -1
- metadata +7 -6
data/ChangeLog
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
v0.2
|
2
|
+
* Namespace tasks with :sitemap
|
3
|
+
* Add ping task from http://github.com/adamsalter/sitemap_generator
|
4
|
+
* Add lastmod tags ability
|
5
|
+
|
6
|
+
v0.1
|
7
|
+
* Initial fork from http://github.com/tcocca/generate_sitemap
|
8
|
+
* Refactor to scan local directory
|
9
|
+
* Refactor to set config items on rake task install
|
data/README.md
CHANGED
@@ -1,59 +1,35 @@
|
|
1
|
-
|
1
|
+
## Generate Sitemap Rake Task ##
|
2
2
|
|
3
|
-
|
4
|
-
Updates by Tom Cocca
|
5
|
-
Updates include:
|
6
|
-
- Adding a YAML config file to set URL and Change Frequency
|
7
|
-
|
8
|
-
|
9
|
-
== Generate Sitemap Rake Task
|
10
|
-
|
11
|
-
This is a Rails plugin that consists of a rake task to generate a sitemap.xml file.
|
12
|
-
The task crawls a domain (specified in a YAML config per environment) for all it's url's,
|
13
|
-
then builds the sitemap.xml file in public.
|
14
|
-
|
15
|
-
Since this simply crawls a domain for url's, this could be used to generate
|
16
|
-
sitemaps for any site, not just a Rails application.
|
17
|
-
|
18
|
-
|
19
|
-
== Requirements
|
20
|
-
|
21
|
-
Both of these are available via RubyGems
|
22
|
-
Hpricot - http://code.whytheluckystiff.net/hpricot/
|
23
|
-
Builder - http://rubyforge.org/projects/builder/
|
3
|
+
This is a small rake task that will crawl a static site locally in the specified directory and generate a sitemap.xml file with a list of links, optionally compressing it.
|
24
4
|
|
5
|
+
## Installation ##
|
25
6
|
|
26
|
-
|
7
|
+
gem install static_sitemap_tasks
|
27
8
|
|
28
|
-
|
29
|
-
|
9
|
+
## Configuration/Usage ##
|
10
|
+
To use, require the gem in your Rakefile and install the task with configuration
|
30
11
|
|
31
|
-
|
32
|
-
your site.
|
12
|
+
require 'static_sitemap_tasks'
|
33
13
|
|
34
|
-
|
35
|
-
|
14
|
+
SitemapGenerator::Tasks.install(
|
15
|
+
:base_url => 'http://www.mysite.com', # Required
|
16
|
+
:change_frequency => 'daily', # Optional, see http://www.sitemaps.org/protocol.php#changefreqdef
|
17
|
+
:date_mode => 'git', # Optional, one of 'git' or 'mtime'. default: nil (disabled)
|
18
|
+
:gzip_output => true, # Optional, default: true
|
19
|
+
:index_files => [ 'index.html' ], # Optional, default: [ 'index.html', 'index.htm' ]
|
20
|
+
:public_root => 'public' # Optional, default: Dir.pwd
|
21
|
+
)
|
36
22
|
|
37
|
-
|
38
|
-
|
23
|
+
To execute,
|
24
|
+
rake sitemap:generate
|
39
25
|
|
40
|
-
|
41
|
-
rake plugin:generate_sitemap
|
42
|
-
|
43
|
-
Go to http://localhost:3000/sitemap.xml (or open public/sitemap.xml)
|
44
|
-
|
45
|
-
|
46
|
-
== TODO
|
47
|
-
|
48
|
-
- set changefreq, lastmod, priority dynamically during generation
|
49
|
-
- allow generation of sitemap index files
|
50
|
-
- write tests
|
51
|
-
- allow for exclusions to be specified in an array
|
52
|
-
|
53
|
-
|
54
|
-
== More Info
|
26
|
+
## More Info ##
|
55
27
|
|
56
28
|
http://www.sitemaps.org/protocol.php
|
57
29
|
|
58
|
-
|
59
|
-
|
30
|
+
## Credits ##
|
31
|
+
|
32
|
+
Originally Authored by Chris Marting (http://chriscodes.com/articles/view/54)
|
33
|
+
Updates by Tom Cocca
|
34
|
+
Rewrite for static sites by Michael Leinartas
|
35
|
+
ping_search_engines() lifted from http://github.com/adamsalter/sitemap_generator by Adam Salter
|
data/lib/static_sitemap_tasks.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'builder'
|
3
3
|
require 'hpricot'
|
4
|
+
require 'time'
|
5
|
+
require 'cgi'
|
4
6
|
require 'uri'
|
5
7
|
|
6
8
|
|
@@ -14,22 +16,74 @@ module SitemapGenerator
|
|
14
16
|
end
|
15
17
|
|
16
18
|
def initialize(options = {})
|
17
|
-
# Root of files to crawl
|
18
|
-
@public_root = options[:public_root] || Dir.pwd
|
19
|
-
# Change frequency - see: http://www.sitemaps.org/protocol.php#changefreqdef
|
20
|
-
@change_frequency = options[:change_frequency]
|
21
19
|
# Canonical domain of published site
|
22
20
|
@base_url = options[:base_url]
|
23
|
-
#
|
24
|
-
@
|
21
|
+
# Change frequency - see: http://www.sitemaps.org/protocol.php#changefreqdef
|
22
|
+
@change_frequency = options[:change_frequency]
|
23
|
+
# Date mode - one of [ 'git', 'mtime' ]
|
24
|
+
@date_mode = options[:date_mode]
|
25
25
|
# Compress output to sitemap.xml.gz
|
26
26
|
@gzip_output = options[:gzip_output] || true
|
27
|
+
# Index pages
|
28
|
+
@index_files = options[:index_files] || [ 'index.html', 'index.htm' ]
|
29
|
+
# Root of files to crawl
|
30
|
+
@public_root = options[:public_root] || Dir.pwd
|
27
31
|
end
|
28
32
|
|
29
33
|
def install
|
30
|
-
|
31
|
-
|
32
|
-
|
34
|
+
namespace :sitemap do
|
35
|
+
desc "Generate a sitemap based on the contents of #{@public_root}"
|
36
|
+
task :generate do
|
37
|
+
generate_sitemap
|
38
|
+
end
|
39
|
+
|
40
|
+
desc "Ping providers to notify them that a new sitemap.xml is available"
|
41
|
+
task :ping do
|
42
|
+
ping_search_engines
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
# uses Hpricot to grab links from a URI
|
48
|
+
# adds uri to @pages_crawled
|
49
|
+
# loops each link found
|
50
|
+
# adds link to pages array if it should be included, unless it already exists
|
51
|
+
def crawl_for_links(link)
|
52
|
+
if link.include?('http')
|
53
|
+
return unless link_path.include?(@base_url)
|
54
|
+
link_path = link.sub!(@base_url,'')
|
55
|
+
else
|
56
|
+
link_path = link
|
57
|
+
end
|
58
|
+
file_path = resolve_file_path(File.join(@public_root, link_path))
|
59
|
+
|
60
|
+
if file_path.nil?
|
61
|
+
puts "Warning: Unable to resolve #{link_path} to a local file"
|
62
|
+
return
|
63
|
+
end
|
64
|
+
|
65
|
+
puts "Inspecting #{file_path}...\n"
|
66
|
+
doc = Hpricot(open(file_path)) rescue nil
|
67
|
+
return unless doc
|
68
|
+
@pages_crawled << link
|
69
|
+
last_updated = find_date(file_path)
|
70
|
+
@page_times[link] = last_updated if last_updated
|
71
|
+
|
72
|
+
(doc/"a").each do |a|
|
73
|
+
if a['href'] && should_be_included?(a['href'])
|
74
|
+
@pages << a['href'] unless(link_exists?(a['href'],@pages))
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
def find_date(file)
|
80
|
+
case @date_mode
|
81
|
+
when 'git'
|
82
|
+
date = %x[git log -n 1 --date=iso --format="%ad" #{file}]
|
83
|
+
date.strip()
|
84
|
+
when 'mtime'
|
85
|
+
mtime = File.mtime(file) rescue nil
|
86
|
+
mtime.iso8601 if mtime
|
33
87
|
end
|
34
88
|
end
|
35
89
|
|
@@ -37,6 +91,7 @@ module SitemapGenerator
|
|
37
91
|
# holds pages to go into map, and pages crawled
|
38
92
|
@pages = []
|
39
93
|
@pages_crawled = []
|
94
|
+
@page_times = {}
|
40
95
|
|
41
96
|
# start with index pages
|
42
97
|
crawl_for_links('/')
|
@@ -54,9 +109,10 @@ module SitemapGenerator
|
|
54
109
|
# loop through array of pages, and build sitemap.xml
|
55
110
|
@pages.sort.each {|link|
|
56
111
|
xml.url {
|
57
|
-
xml.loc URI.join(@base_url, link)
|
112
|
+
xml.loc URI.join(@base_url, link).to_s
|
58
113
|
# TODO - set changefreq dynamically per page
|
59
114
|
xml.changefreq @change_frequency unless @change_frequency.nil?
|
115
|
+
xml.lastmod @page_times[link] unless @page_times[link].nil?
|
60
116
|
}
|
61
117
|
}
|
62
118
|
}
|
@@ -77,29 +133,25 @@ module SitemapGenerator
|
|
77
133
|
xml_file.close
|
78
134
|
end
|
79
135
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
return unless link_path.include?(@base_url)
|
87
|
-
link_path.sub!(@base_url,'')
|
88
|
-
end
|
89
|
-
file_path = resolve_file_path(File.join(@public_root, link_path))
|
90
|
-
|
91
|
-
if file_path.nil?
|
92
|
-
puts "Warning: Unable to resolve #{link_path} to a local file"
|
93
|
-
return
|
136
|
+
def ping_search_engines
|
137
|
+
require 'open-uri'
|
138
|
+
if @gzip_output
|
139
|
+
url = URI.join(@base_url,'sitemap.xml.gz').to_s
|
140
|
+
else
|
141
|
+
url = URI.join(@base_url,'sitemap.xml').to_s
|
94
142
|
end
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
143
|
+
index_location = CGI.escape(url)
|
144
|
+
|
145
|
+
# engines list from http://en.wikipedia.org/wiki/Sitemap_index
|
146
|
+
{:google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=#{index_location}",
|
147
|
+
:ask => "http://submissions.ask.com/ping?sitemap=#{index_location}",
|
148
|
+
:bing => "http://www.bing.com/webmaster/ping.aspx?siteMap=#{index_location}",
|
149
|
+
:sitemap_writer => "http://www.sitemapwriter.com/notify.php?crawler=all&url=#{index_location}"}.each do |engine, link|
|
150
|
+
begin
|
151
|
+
open(link)
|
152
|
+
puts "Successful ping of #{engine.to_s}" if verbose
|
153
|
+
rescue Timeout::Error, StandardError => e
|
154
|
+
puts "Ping failed for #{engine.to_s}: #{e.inspect}" if verbose
|
103
155
|
end
|
104
156
|
end
|
105
157
|
end
|
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "static_sitemap_tasks"
|
6
|
-
s.version = "0.
|
6
|
+
s.version = "0.2"
|
7
7
|
s.platform = Gem::Platform::RUBY
|
8
8
|
s.summary = 'Rake tasks to manage sitemap.xml generation for static sites'
|
9
9
|
s.description = 'Rake tasks to manage sitemap.xml generation for static sites'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: static_sitemap_tasks
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.2'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,11 +11,11 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2011-10-
|
14
|
+
date: 2011-10-14 00:00:00.000000000Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: rake
|
18
|
-
requirement: &
|
18
|
+
requirement: &70099743651700 !ruby/object:Gem::Requirement
|
19
19
|
none: false
|
20
20
|
requirements:
|
21
21
|
- - ! '>='
|
@@ -23,10 +23,10 @@ dependencies:
|
|
23
23
|
version: 0.8.7
|
24
24
|
type: :development
|
25
25
|
prerelease: false
|
26
|
-
version_requirements: *
|
26
|
+
version_requirements: *70099743651700
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
|
-
requirement: &
|
29
|
+
requirement: &70099743644920 !ruby/object:Gem::Requirement
|
30
30
|
none: false
|
31
31
|
requirements:
|
32
32
|
- - ! '>='
|
@@ -34,7 +34,7 @@ dependencies:
|
|
34
34
|
version: '1.0'
|
35
35
|
type: :development
|
36
36
|
prerelease: false
|
37
|
-
version_requirements: *
|
37
|
+
version_requirements: *70099743644920
|
38
38
|
description: Rake tasks to manage sitemap.xml generation for static sites
|
39
39
|
email:
|
40
40
|
- mleinartas@gmail.com
|
@@ -43,6 +43,7 @@ extensions: []
|
|
43
43
|
extra_rdoc_files: []
|
44
44
|
files:
|
45
45
|
- .gitignore
|
46
|
+
- ChangeLog
|
46
47
|
- Gemfile
|
47
48
|
- MIT-LICENSE
|
48
49
|
- README.md
|