static_sitemap_tasks 0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ pkg/
2
+ Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
# Gems needed to develop and package static_sitemap_tasks.
#
# The symbol form `source :rubygems` is deprecated by Bundler; name the
# HTTPS endpoint explicitly.
source "https://rubygems.org"

gem "bundler", ">= 1.0"
gem "hpricot"
gem "builder"
gem "rdoc"
data/MIT-LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Originally Authored by Chris Martin on 2007-04-30 (http://chriscodes.com/articles/view/54)
2
+ Updated by Tom Cocca on 2008-10-10.
3
+ Updated by Michael Leinartas on 2011-10-12
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,59 @@
1
+ == Generate Sitemap Plugin
2
+
3
+ Originally Authored by Chris Martin (http://chriscodes.com/articles/view/54)
4
+ Updates by Tom Cocca
5
+ Updates include:
6
+ - Adding a YAML config file to set URL and Change Frequency
7
+
8
+
9
+ == Generate Sitemap Rake Task
10
+
11
+ This is a Rails plugin that consists of a rake task to generate a sitemap.xml file.
12
+ The task crawls a domain (specified in a YAML config per environment) for all its URLs,
13
+ then builds the sitemap.xml file in public.
14
+
15
+ Since this simply crawls a domain for URLs, this could be used to generate
16
+ sitemaps for any site, not just a Rails application.
17
+
18
+
19
+ == Requirements
20
+
21
+ Both of these are available via RubyGems
22
+ Hpricot - http://code.whytheluckystiff.net/hpricot/
23
+ Builder - http://rubyforge.org/projects/builder/
24
+
25
+
26
+ == Configuration/Usage
27
+
28
+ On initialization generate_sitemap will copy the generate_sitemap_key.yml file to your
29
+ RAILS_ROOT/config directory.
30
+
31
+ Open this file and specify your domain for the different environments and specify the changefreq for
32
+ your site.
33
+
34
+ For changefreq options see this page: http://www.sitemaps.org/protocol.php
35
+ I haven't devised a way to set this dynamically, as most pages will vary.
36
+
37
+ The plugin will pull the correct domain name for crawling the site by using the RAILS_ENV variable
38
+ in the rake task.
39
+
40
+ Run the task with the command
41
+ rake generate_sitemap
42
+
43
+ Go to http://localhost:3000/sitemap.xml (or open public/sitemap.xml)
44
+
45
+
46
+ == TODO
47
+
48
+ - set changefreq, lastmod, priority dynamically during generation
49
+ - allow generation of sitemap index files
50
+ - write tests
51
+ - allow for exclusions to be specified in an array
52
+
53
+
54
+ == More Info
55
+
56
+ http://www.sitemaps.org/protocol.php
57
+
58
+ Questions, comments, patches, etc. can be sent to
59
+ tom dot cocca at gmail dot com
data/Rakefile ADDED
@@ -0,0 +1,7 @@
1
# Rakefile for the static_sitemap_tasks gem.
require 'rubygems'
require 'bundler'
require 'rake'

# Register the gem-packaging tasks supplied by Bundler's GemHelper; the
# :build task referenced below is expected to come from here.
Bundler::GemHelper.install_tasks

# Running `rake` with no arguments runs the :build task.
task(:default => [:build])
@@ -0,0 +1,150 @@
1
+ require 'rubygems'
2
+ require 'builder'
3
+ require 'hpricot'
4
+ require 'uri'
5
+
6
+
7
# Rake tasks that build a sitemap.xml for a static site by following the
# links found in the site's local HTML files.
module SitemapGenerator
  # Registers the +generate_sitemap+ rake task and implements the crawl that
  # backs it.
  #
  # Example (in a Rakefile):
  #
  #   SitemapGenerator::Tasks.install(:base_url    => 'http://example.com',
  #                                   :public_root => 'public')
  class Tasks
    include Rake::DSL

    # Builds a Tasks instance from +options+ and registers its rake task.
    #
    # options - Hash, see #initialize. A :dir key is accepted for backward
    #           compatibility but has no effect.
    #
    # Returns whatever #install returns.
    def self.install(options = {})
      new(options).install
    end

    # options - Hash of settings:
    #           :public_root      - directory holding the generated site
    #                               (default: current working directory).
    #           :change_frequency - value written to every <changefreq>
    #                               element; omitted when nil. See
    #                               http://www.sitemaps.org/protocol.php#changefreqdef
    #           :base_url         - canonical URL of the published site.
    #           :index_files      - file names tried when a link resolves to
    #                               a directory (default: index.html, index.htm).
    #           :gzip_output      - write sitemap.xml.gz instead of
    #                               sitemap.xml (default: true).
    def initialize(options = {})
      @public_root = options[:public_root] || Dir.pwd
      @change_frequency = options[:change_frequency]
      @base_url = options[:base_url]
      @index_files = options[:index_files] || ['index.html', 'index.htm']

      # `options[:gzip_output] || true` could never evaluate to false, so an
      # explicit :gzip_output => false was ignored. Only fall back to the
      # default when the option is absent (nil).
      gzip = options[:gzip_output]
      @gzip_output = gzip.nil? ? true : gzip
    end

    # Defines the +generate_sitemap+ rake task.
    def install
      desc "Generate a sitemap based on the contents of #{@public_root}"
      task 'generate_sitemap' do
        generate_sitemap
      end
    end

    # Crawls the site starting at '/', then writes sitemap.xml (or
    # sitemap.xml.gz when gzip output is enabled) into the public root.
    def generate_sitemap
      # pages to go into the map, and pages already crawled
      @pages = []
      @pages_crawled = []

      # start with the index page
      crawl_for_links('/')

      # crawl_for_links appends to @pages while this loop is running; the
      # loop is expected to reach those newly appended entries as well.
      @pages.each do |page|
        crawl_for_links(page) unless @pages_crawled.include?(page)
      end

      xml_string = build_sitemap_xml
      filename = File.join(@public_root, 'sitemap.xml')

      # Block forms close the output file even if writing raises.
      if @gzip_output
        require 'zlib'
        Zlib::GzipWriter.open("#{filename}.gz") { |gz| gz << xml_string }
      else
        File.open(filename, 'w') { |file| file << xml_string }
      end
    end

    # Parses the local file behind +link_path+ with Hpricot, records the path
    # in @pages_crawled and appends every eligible, not-yet-known link found
    # in it to @pages (as a site-relative path).
    #
    # link_path - site-relative path, or an absolute URL under @base_url.
    #             The argument is not modified.
    #
    # Returns nothing meaningful. Prints a warning and returns early when the
    # path cannot be resolved to a file or the file cannot be parsed.
    def crawl_for_links(link_path)
      # Only a leading http(s):// marks an absolute URL; a plain substring
      # check would misclassify local paths such as "/http-guide.html".
      if link_path =~ %r{\Ahttps?://}i
        return unless @base_url && link_path.start_with?(@base_url)
        link_path = site_relative_path(link_path)
      end

      file_path = resolve_file_path(File.join(@public_root, link_path))
      if file_path.nil?
        puts "Warning: Unable to resolve #{link_path} to a local file"
        return
      end

      puts "Inspecting #{file_path}...\n"
      begin
        # File.read closes the handle and, unlike Kernel#open, never treats
        # the path as a command to run.
        doc = Hpricot(File.read(file_path))
      rescue StandardError => e
        puts "Warning: Unable to parse #{file_path}: #{e.message}"
        return
      end
      return unless doc

      @pages_crawled << link_path
      (doc/"a").each do |anchor|
        href = anchor['href']
        next unless href && should_be_included?(href)

        # Store the site-relative form so absolute and relative links to the
        # same page are recognised as duplicates.
        page = site_relative_path(href)
        @pages << page unless link_exists?(page, @pages)
      end
    end

    # Maps +path+ to a file on disk.
    #
    # For a directory, returns the first entry of @index_files that exists
    # inside it; for anything else, returns +path+ itself when it exists.
    # Returns nil when nothing matches.
    def resolve_file_path(path)
      if File.directory?(path)
        # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
        index_name = @index_files.find { |name| File.exist?(File.join(path, name)) }
        index_name && File.join(path, index_name)
      elsif File.exist?(path)
        path
      end
    end

    # Decides whether a link belongs in the sitemap.
    #
    # Returns true for '/', for links under @base_url and for links without
    # a URI scheme. Returns false for in-page anchors ('#...'), for
    # protocol-relative links ('//host/...') and for any other link carrying
    # a scheme (http:, https:, mailto:, javascript:, ...).
    def should_be_included?(str)
      return false if str.start_with?('#')
      return true if str == '/'
      return true if @base_url && str.start_with?(@base_url)
      return false if str.start_with?('//')

      # No leading "scheme:" means the link is local to the site.
      (str =~ /\A[a-z][a-z0-9+.\-]*:/i).nil?
    end

    # Returns true when +array+ already holds a link equal to +str+,
    # ignoring leading and trailing '/' characters only. Interior slashes
    # are significant, so '/a/b.html' and '/ab.html' are different links.
    def link_exists?(str, array)
      wanted = trim_slashes(str)
      array.any? { |link| trim_slashes(link) == wanted }
    end

    # Returns a copy of +str+ with every '/' character removed.
    def strip_slashes(str)
      str.delete('/')
    end

    private

    # Renders @pages as a sitemaps.org <urlset> document and returns it as a
    # String.
    def build_sitemap_xml
      xml = Builder::XmlMarkup.new(:indent => 2)
      xml.instruct!
      xml.comment! "Generated on: " + Time.now.to_s
      xml.urlset("xmlns" => "http://www.sitemaps.org/schemas/sitemap/0.9") do
        @pages.sort.each do |link|
          xml.url do
            xml.loc URI.join(@base_url, link).to_s
            # TODO - set changefreq dynamically per page
            xml.changefreq @change_frequency unless @change_frequency.nil?
          end
        end
      end

      # target! returns the generated markup; calling to_s on the builder
      # would emit a stray <to_s/> element instead.
      xml.target!
    end

    # Returns +link+ with a leading @base_url removed ('/' when nothing is
    # left); links that do not start with @base_url are returned unchanged.
    def site_relative_path(link)
      return link if @base_url.nil? || !link.start_with?(@base_url)

      remainder = link[@base_url.length..-1]
      remainder.empty? ? '/' : remainder
    end

    # Returns +str+ without its leading and trailing '/' characters.
    def trim_slashes(str)
      str.gsub(%r{\A/+|/+\z}, '')
    end
  end
end
@@ -0,0 +1,25 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for static_sitemap_tasks.
$:.push File.expand_path("../lib", __FILE__)

Gem::Specification.new do |s|
  s.name        = "static_sitemap_tasks"
  s.version     = "0.1"
  s.platform    = Gem::Platform::RUBY
  s.summary     = 'Rake tasks to manage sitemap.xml generation for static sites'
  s.description = 'Rake tasks to manage sitemap.xml generation for static sites'

  s.required_ruby_version     = ">= 1.8.7"
  s.required_rubygems_version = ">= 1.3.6"

  # "Tom Cocca" matches the spelling used in MIT-LICENSE and README.md.
  s.authors  = ["Michael Leinartas", "Tom Cocca", "Chris Martin"]
  s.email    = ["mleinartas@gmail.com"]
  s.homepage = "https://github.com/mleinart/static_sitemap_tasks"

  # Package exactly what git tracks (requires git at build time).
  s.files        = `git ls-files`.split("\n")
  s.test_files   = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables  = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_path = 'lib'

  # lib/static_sitemap_tasks.rb requires builder and hpricot and includes
  # Rake::DSL when it is loaded, so all three are runtime dependencies.
  s.add_dependency "builder"
  s.add_dependency "hpricot"
  # NOTE(review): Rake::DSL is believed to have first shipped in rake 0.9.0 -
  # confirm before release.
  s.add_dependency "rake", ">= 0.9.0"

  s.add_development_dependency "bundler", ">= 1.0"
end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: static_sitemap_tasks
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Michael Leinartas
9
+ - Tim Cocca
10
+ - Chris Martin
11
+ autorequire:
12
+ bindir: bin
13
+ cert_chain: []
14
+ date: 2011-10-12 00:00:00.000000000Z
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: rake
18
+ requirement: &70212813523740 !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ! '>='
22
+ - !ruby/object:Gem::Version
23
+ version: 0.8.7
24
+ type: :development
25
+ prerelease: false
26
+ version_requirements: *70212813523740
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: &70212813523280 !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - ! '>='
33
+ - !ruby/object:Gem::Version
34
+ version: '1.0'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: *70212813523280
38
+ description: Rake tasks to manage sitemap.xml generation for static sites
39
+ email:
40
+ - mleinartas@gmail.com
41
+ executables: []
42
+ extensions: []
43
+ extra_rdoc_files: []
44
+ files:
45
+ - .gitignore
46
+ - Gemfile
47
+ - MIT-LICENSE
48
+ - README.md
49
+ - Rakefile
50
+ - lib/static_sitemap_tasks.rb
51
+ - static_sitemap_tasks.gemspec
52
+ homepage: https://github.com/mleinart/static_sitemap_tasks
53
+ licenses: []
54
+ post_install_message:
55
+ rdoc_options: []
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ! '>='
62
+ - !ruby/object:Gem::Version
63
+ version: 1.8.7
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: 1.3.6
70
+ requirements: []
71
+ rubyforge_project:
72
+ rubygems_version: 1.8.10
73
+ signing_key:
74
+ specification_version: 3
75
+ summary: Rake tasks to manage sitemap.xml generation for static sites
76
+ test_files: []