blacklight-sitemap 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
# Fetch gems over HTTPS so packages cannot be tampered with in transit.
source "https://rubygems.org"

# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Runtime dependency: lib/blacklight-sitemap.rb requires nokogiri.
gem 'nokogiri'

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "rspec", "~> 2.1.0"
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.5.1"
  gem "rcov", ">= 0"
end
data/Gemfile.lock ADDED
@@ -0,0 +1,30 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.1.2)
5
+ git (1.2.5)
6
+ jeweler (1.5.1)
7
+ bundler (~> 1.0.0)
8
+ git (>= 1.2.5)
9
+ rake
10
+ nokogiri (1.4.4)
11
+ rake (0.8.7)
12
+ rcov (0.9.9)
13
+ rspec (2.1.0)
14
+ rspec-core (~> 2.1.0)
15
+ rspec-expectations (~> 2.1.0)
16
+ rspec-mocks (~> 2.1.0)
17
+ rspec-core (2.1.0)
18
+ rspec-expectations (2.1.0)
19
+ diff-lcs (~> 1.1.2)
20
+ rspec-mocks (2.1.0)
21
+
22
+ PLATFORMS
23
+ ruby
24
+
25
+ DEPENDENCIES
26
+ bundler (~> 1.0.0)
27
+ jeweler (~> 1.5.1)
28
+ nokogiri
29
+ rcov
30
+ rspec (~> 2.1.0)
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2010 North Carolina State University
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
data/README.rdoc ADDED
@@ -0,0 +1,41 @@
1
+ = blacklight-sitemap
2
+
3
+ Rake task for creating a sitemap from a Blacklight Solr index.
4
+
5
+ == Installation
6
+
7
+ In config/environment.rb add:
8
+ config.gem 'blacklight-sitemap'
9
+
10
+ Install the gem:
11
+ rake gems:install
12
+ or
13
+ gem install blacklight-sitemap
14
+
15
+ Run the generator:
16
+ script/generate blacklight_sitemap
17
+
18
+ Open your Rakefile and edit the BlacklightSitemapTask to your liking using the
19
+ provided documentation.
20
+
21
+ Run the rake task:
22
+ rake blacklight:sitemap
23
+
24
+ Remove create sitemap files:
25
+ rake blacklight:sitemap:clobber
26
+
27
+
28
+ == TODO
29
+
30
+ * Determine the lastmodified date for a particular sub-sitemap. For large indexes, allowing search engines to only download the sitemaps which have changed will be important. Can we always sort by timestamp?
31
+ * How to more fully test the gem independent from a Rails application with Blacklight installed?
32
+
33
+ == Author
34
+
35
+ Jason Ronallo
36
+
37
+ == Copyright
38
+
39
+ Copyright (c) 2010 North Carolina State University. See LICENSE.txt for
40
+ further details.
41
+
data/Rakefile ADDED
@@ -0,0 +1,51 @@
1
# Build, packaging, spec and documentation tasks for the blacklight-sitemap gem.
require 'rubygems'
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  # Exit with Bundler's own status code so callers can tell why setup failed.
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "blacklight-sitemap"
  gem.homepage = "http://github.com/jronallo/blacklight-sitemap"
  gem.license = "MIT"
  gem.summary = %Q{Blacklight sitemap rake task.}
  gem.description = %Q{Blacklight sitemap rake task.}
  gem.email = "jronallo@gmail.com"
  gem.authors = ["Jason Ronallo"]
  # Include your dependencies below. Runtime dependencies are required when using your gem,
  # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
  #  gem.add_runtime_dependency 'jabber4r', '> 0.1'
  #  gem.add_development_dependency 'rspec', '> 1.2.3'
  gem.files = FileList["[A-Z]*", "{generators,lib}/**/*"]
end
Jeweler::RubygemsDotOrgTasks.new

require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

RSpec::Core::RakeTask.new(:rcov) do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

task :default => :spec

# Prefer RDoc's own task class; fall back to the legacy Rake location on
# installations where rdoc/task is not available.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip: the VERSION file ends with a newline that must not leak into the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "blacklight-sitemap #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,53 @@
1
# Generator that appends a commented Rake::BlacklightSitemapTask definition to
# the Rakefile in the current working directory, unless the Rakefile already
# contains one.
class BlacklightSitemapGenerator < Rails::Generator::Base
  # Snippet appended to the Rakefile. Every option is commented out and shown
  # with its default value.
  SITEMAP_TASK_TEMPLATE = <<EOF
require 'blacklight-sitemap'
Rake::BlacklightSitemapTask.new do |sm|
  # below are configuration options with their default values shown.

  # FIXME: you'll definitely want to change the url value
  # sm.url = 'http://localhost:3000'

  # base filename given to generated sitemap files
  # sm.base_filename = 'blacklight'

  # Is the gzip commandline tool available? Then why not gzip up your sitemaps to
  # save bandwidth?
  # sm.gzip = false

  # for changefreq see http://sitemaps.org/protocol.php#changefreqdef
  # valid values are: always, hourly, daily, weekly, monthly, yearly, never
  # sm.changefreq = nil # nil won't display a changefreq element

  # sitemaps can contain up to 50000 locations, but also must not be more than
  # 10 MB in size. Using the max value you can control the size of your files.
  # sm.max = 50000

  # Solr field used to retrieve from a document the value for the lastmod element for a url
  # sm.lastmod_field = 'timestamp'

  # Solr field used to retrieve from a document the value for the priority element for a url
  # sm.priority_field = nil
end
EOF

  # Appends the task snippet to ./Rakefile when no
  # Rake::BlacklightSitemapTask.new call is present yet, so running the
  # generator twice does not duplicate the snippet.
  #
  # Returns whatever +record+ returns.
  def manifest
    record do |_manifest|
      rakefile = File.read('Rakefile')
      unless rakefile.include?('Rake::BlacklightSitemapTask.new')
        # Append rather than rewrite so existing Rakefile content is never
        # truncated if the write is interrupted.
        File.open('Rakefile', 'a') { |f| f.puts "\n" + SITEMAP_TASK_TEMPLATE }
      end
    end
  end

  protected

  # Usage text for the generator.
  def banner
    %{Usage: #{$0} #{spec.name}\nAppends a Rake::BlacklightSitemapTask definition to the Rakefile}
  end

end
53
+
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/env ruby
2
+ require 'nokogiri'
3
+ require 'rake'
4
+ require 'fileutils'
5
+
6
module Rake
  # Defines the blacklight:sitemap rake tasks, which write sitemap XML files
  # (one per batch of Solr documents, plus a sitemap index) into the public/
  # directory under RAILS_ROOT.
  #
  # Configure it by passing a block to +new+:
  #
  #   Rake::BlacklightSitemapTask.new do |sm|
  #     sm.url = 'http://example.com'
  #   end
  #
  # Tasks defined:
  # blacklight:sitemap::         clobber, then create
  # blacklight:sitemap:create::  query Solr and write the sitemap files
  # blacklight:sitemap:clobber:: delete previously generated sitemap files
  class BlacklightSitemapTask
    # Newer Rake releases expose task/namespace/desc through Rake::DSL instead
    # of adding them to every Object; older releases have no such module.
    include ::Rake::DSL if defined?(::Rake::DSL)

    # XML namespace used for both sitemap and sitemap index documents.
    SITEMAP_XMLNS = 'http://www.sitemaps.org/schemas/sitemap/0.9'

    # base url used for all locations
    attr_accessor :url

    # base filename to use for sitemap in case these will be moved to a location
    # that hosts other sitemaps so these sitemaps do not overwrite others
    attr_accessor :base_filename

    # should the files be gzipped? requires the commandline tool gzip
    attr_accessor :gzip

    # value for changefreq for each page listed
    attr_accessor :changefreq

    # the most resources which should be listed within a single sitemap
    # defaults to 50,000
    attr_accessor :max

    # Solr field that contains a date to create a lastmod date for the page.
    # Currently must be a string as in W3C Datetime format or YYYY-MM-DD
    attr_accessor :lastmod_field

    # Solr field to use to provide a priority for this resource
    attr_accessor :priority_field

    # Sets the defaults, yields self for configuration when a block is given,
    # then defines the rake tasks.
    def initialize
      @url = 'http://localhost:3000'
      @base_filename = 'blacklight'
      @gzip = false
      @changefreq = nil
      @max = 50000 # default value for max number of locs per sitemap file
      @lastmod_field = 'timestamp'
      @priority_field = nil
      yield self if block_given?
      define
    end

    # Registers the blacklight:sitemap, blacklight:sitemap:create and
    # blacklight:sitemap:clobber tasks with Rake.
    def define
      namespace :blacklight do
        desc 'clobber then create sitemap files for blacklight'
        task :sitemap => ['sitemap:clobber', 'sitemap:create']

        namespace :sitemap do
          desc 'create a sitemap for blacklight'
          task :create => :environment do
            create_sitemaps
          end

          desc 'clobber sitemap files'
          task :clobber do
            clobber_sitemaps
          end
        end
      end
    end

    private

    # Queries Solr for the total document count, writes one sitemap file per
    # batch of +max+ documents, then writes the sitemap index.
    #
    # Raises ArgumentError when +max+ is not a positive Integer.
    def create_sitemaps
      unless @max.is_a?(Integer) && @max > 0
        raise ArgumentError, "max must be a positive Integer (got #{@max.inspect})"
      end

      puts 'Creating a sitemap...'
      fl = ['id', @lastmod_field, @priority_field].compact.join(',')
      base_solr_parameters = {:qt => 'standard', :q => 'id:[* TO *]', :fl => fl}
      number_of_resources = Blacklight.solr.find(base_solr_parameters.merge(:rows => 1))['response']['numFound']
      puts 'Number of resources: ' + number_of_resources.to_s
      batches = (number_of_resources / @max.to_f).ceil
      puts 'Total sitemap to create: ' + batches.to_s

      batches.times do |batch_number|
        puts 'Processing batch # ' + (batch_number + 1).to_s
        paging = {:rows => @max, :start => batch_number * @max}
        response = Blacklight.solr.find(base_solr_parameters.merge(paging))['response']
        write_sitemap(batch_number, response['docs'])
      end

      write_sitemap_index(batches)
      puts 'Done.'
    end

    # Writes the sitemap file for one batch of Solr documents and gzips it
    # when +gzip+ is set.
    def write_sitemap(batch_number, docs)
      path = public_path(sitemap_basename(batch_number))
      File.open(path, 'w') { |fh| fh.puts sitemap_xml(docs) }
      return unless @gzip

      # The argument-list form of system bypasses the shell, so paths that
      # contain spaces or shell metacharacters are passed through intact.
      unless system('gzip', path)
        warn "gzip failed for #{path}; the sitemap index will still reference the .gz file"
      end
    end

    # Builds the urlset XML for a batch of Solr documents.
    def sitemap_xml(docs)
      builder = Nokogiri::XML::Builder.new do |xml|
        xml.urlset 'xmlns' => SITEMAP_XMLNS do
          docs.each do |doc|
            xml.url do
              # FIXME through config
              # to_s: File.join raises on non-String ids.
              xml.loc File.join(@url.to_s, doc['id'].to_s)
              xml.lastmod doc[@lastmod_field].to_s if @lastmod_field && doc[@lastmod_field]
              xml.priority doc[@priority_field] if @priority_field && doc[@priority_field]
              xml.changefreq @changefreq if @changefreq
            end
          end
        end
      end
      builder.to_xml
    end

    # Writes the sitemap index, which lists every batch file under +url+ and
    # stamps each entry with the current UTC time.
    def write_sitemap_index(batches)
      puts 'Creating sitemap index...'
      # Time (not DateTime) so no ActiveSupport extension is needed for #utc.
      lastmod = Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S+00:00")
      index_builder = Nokogiri::XML::Builder.new do |xml|
        xml.sitemapindex 'xmlns' => SITEMAP_XMLNS do
          batches.times do |batch|
            location = File.join(@url.to_s, sitemap_basename(batch))
            location << '.gz' if @gzip
            xml.sitemap do
              xml.loc location
              xml.lastmod lastmod
            end
          end
        end
      end
      File.open(public_path(@base_filename + '-sitemap.xml'), 'w') do |fh|
        fh.puts index_builder.to_xml
      end
    end

    # Deletes every file under public/ whose name starts with
    # "<base_filename>-sitemap".
    def clobber_sitemaps
      puts "Deleting all sitemap files..."
      Dir.glob(public_path(@base_filename + '-sitemap*')).each do |sitemap|
        FileUtils.rm(sitemap)
      end
    end

    # File name (without directory) of the sitemap for the given batch.
    def sitemap_basename(batch_number)
      @base_filename + '-sitemap' + batch_number.to_s + '.xml'
    end

    # Absolute path of +filename+ inside the public/ directory under RAILS_ROOT.
    def public_path(filename)
      File.join(RAILS_ROOT, 'public', filename)
    end
  end # BlacklightSitemapTask
end # Rake
122
+
@@ -0,0 +1,93 @@
1
require File.expand_path('spec_helper', File.dirname(__FILE__))

# Checks that Rake::BlacklightSitemapTask registers its rake tasks and that
# each configuration attribute has the expected default and can be overridden
# from the configuration block.
describe "BlacklightSitemap" do
  # Builds a task whose configuration block assigns every attribute => value
  # pair in +settings+.
  def configured_task(settings)
    Rake::BlacklightSitemapTask.new do |sm|
      settings.each_pair { |attribute, value| sm.send("#{attribute}=", value) }
    end
  end

  # Defines the tasks with default settings, then looks one up by full name.
  def defined_rake_task(name)
    Rake::BlacklightSitemapTask.new
    Rake::Task[name]
  end

  before(:all) do
    @default_task = Rake::BlacklightSitemapTask.new
  end

  it 'should create a default task which clobbers then creates' do
    defined_rake_task('blacklight:sitemap').should be_a_kind_of(Rake::Task)
  end

  it "should create the sitemap creation task" do
    defined_rake_task('blacklight:sitemap:create').should be_a_kind_of(Rake::Task)
  end

  it "should be able to have the url attribute" do
    configured_task(:url => 'http://example.com').url.should eq('http://example.com')
  end

  it 'should have a base_filename attribute' do
    @default_task.base_filename.should eq('blacklight')
  end

  it 'should allow for changing the base_filename attribute' do
    configured_task(:base_filename => 'bl').base_filename.should eq('bl')
  end

  it 'should store a value for whether to gzip or not' do
    configured_task(:gzip => true).gzip.should be_true
  end

  it 'should have a default nil value for changefreq' do
    @default_task.changefreq.should be_nil
  end

  it 'should allow for changing the changefreq value' do
    configured_task(:changefreq => 'never').changefreq.should eq('never')
  end

  it 'should set the ceiling to 50,000 as a default' do
    @default_task.max.should == 50000
  end

  it 'should allow the ceiling to be set to a different value' do
    configured_task(:max => 50).max.should == 50
  end

  it 'should have a default value for the lastmod_field' do
    @default_task.lastmod_field.should eq('timestamp')
  end

  it 'should allow a new value for lastmod_field' do
    configured_task(:lastmod_field => 'date_created').lastmod_field.should eq('date_created')
  end

  it 'should have a default value for the priority_field' do
    @default_task.priority_field.should be_nil
  end

  it 'should allow a new value for priority_field' do
    configured_task(:priority_field => 'priority').priority_field.should eq('priority')
  end

  it 'should create the sitemap clobber task' do
    defined_rake_task('blacklight:sitemap:clobber').should be_a_kind_of(Rake::Task)
  end

end
93
+
@@ -0,0 +1,12 @@
1
# Spec bootstrap: puts lib/ and spec/ on the load path, loads RSpec and the
# gem, then requires every support file.
spec_dir = File.dirname(__FILE__)

$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'))
$LOAD_PATH.unshift(spec_dir)
require 'rspec'
require 'blacklight-sitemap'

# Requires supporting files with custom matchers and macros, etc,
# in ./support/ and its subdirectories.
Dir["#{spec_dir}/support/**/*.rb"].each do |support_file|
  require support_file
end

RSpec.configure do |config|

end
metadata ADDED
@@ -0,0 +1,153 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: blacklight-sitemap
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Jason Ronallo
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-12-02 00:00:00 -05:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ version_requirements: &id001 !ruby/object:Gem::Requirement
23
+ none: false
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ hash: 3
28
+ segments:
29
+ - 0
30
+ version: "0"
31
+ requirement: *id001
32
+ prerelease: false
33
+ type: :runtime
34
+ name: nokogiri
35
+ - !ruby/object:Gem::Dependency
36
+ version_requirements: &id002 !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ~>
40
+ - !ruby/object:Gem::Version
41
+ hash: 11
42
+ segments:
43
+ - 2
44
+ - 1
45
+ - 0
46
+ version: 2.1.0
47
+ requirement: *id002
48
+ prerelease: false
49
+ type: :development
50
+ name: rspec
51
+ - !ruby/object:Gem::Dependency
52
+ version_requirements: &id003 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ~>
56
+ - !ruby/object:Gem::Version
57
+ hash: 23
58
+ segments:
59
+ - 1
60
+ - 0
61
+ - 0
62
+ version: 1.0.0
63
+ requirement: *id003
64
+ prerelease: false
65
+ type: :development
66
+ name: bundler
67
+ - !ruby/object:Gem::Dependency
68
+ version_requirements: &id004 !ruby/object:Gem::Requirement
69
+ none: false
70
+ requirements:
71
+ - - ~>
72
+ - !ruby/object:Gem::Version
73
+ hash: 1
74
+ segments:
75
+ - 1
76
+ - 5
77
+ - 1
78
+ version: 1.5.1
79
+ requirement: *id004
80
+ prerelease: false
81
+ type: :development
82
+ name: jeweler
83
+ - !ruby/object:Gem::Dependency
84
+ version_requirements: &id005 !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ hash: 3
90
+ segments:
91
+ - 0
92
+ version: "0"
93
+ requirement: *id005
94
+ prerelease: false
95
+ type: :development
96
+ name: rcov
97
+ description: Blacklight sitemap rake task.
98
+ email: jronallo@gmail.com
99
+ executables: []
100
+
101
+ extensions: []
102
+
103
+ extra_rdoc_files:
104
+ - LICENSE.txt
105
+ - README.rdoc
106
+ files:
107
+ - Gemfile
108
+ - Gemfile.lock
109
+ - LICENSE.txt
110
+ - README.rdoc
111
+ - Rakefile
112
+ - VERSION
113
+ - generators/blacklight_sitemap/blacklight_sitemap_generator.rb
114
+ - lib/blacklight-sitemap.rb
115
+ - spec/blacklight-sitemap_spec.rb
116
+ - spec/spec_helper.rb
117
+ has_rdoc: true
118
+ homepage: http://github.com/jronallo/blacklight-sitemap
119
+ licenses:
120
+ - MIT
121
+ post_install_message:
122
+ rdoc_options: []
123
+
124
+ require_paths:
125
+ - lib
126
+ required_ruby_version: !ruby/object:Gem::Requirement
127
+ none: false
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ hash: 3
132
+ segments:
133
+ - 0
134
+ version: "0"
135
+ required_rubygems_version: !ruby/object:Gem::Requirement
136
+ none: false
137
+ requirements:
138
+ - - ">="
139
+ - !ruby/object:Gem::Version
140
+ hash: 3
141
+ segments:
142
+ - 0
143
+ version: "0"
144
+ requirements: []
145
+
146
+ rubyforge_project:
147
+ rubygems_version: 1.3.7
148
+ signing_key:
149
+ specification_version: 3
150
+ summary: Blacklight sitemap rake task.
151
+ test_files:
152
+ - spec/blacklight-sitemap_spec.rb
153
+ - spec/spec_helper.rb