blacklight-sitemap 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
# Bundler manifest for the blacklight-sitemap gem.
source 'http://rubygems.org'

# Runtime dependency: the sitemap task builds its XML with Nokogiri.
# Further runtime gems would be listed here, for example:
#   gem "activesupport", ">= 2.3.5"
gem 'nokogiri'

# Development-only dependencies: everything needed to run rake, the specs
# and coverage, plus jeweler for packaging the gem.
gem 'rspec',   '~> 2.1.0', :group => :development
gem 'bundler', '~> 1.0.0', :group => :development
gem 'jeweler', '~> 1.5.1', :group => :development
gem 'rcov',    '>= 0',     :group => :development
data/Gemfile.lock ADDED
@@ -0,0 +1,30 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.1.2)
5
+ git (1.2.5)
6
+ jeweler (1.5.1)
7
+ bundler (~> 1.0.0)
8
+ git (>= 1.2.5)
9
+ rake
10
+ nokogiri (1.4.4)
11
+ rake (0.8.7)
12
+ rcov (0.9.9)
13
+ rspec (2.1.0)
14
+ rspec-core (~> 2.1.0)
15
+ rspec-expectations (~> 2.1.0)
16
+ rspec-mocks (~> 2.1.0)
17
+ rspec-core (2.1.0)
18
+ rspec-expectations (2.1.0)
19
+ diff-lcs (~> 1.1.2)
20
+ rspec-mocks (2.1.0)
21
+
22
+ PLATFORMS
23
+ ruby
24
+
25
+ DEPENDENCIES
26
+ bundler (~> 1.0.0)
27
+ jeweler (~> 1.5.1)
28
+ nokogiri
29
+ rcov
30
+ rspec (~> 2.1.0)
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2010 North Carolina State University
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
data/README.rdoc ADDED
@@ -0,0 +1,41 @@
1
+ = blacklight-sitemap
2
+
3
+ Rake task for creating a sitemap from a Blacklight Solr index.
4
+
5
+ == Installation
6
+
7
+ In config/environment.rb add:
8
+ config.gem 'blacklight-sitemap'
9
+
10
+ Install the gem:
11
+ rake gems:install
12
+ or
13
+ gem install blacklight-sitemap
14
+
15
+ Run the generator:
16
+ script/generate blacklight_sitemap
17
+
18
+ Open your Rakefile and edit the BlacklightSitemapTask to your liking using the
19
+ provided documentation.
20
+
21
+ Run the rake task:
22
+ rake blacklight:sitemap
23
+
24
+ Remove the created sitemap files:
25
+ rake blacklight:sitemap:clobber
26
+
27
+
28
+ == TODO
29
+
30
+ * Determine the lastmodified date for a particular sub-sitemap. For large indexes, allowing search engines to only download the sitemaps which have changed will be important. Can we always sort by timestamp?
31
+ * How to more fully test the gem independent from a Rails application with Blacklight installed?
32
+
33
+ == Author
34
+
35
+ Jason Ronallo
36
+
37
+ == Copyright
38
+
39
+ Copyright (c) 2010 North Carolina State University. See LICENSE.txt for
40
+ further details.
41
+
data/Rakefile ADDED
@@ -0,0 +1,51 @@
1
require 'rubygems'
require 'bundler'

# Activate the default and development gem groups before anything else is
# required; if Bundler cannot, report why and exit with Bundler's status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => error
  $stderr.puts error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit error.status_code
end
require 'rake'

# Packaging tasks. The block receives a Gem::Specification; see
# http://docs.rubygems.org/read/chapter/20 for the available options.
require 'jeweler'
Jeweler::Tasks.new do |gemspec|
  gemspec.name        = "blacklight-sitemap"
  gemspec.homepage    = "http://github.com/jronallo/blacklight-sitemap"
  gemspec.license     = "MIT"
  gemspec.summary     = "Blacklight sitemap rake task."
  gemspec.description = "Blacklight sitemap rake task."
  gemspec.email       = "jronallo@gmail.com"
  gemspec.authors     = ["Jason Ronallo"]
  # Dependencies can be declared here. Runtime dependencies are required when
  # using the gem; development dependencies are only needed for development
  # (running rake tasks, tests, etc.), e.g.:
  #   gemspec.add_runtime_dependency 'jabber4r', '> 0.1'
  #   gemspec.add_development_dependency 'rspec', '> 1.2.3'
  gemspec.files = FileList["[A-Z]*", "{generators,lib}/**/*"]
end
Jeweler::RubygemsDotOrgTasks.new

require 'rspec/core'
require 'rspec/core/rake_task'

# `rake spec` runs every spec file under spec/.
RSpec::Core::RakeTask.new(:spec) do |t|
  t.pattern = FileList['spec/**/*_spec.rb']
end

# `rake rcov` runs the same specs with rcov enabled.
RSpec::Core::RakeTask.new(:rcov) do |t|
  t.pattern = 'spec/**/*_spec.rb'
  t.rcov = true
end

task :default => :spec

# RDoc generation; the title carries the contents of VERSION when that file exists.
require 'rake/rdoctask'
Rake::RDocTask.new do |doc|
  version = ""
  version = File.read('VERSION') if File.exist?('VERSION')

  doc.rdoc_dir = 'rdoc'
  doc.title = "blacklight-sitemap #{version}"
  doc.rdoc_files.include('README*')
  doc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,53 @@
1
# Rails generator (run as `script/generate blacklight_sitemap`) that appends a
# commented-out Rake::BlacklightSitemapTask configuration block to the
# Rakefile in the current working directory.
class BlacklightSitemapGenerator < Rails::Generator::Base
  # Text whose presence in the Rakefile means the task is already installed.
  TASK_MARKER = 'Rake::BlacklightSitemapTask.new'

  # Snippet appended to the Rakefile. Every option is shown commented out
  # with its default value.
  SITEMAP_TASK = <<EOF
require 'blacklight-sitemap'
Rake::BlacklightSitemapTask.new do |sm|
  # below are configuration options with their default values shown.

  # FIXME: you'll definitely want to change the url value
  # sm.url = 'http://localhost:3000'

  # base filename given to generated sitemap files
  # sm.base_filename = 'blacklight'

  # Is the gzip commandline tool available? Then why not gzip up your sitemaps to
  # save bandwidth?
  # sm.gzip = false

  # for changefreq see http://sitemaps.org/protocol.php#changefreqdef
  # valid values are: always, hourly, daily, weekly, monthly, yearly, never
  # sm.changefreq = nil # nil won't display a changefreq element

  # sitemaps can contain up to 50000 locations, but also must not be more than
  # 10 MB in size. Using the max value you can control the size of your files.
  # sm.max = 50000

  # Solr field used to retrieve from a document the value for the lastmod element for a url
  # sm.lastmod_field = 'timestamp'

  # Solr field used to retrieve from a document the value for the priority element for a url
  # sm.priority_field = nil
end
EOF

  # Appends SITEMAP_TASK to ./Rakefile unless the Rakefile already mentions
  # the task, so running the generator twice does not duplicate the snippet.
  # The file is opened in append mode, leaving its existing content untouched.
  def manifest
    record do |m|
      unless File.read('Rakefile').include?(TASK_MARKER)
        File.open('Rakefile', 'a') { |f| f << "\n" << SITEMAP_TASK }
      end
    end
  end

  protected

  # Usage text for the generator.
  def banner
    %{Usage: #{$0} #{spec.name}\nAdds a Rake::BlacklightSitemapTask configuration block to the Rakefile}
  end
end
53
+
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/env ruby
2
+ require 'nokogiri'
3
+ require 'rake'
4
+ require 'fileutils'
5
+
6
module Rake
  # Defines the rake tasks blacklight:sitemap, blacklight:sitemap:create and
  # blacklight:sitemap:clobber.
  #
  # Configure through the block passed to ::new:
  #
  #   Rake::BlacklightSitemapTask.new do |sm|
  #     sm.url = 'http://example.com'
  #   end
  #
  # The create task queries Solr through Blacklight.solr and writes
  # <base_filename>-sitemapN.xml files plus a <base_filename>-sitemap.xml
  # index into the application's public directory.
  class BlacklightSitemapTask
    # Newer rake versions expose namespace/task/desc through Rake::DSL rather
    # than on every object; older versions have no such module and provide
    # the methods globally.
    include Rake::DSL if defined?(Rake::DSL)

    # XML namespace shared by sitemap and sitemap index documents.
    SITEMAP_XMLNS = 'http://www.sitemaps.org/schemas/sitemap/0.9'

    # base url used for all locations
    attr_accessor :url

    # base filename to use for sitemap in case these will be moved to a location
    # that hosts other sitemaps so these sitemaps do not overwrite others
    attr_accessor :base_filename

    # should the files be gzipped? requires the commandline tool gzip
    attr_accessor :gzip

    # value for changefreq for each page listed
    attr_accessor :changefreq

    # the most resources which should be listed within a single sitemap
    # defaults to 50,000
    attr_accessor :max

    # Solr field that contains a date to create a lastmod date for the page.
    # Currently must be a string as in W3C Datetime format or YYYY-MM-DD
    attr_accessor :lastmod_field

    # Solr field to use to provide a priority for this resource
    attr_accessor :priority_field

    # Sets the defaults, yields self so the caller can override them, then
    # registers the rake tasks.
    def initialize
      @url = 'http://localhost:3000'
      @base_filename = 'blacklight'
      @gzip = false
      @changefreq = nil
      @max = 50000 # default value for max number of locs per sitemap file
      @lastmod_field = 'timestamp'
      @priority_field = nil
      yield self if block_given?
      define
    end

    # Registers the tasks. The task bodies read the configuration when they
    # run, not when they are defined.
    def define
      namespace :blacklight do
        desc 'clobber then create sitemap files for blacklight'
        task :sitemap => ['sitemap:clobber', 'sitemap:create']

        namespace :sitemap do
          desc 'create a sitemap for blacklight'
          task :create => :environment do
            create_sitemaps
          end

          desc 'clobber sitemap files'
          task :clobber do
            clobber_sitemaps
          end
        end # namespace :sitemap
      end # namespace :blacklight
    end

    private

    # Writes one sitemap file per batch of at most +max+ documents, then the index.
    def create_sitemaps
      # Dividing by a zero max would yield Infinity and fail obscurely in #ceil.
      raise ArgumentError, 'max must be greater than zero' unless @max.to_f > 0

      puts 'Creating a sitemap...'
      fl = ['id', @lastmod_field, @priority_field].compact.join(',')
      base_solr_parameters = {:qt => 'standard', :q => 'id:[* TO *]', :fl => fl}
      number_of_resources = Blacklight.solr.find(base_solr_parameters.merge(:rows => 1))['response']['numFound']
      puts "Number of resources: #{number_of_resources}"
      batches = (number_of_resources / @max.to_f).ceil
      puts "Total sitemap to create: #{batches}"

      batches.times { |batch_number| write_sitemap(base_solr_parameters, batch_number) }
      write_sitemap_index(batches)
      puts 'Done.'
    end

    # Fetches batch +batch_number+ (zero-based) from Solr and writes it as a
    # urlset document, gzipping the file when +gzip+ is set.
    def write_sitemap(base_solr_parameters, batch_number)
      puts "Processing batch # #{batch_number + 1}"
      paging = {:rows => @max, :start => batch_number * @max}
      response = Blacklight.solr.find(base_solr_parameters.merge(paging))['response']

      builder = Nokogiri::XML::Builder.new do |xml|
        xml.urlset 'xmlns' => SITEMAP_XMLNS do
          response['docs'].each do |doc|
            xml.url do
              # FIXME through config
              # to_s: File.join raises a TypeError for non-String ids.
              xml.loc File.join(@url.to_s, doc['id'].to_s)
              xml.lastmod doc[@lastmod_field].to_s if @lastmod_field && doc[@lastmod_field]
              xml.priority doc[@priority_field].to_s if @priority_field && doc[@priority_field]
              xml.changefreq @changefreq if @changefreq
            end
          end
        end
      end

      path = File.join(public_dir, sitemap_basename(batch_number))
      File.open(path, 'w') { |fh| fh.puts builder.to_xml }
      # Argument-list form bypasses the shell, so the path is never interpreted
      # by it; -f overwrites a stale .gz left by an earlier run instead of prompting.
      system('gzip', '-f', path) if @gzip
    end

    # Writes the sitemap index listing every batch file under +url+.
    def write_sitemap_index(batches)
      puts 'Creating sitemap index...'
      # Time#utc is core Ruby; DateTime#utc is only available with ActiveSupport loaded.
      lastmod = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00')

      builder = Nokogiri::XML::Builder.new do |xml|
        xml.sitemapindex 'xmlns' => SITEMAP_XMLNS do
          batches.times do |batch|
            location = File.join(@url.to_s, sitemap_basename(batch))
            location += '.gz' if @gzip
            xml.sitemap do
              xml.loc location
              xml.lastmod lastmod
            end
          end
        end
      end

      index_path = File.join(public_dir, "#{@base_filename}-sitemap.xml")
      File.open(index_path, 'w') { |fh| fh.puts builder.to_xml }
    end

    # Deletes every file in the public directory whose name starts with
    # "<base_filename>-sitemap" (sitemaps, gzipped sitemaps and the index).
    def clobber_sitemaps
      puts "Deleting all sitemap files..."
      Dir.glob(File.join(public_dir, "#{@base_filename}-sitemap*")).each do |sitemap|
        FileUtils.rm(sitemap)
      end
    end

    # File name (without directory) of the sitemap for a zero-based batch.
    def sitemap_basename(batch_number)
      "#{@base_filename}-sitemap#{batch_number}.xml"
    end

    # The application's public directory. Uses Rails.root when it is
    # available and falls back to the RAILS_ROOT constant otherwise.
    def public_dir
      root = if defined?(::Rails) && ::Rails.respond_to?(:root) && ::Rails.root
               ::Rails.root.to_s
             else
               RAILS_ROOT
             end
      File.join(root, 'public')
    end
  end # BlacklightSitemapTask
end # Rake
122
+
@@ -0,0 +1,93 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
# Specs for Rake::BlacklightSitemapTask: the rake tasks it registers and the
# default and overridden values of its configuration attributes.
describe "BlacklightSitemap" do
  # A task built without a configuration block, used by the default-value examples.
  before(:all) { @default_task = Rake::BlacklightSitemapTask.new }

  it 'should create a default task which clobbers then creates' do
    Rake::BlacklightSitemapTask.new
    Rake::Task['blacklight:sitemap'].should be_a_kind_of(Rake::Task)
  end

  it "should create the sitemap creation task" do
    Rake::BlacklightSitemapTask.new
    Rake::Task['blacklight:sitemap:create'].should be_a_kind_of(Rake::Task)
  end

  it "should be able to have the url attribute" do
    configured = Rake::BlacklightSitemapTask.new { |sm| sm.url = 'http://example.com' }
    configured.url.should eq('http://example.com')
  end

  it 'should have a base_filename attribute' do
    @default_task.base_filename.should eq('blacklight')
  end

  it 'should allow for changing the base_filename attribute' do
    configured = Rake::BlacklightSitemapTask.new { |sm| sm.base_filename = 'bl' }
    configured.base_filename.should eq('bl')
  end

  it 'should store a value for whether to gzip or not' do
    configured = Rake::BlacklightSitemapTask.new { |sm| sm.gzip = true }
    configured.gzip.should be_true
  end

  it 'should have a default nil value for changefreq' do
    @default_task.changefreq.should be_nil
  end

  it 'should allow for changing the changefreq value' do
    configured = Rake::BlacklightSitemapTask.new { |sm| sm.changefreq = 'never' }
    configured.changefreq.should eq('never')
  end

  it 'should set the ceiling to 50,000 as a default' do
    @default_task.max.should == 50000
  end

  it 'should allow the ceiling to be set to a different value' do
    configured = Rake::BlacklightSitemapTask.new { |sm| sm.max = 50 }
    configured.max.should == 50
  end

  it 'should have a default value for the lastmod_field' do
    @default_task.lastmod_field.should eq('timestamp')
  end

  it 'should allow a new value for lastmod_field' do
    configured = Rake::BlacklightSitemapTask.new { |sm| sm.lastmod_field = 'date_created' }
    configured.lastmod_field.should eq('date_created')
  end

  it 'should have a default value for the priority_field' do
    @default_task.priority_field.should be_nil
  end

  it 'should allow a new value for priority_field' do
    configured = Rake::BlacklightSitemapTask.new { |sm| sm.priority_field = 'priority' }
    configured.priority_field.should eq('priority')
  end

  it 'should create the sitemap clobber task' do
    Rake::BlacklightSitemapTask.new
    Rake::Task['blacklight:sitemap:clobber'].should be_a_kind_of(Rake::Task)
  end
end
93
+
@@ -0,0 +1,12 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+ require 'rspec'
4
+ require 'blacklight-sitemap'
5
+
6
+ # Requires supporting files with custom matchers and macros, etc,
7
+ # in ./support/ and its subdirectories.
8
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
9
+
10
+ RSpec.configure do |config|
11
+
12
+ end
metadata ADDED
@@ -0,0 +1,153 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: blacklight-sitemap
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Jason Ronallo
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-12-02 00:00:00 -05:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ version_requirements: &id001 !ruby/object:Gem::Requirement
23
+ none: false
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ hash: 3
28
+ segments:
29
+ - 0
30
+ version: "0"
31
+ requirement: *id001
32
+ prerelease: false
33
+ type: :runtime
34
+ name: nokogiri
35
+ - !ruby/object:Gem::Dependency
36
+ version_requirements: &id002 !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ~>
40
+ - !ruby/object:Gem::Version
41
+ hash: 11
42
+ segments:
43
+ - 2
44
+ - 1
45
+ - 0
46
+ version: 2.1.0
47
+ requirement: *id002
48
+ prerelease: false
49
+ type: :development
50
+ name: rspec
51
+ - !ruby/object:Gem::Dependency
52
+ version_requirements: &id003 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ~>
56
+ - !ruby/object:Gem::Version
57
+ hash: 23
58
+ segments:
59
+ - 1
60
+ - 0
61
+ - 0
62
+ version: 1.0.0
63
+ requirement: *id003
64
+ prerelease: false
65
+ type: :development
66
+ name: bundler
67
+ - !ruby/object:Gem::Dependency
68
+ version_requirements: &id004 !ruby/object:Gem::Requirement
69
+ none: false
70
+ requirements:
71
+ - - ~>
72
+ - !ruby/object:Gem::Version
73
+ hash: 1
74
+ segments:
75
+ - 1
76
+ - 5
77
+ - 1
78
+ version: 1.5.1
79
+ requirement: *id004
80
+ prerelease: false
81
+ type: :development
82
+ name: jeweler
83
+ - !ruby/object:Gem::Dependency
84
+ version_requirements: &id005 !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ hash: 3
90
+ segments:
91
+ - 0
92
+ version: "0"
93
+ requirement: *id005
94
+ prerelease: false
95
+ type: :development
96
+ name: rcov
97
+ description: Blacklight sitemap rake task.
98
+ email: jronallo@gmail.com
99
+ executables: []
100
+
101
+ extensions: []
102
+
103
+ extra_rdoc_files:
104
+ - LICENSE.txt
105
+ - README.rdoc
106
+ files:
107
+ - Gemfile
108
+ - Gemfile.lock
109
+ - LICENSE.txt
110
+ - README.rdoc
111
+ - Rakefile
112
+ - VERSION
113
+ - generators/blacklight_sitemap/blacklight_sitemap_generator.rb
114
+ - lib/blacklight-sitemap.rb
115
+ - spec/blacklight-sitemap_spec.rb
116
+ - spec/spec_helper.rb
117
+ has_rdoc: true
118
+ homepage: http://github.com/jronallo/blacklight-sitemap
119
+ licenses:
120
+ - MIT
121
+ post_install_message:
122
+ rdoc_options: []
123
+
124
+ require_paths:
125
+ - lib
126
+ required_ruby_version: !ruby/object:Gem::Requirement
127
+ none: false
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ hash: 3
132
+ segments:
133
+ - 0
134
+ version: "0"
135
+ required_rubygems_version: !ruby/object:Gem::Requirement
136
+ none: false
137
+ requirements:
138
+ - - ">="
139
+ - !ruby/object:Gem::Version
140
+ hash: 3
141
+ segments:
142
+ - 0
143
+ version: "0"
144
+ requirements: []
145
+
146
+ rubyforge_project:
147
+ rubygems_version: 1.3.7
148
+ signing_key:
149
+ specification_version: 3
150
+ summary: Blacklight sitemap rake task.
151
+ test_files:
152
+ - spec/blacklight-sitemap_spec.rb
153
+ - spec/spec_helper.rb