blacklight_dynamic_sitemap 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.rspec +3 -0
  4. data/.solr_wrapper +5 -0
  5. data/.travis.yml +8 -0
  6. data/Gemfile +45 -0
  7. data/LICENSE +13 -0
  8. data/README.md +44 -0
  9. data/Rakefile +13 -0
  10. data/app/controllers/blacklight_dynamic_sitemap/sitemap_controller.rb +21 -0
  11. data/app/models/blacklight_dynamic_sitemap/sitemap.rb +74 -0
  12. data/app/views/blacklight_dynamic_sitemap/sitemap/index.xml.builder +14 -0
  13. data/app/views/blacklight_dynamic_sitemap/sitemap/show.xml.builder +15 -0
  14. data/bin/console +14 -0
  15. data/bin/setup +8 -0
  16. data/blacklight_dynamic_sitemap.gemspec +34 -0
  17. data/config/routes.rb +5 -0
  18. data/lib/blacklight_dynamic_sitemap.rb +8 -0
  19. data/lib/blacklight_dynamic_sitemap/engine.rb +12 -0
  20. data/lib/blacklight_dynamic_sitemap/version.rb +3 -0
  21. data/lib/generators/blacklight_dynamic_sitemap/install_generator.rb +13 -0
  22. data/solr/conf/_rest_managed.json +3 -0
  23. data/solr/conf/admin-extra.html +31 -0
  24. data/solr/conf/elevate.xml +36 -0
  25. data/solr/conf/mapping-ISOLatin1Accent.txt +246 -0
  26. data/solr/conf/protwords.txt +21 -0
  27. data/solr/conf/schema.xml +388 -0
  28. data/solr/conf/scripts.conf +24 -0
  29. data/solr/conf/solrconfig.xml +214 -0
  30. data/solr/conf/spellings.txt +2 -0
  31. data/solr/conf/stopwords.txt +58 -0
  32. data/solr/conf/stopwords_en.txt +58 -0
  33. data/solr/conf/synonyms.txt +31 -0
  34. data/solr/conf/xslt/example.xsl +132 -0
  35. data/solr/conf/xslt/example_atom.xsl +67 -0
  36. data/solr/conf/xslt/example_rss.xsl +66 -0
  37. data/solr/conf/xslt/luke.xsl +337 -0
  38. data/tasks/blacklight_dynamic_sitemap.rake +45 -0
  39. metadata +166 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 1d40182b152ba0d7cb73a9bbc6f01afc84b860baad5f7e8b66357e5beb97ae75
4
+ data.tar.gz: 1cf4281e052c836ce59aee3ef135dfad28c8ed82314ad6f3a4e5111251aa2032
5
+ SHA512:
6
+ metadata.gz: de245a04d2073183f62631d082875fabffc88dbd766ed0f7441eff3beb9dbca1126f2748cbe4f97c8bacab4fb3e0fa9a4121803c20bb5a9322877bb2835ddc46
7
+ data.tar.gz: b4ca278cd5530412c70596c54fe9650f7e3887c292eb65b1489dad37dc750d1e018145dad080e5a2fabfcdd27b121796b63cda84b941ce133cec1aaacfdba316
data/.gitignore ADDED
@@ -0,0 +1,15 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
12
+
13
+ Gemfile.lock
14
+
15
+ .internal_test_app
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.solr_wrapper ADDED
@@ -0,0 +1,5 @@
1
+ # Place any default configuration for solr_wrapper here
2
+ # port: 8983
3
+ collection:
4
+ dir: solr/conf/
5
+ name: blacklight-core
data/.travis.yml ADDED
@@ -0,0 +1,8 @@
1
+ ---
2
+ language: ruby
3
+ cache: bundler
4
+ rvm:
5
+ - 2.6.3
6
+ notifications:
7
+ email: false
8
+ jdk: openjdk11
data/Gemfile ADDED
@@ -0,0 +1,45 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in blacklight_dynamic_sitemap.gemspec
4
+ gemspec
5
+
6
+ gem 'rake', '~> 12.0'
7
+ gem 'rspec', '~> 3.0'
8
+ # BEGIN ENGINE_CART BLOCK
9
+ # engine_cart: 2.3.0
10
+ # engine_cart stanza: 0.10.0
11
+ # the below comes from engine_cart, a gem used to test this Rails engine gem in the context of a Rails app.
12
+ file = File.expand_path('Gemfile', ENV['ENGINE_CART_DESTINATION'] || ENV['RAILS_ROOT'] || File.expand_path('.internal_test_app', File.dirname(__FILE__)))
13
+ if File.exist?(file)
14
+ begin
15
+ eval_gemfile file
16
+ rescue Bundler::GemfileError => e
17
+ Bundler.ui.warn '[EngineCart] Skipping Rails application dependencies:'
18
+ Bundler.ui.warn e.message
19
+ end
20
+ else
21
+ Bundler.ui.warn "[EngineCart] Unable to find test application dependencies in #{file}, using placeholder dependencies"
22
+
23
+ if ENV['RAILS_VERSION']
24
+ if ENV['RAILS_VERSION'] == 'edge'
25
+ gem 'rails', github: 'rails/rails'
26
+ ENV['ENGINE_CART_RAILS_OPTIONS'] = '--edge --skip-turbolinks'
27
+ else
28
+ gem 'rails', ENV['RAILS_VERSION']
29
+ end
30
+ end
31
+
32
+ case ENV['RAILS_VERSION']
33
+ when /^4.2/
34
+ gem 'responders', '~> 2.0'
35
+ gem 'sass-rails', '>= 5.0'
36
+ gem 'coffee-rails', '~> 4.1.0'
37
+ when /^4.[01]/
38
+ gem 'sass-rails', '< 5.0'
39
+ end
40
+ end
41
+ # END ENGINE_CART BLOCK
42
+
43
+ unless File.exist?(file)
44
+ eval_gemfile File.expand_path('spec/test_app_templates/Gemfile.extra', File.dirname(__FILE__))
45
+ end
data/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ © 2020 The Board of Trustees of the Leland Stanford Junior University.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
data/README.md ADDED
@@ -0,0 +1,44 @@
1
+ # BlacklightDynamicSitemap
2
+
3
+ BlacklightDynamicSitemap is a Rails engine that adds a dynamic `sitemap.xml` to a Blacklight application. The sitemap index and the individual sitemaps are generated on the fly from the Solr index.
4
+
5
+ To experiment with the code, run `bin/console` for an interactive prompt.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'blacklight_dynamic_sitemap'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle install
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install blacklight_dynamic_sitemap
22
+
23
+ Run the install generator
24
+
25
+ $ bundle exec rails generate blacklight_dynamic_sitemap:install
26
+
27
+ ## Usage
28
+
29
+ ### Solr configuration
30
+ This feature relies on every Solr document having a field that contains a unique hexadecimal hash (by default the engine reads it from `hashed_id_ssi`). Solr can populate this field automatically for you using the `SignatureUpdateProcessorFactory`.
31
+
32
+ https://lucene.apache.org/solr/guide/8_4/update-request-processors.html
33
+
34
+ See the `updateRequestProcessorChain` used in this project's `solrconfig.xml` for an example.
35
+
36
+ ## Development
37
+
38
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
39
+
40
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
41
+
42
+ ## Contributing
43
+
44
+ Bug reports and pull requests are welcome on GitHub at https://github.com/sul-dlss/blacklight_dynamic_sitemap.
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
+ begin
2
+ require 'bundler/setup'
3
+ rescue LoadError
4
+ puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
5
+ end
6
+
7
+ Bundler::GemHelper.install_tasks
8
+
9
+ load 'tasks/blacklight_dynamic_sitemap.rake'
10
+
11
+ require 'engine_cart/rake_task'
12
+
13
+ task default: [:ci]
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module BlacklightDynamicSitemap
  ##
  # Controller that serves the sitemap index and the individual sitemaps,
  # generating each response on the fly.
  class SitemapController < ApplicationController
    # Loads the entries of the sitemap identified by the required :id param.
    def show
      @sitemap_entries = sitemap.get(id)
    end

    # Loads the list of sitemap identifiers for the sitemap index.
    def index
      @sitemaps = sitemap.list
    end

    private

    # A fresh Sitemap model for the current request.
    def sitemap
      BlacklightDynamicSitemap::Sitemap.new
    end

    # The requested sitemap identifier; params.require rejects a missing value.
    def id
      params.require(:id)
    end
  end
end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BlacklightDynamicSitemap
4
+ ##
5
+ #
6
+ class Sitemap
7
+ delegate :hashed_id_field, :unique_id_field, :last_modified_field, to: :engine_config
8
+
9
# Fetches the documents that belong to one sitemap, i.e. every document
# whose hashed id field starts with the given prefix.
#
# The prefix originates from the request, so it is never interpolated into
# the local-params string. It is sent as its own request parameter and
# referenced through Solr parameter dereferencing (v=$sitemap_prefix), which
# keeps characters such as "}" or spaces from altering the query.
#
# @param id [String] hash prefix identifying the sitemap
# @return [Array<Hash>] Solr docs limited to the unique id and last modified fields
def get(id)
  index_connection.select(
    params: {
      q: "{!prefix f=#{hashed_id_field} v=$sitemap_prefix}",
      sitemap_prefix: id,
      fl: [unique_id_field, last_modified_field].join(','),
      rows: 2_000_000, # Ensure that we do not page this result
      facet: false
    }
  ).dig('response', 'docs')
end
19
+
20
# Identifiers of every sitemap, as produced by access_list.
def list
  access_list
end
23
+
24
+ private
25
+
26
# Connection of Blacklight's default index, memoized per Sitemap instance.
def index_connection
  return @index_connection if @index_connection

  @index_connection = Blacklight.default_index.connection
end
29
+
30
# Configuration object of the BlacklightDynamicSitemap engine.
def engine_config
  BlacklightDynamicSitemap::Engine.config
end
33
+
34
# Total number of documents in the index (Solr's numFound for a match-all
# query with zero rows).
#
# The count is stored in Rails.cache under a fixed key and expires after the
# engine's max_documents_expiration, so the index is not queried on every
# request. Uses the same index_connection and engine_config helpers as the
# rest of the class.
#
# @return [Integer] numFound as reported by the index
def max_documents
  Rails.cache.fetch(
    'blacklight_dynamic_sitemap.index_max_docs',
    expires_in: engine_config.max_documents_expiration
  ) do
    count_response = index_connection.select(
      params: { q: '*:*', rows: 0, facet: false }
    )
    count_response['response']['numFound']
  end
end
43
+
44
# Chunk size used when working out how many sitemaps are needed: the
# configured minimum_average_chunk, or the total document count when that is
# smaller. Sufficiently less than 50,000 max per sitemap.
def average_chunk
  configured_chunk = engine_config.minimum_average_chunk
  total_documents = max_documents
  [configured_chunk, total_documents].min
end
47
+
48
##
# Exponent used to calculate the needed number of prefix characters to query
# that will effectively chunk the entire number of documents. 16 is the
# number of characters in hex space (0-9, a-f), so an exponent of y yields
# 16**y prefixes.
# Example: 16**1 = 16, 16**2 = 256, 16**3 = 4096
#
# The result is the smallest y >= 1 with 16**y >= max_documents / average_chunk,
# i.e. the ceiling of log16 of that ratio. It is computed with exact integer
# arithmetic so that exact powers of 16 are not subject to floating-point
# rounding. When average_chunk is zero (e.g. for an empty index) a single
# prefix character is used instead of dividing by zero.
#
# @return [Integer] prefix length, always at least 1 (memoized)
def exponent
  @exponent ||= begin
    chunk = average_chunk
    chunks_needed = chunk.zero? ? 1 : max_documents / chunk
    digits = 1
    digits += 1 while 16**digits < chunks_needed
    digits
  end
end
62
+
63
##
# Expands the calculated exponent into the list of sitemap identifiers: every
# value in hex space (0-9, a-f) of that width, zero-padded on the left.
# Example: (exponent as 4)
# ["0000", "0001", "0002", "0003"..."af74", "af75", "af76", "af77"..."fffc", "fffd", "fffe", "ffff"]
#
# @return [Array<String>] 16**exponent hex strings, each exponent characters long
def access_list
  width = exponent
  # Map the range directly; no intermediate array of integers is needed.
  (0...(16**width)).map { |value| value.to_s(16).rjust(width, '0') }
end
73
+ end
74
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
# Sitemap index: one <sitemap> entry per identifier in @sitemaps, each
# pointing at the XML sitemap for that identifier.
sitemap_index_attributes = {
  'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
  'xsi:schemaLocation' => 'http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd',
  'xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9'
}

xml.instruct! :xml, version: '1.0', encoding: 'UTF-8'
xml.sitemapindex(sitemap_index_attributes) do
  @sitemaps.each do |sitemap_id|
    xml.sitemap { xml.loc(sitemap_url(sitemap_id, format: :xml)) }
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
# Single sitemap: one <url> entry per document in @sitemap_entries, with the
# document URL as <loc> and the document's last modified value as <lastmod>.

# The field names do not change between documents, so read them from the
# engine configuration once instead of on every loop iteration.
sitemap_config = BlacklightDynamicSitemap::Engine.config
id_field = sitemap_config.unique_id_field
modified_field = sitemap_config.last_modified_field

xml.instruct! :xml, version: '1.0', encoding: 'UTF-8'
xml.urlset(
  'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
  'xsi:schemaLocation' => 'http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd',
  'xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9'
) do
  @sitemap_entries.each do |doc|
    xml.url do
      xml.loc(main_app.solr_document_url(doc[id_field]))
      xml.lastmod(doc[modified_field])
    end
  end
end
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'blacklight_dynamic_sitemap'
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require 'pry'
11
+ # Pry.start
12
+
13
+ require 'irb'
14
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,34 @@
1
require_relative 'lib/blacklight_dynamic_sitemap/version'

# Gem specification for blacklight_dynamic_sitemap.
Gem::Specification.new do |spec|
  spec.name = 'blacklight_dynamic_sitemap'
  spec.version = BlacklightDynamicSitemap::VERSION
  spec.authors = ['Jack Reed']
  spec.email = ['phillipjreed@gmail.com']

  spec.summary = 'Dynamic sitemap.xml for Blacklight'
  spec.description = 'Dynamic sitemap.xml for Blacklight'
  spec.homepage = 'https://github.com/sul-dlss/blacklight_dynamic_sitemap'
  spec.license = 'Apache-2.0'
  spec.required_ruby_version = Gem::Requirement.new('>= 2.3.0')

  spec.metadata['homepage_uri'] = spec.homepage
  spec.metadata['source_code_uri'] = spec.homepage

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  # Run from the gemspec's own directory so the listing does not depend on
  # the caller's working directory; test/spec/features files are left out.
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir = 'exe'
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  spec.add_dependency 'rails', '~> 5.0'
  spec.add_dependency 'blacklight', '> 6.0'

  spec.add_development_dependency 'capybara'
  spec.add_development_dependency 'engine_cart', '~> 2.0'
  spec.add_development_dependency 'rspec-rails'
  spec.add_development_dependency 'solr_wrapper'
end
data/config/routes.rb ADDED
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ BlacklightDynamicSitemap::Engine.routes.draw do
4
+ resources :sitemap, controller: 'sitemap', only: %i[index show], defaults: { format: :xml }
5
+ end
@@ -0,0 +1,8 @@
1
+ require 'blacklight_dynamic_sitemap/version'
2
+ require 'blacklight'
3
+
4
+ module BlacklightDynamicSitemap
5
+ require 'blacklight_dynamic_sitemap/engine'
6
+ class Error < StandardError; end
7
+ # Your code goes here...
8
+ end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BlacklightDynamicSitemap
4
+ class Engine < ::Rails::Engine
5
+ isolate_namespace BlacklightDynamicSitemap
6
+ config.max_documents_expiration = 1.day
7
+ config.minimum_average_chunk = 10_000
8
+ config.hashed_id_field = 'hashed_id_ssi'
9
+ config.unique_id_field = 'id'
10
+ config.last_modified_field = 'timestamp'
11
+ end
12
+ end
@@ -0,0 +1,3 @@
1
+ module BlacklightDynamicSitemap
2
+ VERSION = '0.1.0'
3
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rails/generators'
4
+
5
module BlacklightDynamicSitemap
  # Install generator for the host application.
  class Install < Rails::Generators::Base
    # Inserts the sitemap engine mount into config/routes.rb, directly after
    # the line that mounts Blacklight::Engine.
    def add_routes
      engine_mount = "\n mount BlacklightDynamicSitemap::Engine => '/'\n"
      inject_into_file 'config/routes.rb', engine_mount, after: "mount Blacklight::Engine => '/'"
    end
  end
end
@@ -0,0 +1,3 @@
1
+ {
2
+ "initArgs":{},
3
+ "managedList":[]}
@@ -0,0 +1,31 @@
1
+ <!--
2
+ Licensed to the Apache Software Foundation (ASF) under one or more
3
+ contributor license agreements. See the NOTICE file distributed with
4
+ this work for additional information regarding copyright ownership.
5
+ The ASF licenses this file to You under the Apache License, Version 2.0
6
+ (the "License"); you may not use this file except in compliance with
7
+ the License. You may obtain a copy of the License at
8
+
9
+ http://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ Unless required by applicable law or agreed to in writing, software
12
+ distributed under the License is distributed on an "AS IS" BASIS,
13
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ See the License for the specific language governing permissions and
15
+ limitations under the License.
16
+ -->
17
+
18
+ <!-- The content of this page will be statically included into the top
19
+ of the admin page. Uncomment this as an example to see there the content
20
+ will show up.
21
+
22
+ <hr>
23
+ <i>This line will appear before the first table</i>
24
+ <tr>
25
+ <td colspan="2">
26
+ This row will be appended to the end of the first table
27
+ </td>
28
+ </tr>
29
+ <hr>
30
+
31
+ -->