kithe 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 423d120379d13836cc034d1884b813c8f6a2505c77a1e9edcdd8b4fbb33a0025
4
- data.tar.gz: d9bfc7bc5f60a86f2e2e6726440de4b319cfa64526313a999af269340ec0cf1d
3
+ metadata.gz: d81e5ad94edad9885f8b51d4adc04c64093faa6b105a4e0c9dd5caa683b116cd
4
+ data.tar.gz: 21e0f6ff7aa02a592216c17e3cbda31341ffcec92835e04141d0fb862787de84
5
5
  SHA512:
6
- metadata.gz: 3696eebb93e5975e8336bf79018f3c1c2fd5e49015813442e5586794ad12193a1f843a43ae4a4d238eff906135434547473c49e327c6d03522f92766f6759c5b
7
- data.tar.gz: a7761da6c072f0bdbc48468227177244ab15a8e46d392ae930e8734e555134c237f4aecb928aef6ab669e430752bd838c803844b2fe30a79a44b7e8827121ddf
6
+ metadata.gz: bf07407aa6e23d2460f58e5d636333eb3fa72977d210d993a78e616813a2f47fec1f426cd22e4ad58f0d648a435ae8be77fa003a7c530de9d11a30d768323fd3
7
+ data.tar.gz: ce7f84ec2833f5fa5ed4aa07b7ffc7cbbd187d3aa27adeec2282ccf380bf0d137e73ac3d64900b9b6c6ae65f1ecbc268711d193b767e07e3a348f3f94eac9133
data/README.md CHANGED
@@ -44,6 +44,8 @@ Some guide documentation is available to explain each of kithe's major functiona
44
44
  * Not coupled to any other kithe components, could be used independently, hypothetically on any ActiveRecord model.
45
45
  * Written after review of "prior art" in [sunspot](https://github.com/sunspot/sunspot) and [searchkick](https://github.com/ankane/searchkick) (which both used AR callback-based indexing), and others.
46
46
 
47
+ * A [recommended approach for using Blacklight](./guides/blacklight_approach.md) with search result view templates based on actual ActiveRecord models. It is totally optional to use Blacklight at all with kithe, or to use this approach if you do.
48
+
47
49
  ### Also
48
50
 
49
51
  * [Kithe::Parameters](./app/models/kithe/parameters.rb) provides some shortcuts around Rails "strong params" for attr_json serialized attributes.
@@ -9,7 +9,7 @@ module Kithe
9
9
  # For a complete overview, see the [Guide Documentation](../../../guides/solr_indexing.md)
10
10
  #
11
11
  # The Solr instance to send updates to is global configuration:
12
- # Kithe::Indexable.settings.solr_url = "http://localhost:8983/solr/collection_name"
12
+ # Kithe.indexable_settings.solr_url = "http://localhost:8983/solr/collection_name"
13
13
  #
14
14
  # To configure how a model is mapped to a Solr document, you create a `Kithe::Indexer` sub-class, which
15
15
  # can use our obj_extract method, as well as any other traject indexer code.
@@ -52,91 +52,6 @@ module Kithe
52
52
  module Indexable
53
53
  extend ActiveSupport::Concern
54
54
 
55
- class IndexableSettings
56
- attr_accessor :solr_url, :writer_class_name, :writer_settings, :model_name_solr_field, :disable_callbacks
57
- def initialize(solr_url:, writer_class_name:, writer_settings:, model_name_solr_field:, disable_callbacks: false)
58
- @solr_url = solr_url
59
- @writer_class_name = writer_class_name
60
- @writer_settings = writer_settings
61
- @model_name_solr_field = model_name_solr_field
62
- end
63
-
64
- # Use configured solr_url, and merge together with configured
65
- # writer_settings
66
- def writer_settings
67
- if solr_url
68
- { "solr.url" => solr_url }.merge(@writer_settings)
69
- else
70
- @writer_settings
71
- end
72
- end
73
-
74
- # Turn writer_class_name into an actual Class object.
75
- def writer_class
76
- writer_class_name.constantize
77
- end
78
-
79
- # Instantiate a new writer based on `writer_class_name` and `writer_settings`
80
- def writer_instance!(additional_settings = {})
81
- writer_class.new(writer_settings.merge(additional_settings))
82
- end
83
- end
84
-
85
- # Global Kithe::Indexable settings, actually a Kithe::Indexable::Settings
86
- # object, but you will generally use it as a simple value object with getters
87
- # and setters.
88
- #
89
- # * solr_url: Where to send to Solr when indexing, the base url
90
- #
91
- # Kithe::Indexable.settings.solr_url = "http://localhost:8983/solr/collection_name"
92
- #
93
- # * model_name_solr_field: If you'd like a custom solr field to store model class name in.
94
- #
95
- # Kithe::Indexable.settings.model_name_solr_field = "my_model_name_field"
96
- #
97
- # * writer_settings: Settings to be passed to the Traject writer, by default a
98
- # Traject::SolrJsonWriter. To maintain the default settings, best to merge
99
- # your new ones into defaults.
100
- #
101
- # Kithe::Indexable.settings.writer_settings.merge!(
102
- # # by default we send a softCommit on every update, maybe you
103
- # # want not to?
104
- # "solr_writer.solr_update_args" => {}
105
- # # extra long timeout?
106
- # "solr_writer.http_timeout" => 100
107
- # )
108
- #
109
- # * writer_class_name: By default Traject::SolrJsonWriter, but maybe
110
- # you want to set to some other Traject::Writer. The writer Kithe::Indexable
111
- # will send index add/remove requests to.
112
- #
113
- # Kithe::Indexable.settings.writer_class_name = "Traject::SomeOtherWriter"
114
- #
115
- # * disable_callbacks: set to true to globally disable automatic after_commit
116
- mattr_accessor :settings do
117
- # set up default settings
118
- IndexableSettings.new(
119
- solr_url: "http://localhost:8983/solr/default",
120
- model_name_solr_field: "model_name_ssi",
121
- writer_class_name: "Traject::SolrJsonWriter",
122
- writer_settings: {
123
- # as default we tell the solrjsonwriter to use no threads,
124
- # no batching. softCommit on every update. Least surprising
125
- # default configuration.
126
- "solr_writer.thread_pool" => 0,
127
- "solr_writer.batch_size" => 1,
128
- "solr_writer.solr_update_args" => { softCommit: true },
129
- "solr_writer.http_timeout" => 3,
130
- "logger" => Rails.logger,
131
-
132
- # MAYBE? no skippable exceptions please
133
- # "solr_writer.skippable_exceptions" => []
134
- },
135
- disable_callbacks: false
136
- )
137
- end
138
-
139
-
140
55
  # Set some indexing parameters for the block yielded. For instance, to batch updates:
141
56
  #
142
57
  # Kithe::Indexable.index_with(batching: true)
@@ -163,12 +78,12 @@ module Kithe
163
78
 
164
79
  # Are automatic after_commit callbacks currently enabled? Will check a number
165
80
  # of things to see, as we have a number of places these can be turned on/off.
166
- # * Globally in `Kithe::Indexable.settings.disable_callback`
81
+ # * Globally in `Kithe.indexable_settings.disable_callbacks`
167
82
  # * On class or instance using class_attribute `kithe_indexable_auto_callbacks`
168
83
  # * If no kithe_indexable_mapper is configured on record, then no callbacks.
169
84
  # * Using thread-current settings usually set by .index_with
170
85
  def self.auto_callbacks?(model)
171
- !Kithe::Indexable.settings.disable_callbacks &&
86
+ !Kithe.indexable_settings.disable_callbacks &&
172
87
  model.kithe_indexable_auto_callbacks &&
173
88
  model.kithe_indexable_mapper &&
174
89
  !ThreadSettings.current.disabled_callbacks?
@@ -68,7 +68,7 @@ module Kithe
68
68
  # Could be an explicit writer passed into #initialize, or a current thread-settings
69
69
  # writer, or a new writer created from global settings.
70
70
  def writer
71
- @writer ||= ThreadSettings.current.writer || Kithe::Indexable.settings.writer_instance!
71
+ @writer ||= ThreadSettings.current.writer || Kithe.indexable_settings.writer_instance!
72
72
  end
73
73
 
74
74
  # Is this record supposed to be represented in the solr index?
@@ -81,7 +81,7 @@ module Kithe
81
81
  @writer ||= begin
82
82
  if @batching
83
83
  @local_writer = true
84
- Kithe::Indexable.settings.writer_instance!("solr_writer.batch_size" => 100)
84
+ Kithe.indexable_settings.writer_instance!("solr_writer.batch_size" => 100)
85
85
  end
86
86
  end
87
87
  end
@@ -18,9 +18,14 @@ module Kithe
18
18
  # * A Kithe::Indexer will automatically index the source record #id to Solr object
19
19
  # #id, and the source record class name to Solr field `model_name_ssi`. (That uses
20
20
  # Blacklight conventions for dynamic field names, if you'd like to change the field name
21
- # used, set `Kithe::Indexable.settings.model_name_solr_field=`)
21
+ # used, set `Kithe.indexable_settings.model_name_solr_field=`)
22
22
  #
23
23
  # * ID and model_name are set, so the AR object can be easily fetched later from Solr results.
24
+ # * You can customize what Solr field the model_name is sent to with
25
+ # `Kithe.indexable_settings.model_name_solr_field=`, by default `model_name_ssi`, using
26
+ # a blacklight dynamic field template `*_ssi`.
27
+ # * You can customize what ActiveRecord model property is sent to Solr `id` field with
28
+ # `Kithe.indexable_settings.solr_id_value_attribute=`, by default the AR pk in model#id.
24
29
  #
25
30
  # Note that there are no built-in facilities for automatically sending every field of your model
26
31
  # to Solr, round-trippable or not. The expected usage pattern is sending to Solr only
@@ -43,9 +48,8 @@ module Kithe
43
48
  #
44
49
  # TODO We might not actually want to do these automatically, or allow it to be disabled?
45
50
  configure do
46
- # hard-coded id -> id for now. id is a UUID. Can be made configurable?
47
- to_field "id", obj_extract("id")
48
- to_field Kithe::Indexable.settings.model_name_solr_field, obj_extract("class", "name")
51
+ to_field "id", obj_extract(Kithe.indexable_settings.solr_id_value_attribute)
52
+ to_field Kithe.indexable_settings.model_name_solr_field, obj_extract("class", "name")
49
53
  end
50
54
  end
51
55
  end
@@ -16,7 +16,7 @@ module Kithe
16
16
  # delete(id)
17
17
  # end
18
18
  #
19
- # It is searching for any Solr object with a `Kithe::Indexable.settings.model_name_solr_field`
19
+ # It is searching for any Solr object with a `Kithe.indexable_settings.model_name_solr_field`
20
20
  # field (default `model_name_ssi`). Then, it takes the ID and makes sure it exists in
21
21
  # the database using Kithe::Model. At the moment we are assuming everything is in Kithe::Model,
22
22
  # rather than trying to use the `model_name_ssi` to fetch from different tables. Could
@@ -26,10 +26,12 @@ module Kithe
26
26
  #
27
27
  # A bit hacky implementation, it might be nice to support a progress bar, we
28
28
  # don't now.
29
- def self.solr_orphan_ids(batch_size: 100, solr_url: Kithe::Indexable.settings.solr_url)
29
+ def self.solr_orphan_ids(batch_size: 100, solr_url: Kithe.indexable_settings.solr_url)
30
30
  return enum_for(:solr_index_orphan_ids) unless block_given?
31
31
 
32
- model_name_solr_field = Kithe::Indexable.settings.model_name_solr_field
32
+ model_name_solr_field = Kithe.indexable_settings.model_name_solr_field
33
+ model_solr_id_attr = Kithe.indexable_settings.solr_id_value_attribute
34
+
33
35
  solr_page = -1
34
36
 
35
37
  rsolr = RSolr.connect :url => solr_url
@@ -46,14 +48,14 @@ module Kithe
46
48
 
47
49
  break if solr_ids.empty?
48
50
 
49
- (solr_ids - Kithe::Model.where(id: solr_ids).pluck(:id)).each do |orphaned_id|
51
+ (solr_ids - Kithe::Model.where(model_solr_id_attr => solr_ids).pluck(model_solr_id_attr)).each do |orphaned_id|
50
52
  yield orphaned_id
51
53
  end
52
54
  end
53
55
  end
54
56
 
55
57
  # Finds any Solr objects that have a `model_name_ssi` field
56
- # (or `Kithe::Indexable.settings.model_name_solr_field` if non-default), but don't
58
+ # (or `Kithe.indexable_settings.model_name_solr_field` if non-default), but don't
57
59
  # exist in the rdbms, and deletes them from Solr, then issues a commit.
58
60
  #
59
61
  # Under normal use, you shouldn't have to do this, but can if your Solr index
@@ -65,7 +67,7 @@ module Kithe
65
67
  # A bit hacky implementation, it might be nice to have a progress bar, we don't now.
66
68
  #
67
69
  # Does return an array of any IDs deleted.
68
- def self.delete_solr_orphans(batch_size: 100, solr_url: Kithe::Indexable.settings.solr_url)
70
+ def self.delete_solr_orphans(batch_size: 100, solr_url: Kithe.indexable_settings.solr_url)
69
71
  rsolr = RSolr.connect :url => solr_url
70
72
  deleted_ids = []
71
73
 
@@ -83,10 +85,20 @@ module Kithe
83
85
  # using Rsolr. Pretty trivial.
84
86
  #
85
87
  # Intended for dev/test instances, not really production.
86
- def self.delete_all(solr_url: Kithe::Indexable.settings.solr_url)
88
+ # @param commit :soft, :hard, or false. Default :hard
89
+ def self.delete_all(solr_url: Kithe.indexable_settings.solr_url, commit: :hard)
87
90
  rsolr = RSolr.connect :url => solr_url
88
- rsolr.delete_by_query("*:*")
89
- rsolr.commit
91
+
92
+ # RSolr is a bit under-doc'd, but this SEEMS to work to send a commit
93
+ # or softCommit instruction with the delete request.
94
+ params = {}
95
+ if commit == :hard
96
+ params[:commit] = true
97
+ elsif commit == :soft
98
+ params[:softCommit] = true
99
+ end
100
+
101
+ rsolr.delete_by_query("*:*", params: params)
90
102
  end
91
103
  end
92
104
  end
@@ -1,4 +1,5 @@
1
1
  require "kithe/engine"
2
+ require 'kithe/indexable_settings'
2
3
 
3
4
  module Kithe
4
5
  # for ruby-progressbar
@@ -15,4 +16,67 @@ module Kithe
15
16
  def self.railtie_namespace
16
17
  Kithe::Engine
17
18
  end
19
+
20
+ # Global Kithe::Indexable settings, actually a Kithe::IndexableSettings
21
+ # object, but you will generally use it as a simple value object with getters
22
+ # and setters.
23
+ #
24
+ # * solr_url: Where to send to Solr when indexing, the base url
25
+ #
26
+ # Kithe.indexable_settings.solr_url = "http://localhost:8983/solr/collection_name"
27
+ #
28
+ # * model_name_solr_field: If you'd like a custom solr field to store model class name in.
29
+ #
30
+ # Kithe.indexable_settings.model_name_solr_field = "my_model_name_field"
31
+ #
32
+ # * solr_id_value_attribute: What attribute from your AR models to send to Solr
33
+ # `id` uniqueKey field, default the AR `id` pk, you may wish to set to `friendlier_id`.
34
+ #
35
+ # * writer_settings: Settings to be passed to the Traject writer, by default a
36
+ # Traject::SolrJsonWriter. To maintain the default settings, best to merge
37
+ # your new ones into defaults.
38
+ #
39
+ # Kithe.indexable_settings.writer_settings.merge!(
40
+ # # by default we send a softCommit on every update, maybe you
41
+ # # want not to?
42
+ # "solr_writer.solr_update_args" => {},
43
+ # # extra long timeout?
44
+ # "solr_writer.http_timeout" => 100
45
+ # )
46
+ #
47
+ # * writer_class_name: By default Traject::SolrJsonWriter, but maybe
48
+ # you want to set to some other Traject::Writer. The writer Kithe::Indexable
49
+ # will send index add/remove requests to.
50
+ #
51
+ # Kithe.indexable_settings.writer_class_name = "Traject::SomeOtherWriter"
52
+ #
53
+ # * disable_callbacks: set to true to globally disable automatic after_commit callbacks
54
+ #
55
+ #
56
+ # The settings need to live here not in Kithe::Indexable, to avoid terrible
57
+ # Rails dev-mode class-reloading weirdnesses. This module is not reloaded.
58
+ mattr_accessor :indexable_settings do
59
+ # set up default settings
60
+ IndexableSettings.new(
61
+ solr_url: "http://localhost:8983/solr/default",
62
+ model_name_solr_field: "model_name_ssi",
63
+ solr_id_value_attribute: "id",
64
+ writer_class_name: "Traject::SolrJsonWriter",
65
+ writer_settings: {
66
+ # as default we tell the solrjsonwriter to use no threads,
67
+ # no batching. softCommit on every update. Least surprising
68
+ # default configuration.
69
+ "solr_writer.thread_pool" => 0,
70
+ "solr_writer.batch_size" => 1,
71
+ "solr_writer.solr_update_args" => { softCommit: true },
72
+ "solr_writer.http_timeout" => 3,
73
+ "logger" => Rails.logger,
74
+
75
+ # MAYBE? no skippable exceptions please
76
+ # "solr_writer.skippable_exceptions" => []
77
+ },
78
+ disable_callbacks: false
79
+ )
80
+ end
81
+
18
82
  end
@@ -0,0 +1,38 @@
1
+ require 'kithe/blacklight_tools/search_service_bulk_load'
2
+
3
+ module Kithe
4
+ module BlacklightTools
5
+ # A convenience sub-class of Blacklight::SearchService that
6
+ # _just_ includes Kithe::BlacklightTools::SearchServiceBulkLoad.
7
+ #
8
+ # So if you just need a stock Blacklight::SearchService with this
9
+ # functionality, in your CatalogController you can conveniently simply:
10
+ #
11
+ # require 'kithe/blacklight_tools/bulk_loading_search_service'
12
+ # class CatalogController < ApplicationController
13
+ # include Blacklight::Catalog
14
+ # # ...
15
+ #
16
+ # self.search_service_class = Kithe::BlacklightTools::BulkLoadingSearchService
17
+ #
18
+ # # ...
19
+ # end
20
+ #
21
+ # Do NOT sub-class this BulkLoadingSearchService in a local app or gem.
22
+ # If you need more things in a SearchService, instead make your own
23
+ # SearchService subclass and
24
+ # `include Kithe::BlacklightTools::SearchServiceBulkLoad` directly.
25
+ # This class is simply a convenience for when you need nothing else.
26
+ #
27
+ # Kithe devs: Don't add anything to this class beyond
28
+ # `include Kithe::BlacklightTools::SearchServiceBulkLoad`, so that remains true!
29
+ #
30
+ # Note: This is in `./lib` rather than `./app` so it should never get
31
+ # auto-loaded by the app, as kithe does not require Blacklight and loading
32
+ # this file without Blacklight would produce an error. Thus the need
33
+ # for the explicit "require"
34
+ class BulkLoadingSearchService < Blacklight::SearchService
35
+ include Kithe::BlacklightTools::SearchServiceBulkLoad
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,54 @@
1
+ module Kithe
2
+ module BlacklightTools
3
+ # Mix-in module to a Blacklight::SearchService, that will bulk load actual AR
4
+ # records corresponding to Solr hits, and set them as `model` attribute on each
5
+ # SolrDocument in the results.
6
+ #
7
+ # A very basic rough implementation for basic use cases.
8
+ #
9
+ # * Assumes all documents that come back in the Solr results were indexed from a Kithe::Model, and
10
+ # their Solr ID's are the Kithe::Model `id` pk, or from the AR model attribute name
11
+ # set in `Kithe.indexable_settings.solr_id_value_attribute`
12
+ #
13
+ # * Requires your SolrDocument class to have a `model` attribute, you can just add
14
+ # `attr_accessor :model` to your local SolrDocument class BL generated in
15
+ # `./app/models/solr_document.rb`. Loaded models will be stored there on your results.
16
+ #
17
+ # Just `include` this module in a Blacklight::SearchService subclass. If you need no
18
+ # additional SearchService customization, but just the standard Blacklight::SearchService
19
+ # with this functionality, for convenience see the Kithe::BlacklightTools::BulkLoadingSearchService.
20
+ #
21
+ # SORRY: No automated tests at present, too hard for us at the moment to figure out how
22
+ # to test a Blacklight extension in a reliable and sane way.
23
+ module SearchServiceBulkLoad
24
+ extend ActiveSupport::Concern
25
+
26
+ included do
27
+ class_attribute :bulk_load_records, default: true
28
+ class_attribute :bulk_load_scope
29
+ end
30
+
31
+ def search_results
32
+ (response, _documents) = super
33
+
34
+ if bulk_load_records
35
+ id_hash = response.documents.collect {|r| [r.id, r] }.to_h
36
+
37
+ scope = Kithe::Model.where(Kithe.indexable_settings.solr_id_value_attribute => id_hash.keys)
38
+ scope = scope.instance_exec(&bulk_load_scope) if bulk_load_scope
39
+
40
+ scope.find_each do |model|
41
+ id_hash[model.send(Kithe.indexable_settings.solr_id_value_attribute)].model = model
42
+ end
43
+
44
+ orphaned_solr_docs = id_hash.values.select { |doc| doc.model.nil? }
45
+ if orphaned_solr_docs.present?
46
+ Rails.logger.warn("Kithe::Blacklight::BulkLoading: Missing db records for solr doc id's: #{orphaned_solr_docs.collect(&:id).join(' ')}")
47
+ end
48
+ end
49
+
50
+ [response, _documents]
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,34 @@
1
+ module Kithe
2
+ class IndexableSettings
3
+ attr_accessor :solr_url, :writer_class_name, :writer_settings,
4
+ :model_name_solr_field, :solr_id_value_attribute, :disable_callbacks
5
+ def initialize(solr_url:, writer_class_name:, writer_settings:,
6
+ model_name_solr_field:, solr_id_value_attribute:, disable_callbacks: false)
7
+ @solr_url = solr_url
8
+ @writer_class_name = writer_class_name
9
+ @writer_settings = writer_settings
10
+ @model_name_solr_field = model_name_solr_field
11
+ @solr_id_value_attribute = solr_id_value_attribute
12
+ end
13
+
14
+ # Use configured solr_url, and merge together with configured
15
+ # writer_settings
16
+ def writer_settings
17
+ if solr_url
18
+ { "solr.url" => solr_url }.merge(@writer_settings)
19
+ else
20
+ @writer_settings
21
+ end
22
+ end
23
+
24
+ # Turn writer_class_name into an actual Class object.
25
+ def writer_class
26
+ writer_class_name.constantize
27
+ end
28
+
29
+ # Instantiate a new writer based on `writer_class_name` and `writer_settings`
30
+ def writer_instance!(additional_settings = {})
31
+ writer_class.new(writer_settings.merge(additional_settings))
32
+ end
33
+ end
34
+ end