kithe 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 423d120379d13836cc034d1884b813c8f6a2505c77a1e9edcdd8b4fbb33a0025
4
- data.tar.gz: d9bfc7bc5f60a86f2e2e6726440de4b319cfa64526313a999af269340ec0cf1d
3
+ metadata.gz: d81e5ad94edad9885f8b51d4adc04c64093faa6b105a4e0c9dd5caa683b116cd
4
+ data.tar.gz: 21e0f6ff7aa02a592216c17e3cbda31341ffcec92835e04141d0fb862787de84
5
5
  SHA512:
6
- metadata.gz: 3696eebb93e5975e8336bf79018f3c1c2fd5e49015813442e5586794ad12193a1f843a43ae4a4d238eff906135434547473c49e327c6d03522f92766f6759c5b
7
- data.tar.gz: a7761da6c072f0bdbc48468227177244ab15a8e46d392ae930e8734e555134c237f4aecb928aef6ab669e430752bd838c803844b2fe30a79a44b7e8827121ddf
6
+ metadata.gz: bf07407aa6e23d2460f58e5d636333eb3fa72977d210d993a78e616813a2f47fec1f426cd22e4ad58f0d648a435ae8be77fa003a7c530de9d11a30d768323fd3
7
+ data.tar.gz: ce7f84ec2833f5fa5ed4aa07b7ffc7cbbd187d3aa27adeec2282ccf380bf0d137e73ac3d64900b9b6c6ae65f1ecbc268711d193b767e07e3a348f3f94eac9133
data/README.md CHANGED
@@ -44,6 +44,8 @@ Some guide documentation is available to explain each of kithe's major functiona
44
44
  * Not coupled to any other kithe components, could be used independently, hypothetically on any ActiveRecord model.
45
45
  * Written after review of "prior art" in [sunspot](https://github.com/sunspot/sunspot) and [searchkick](https://github.com/ankane/searchkick) (which both used AR callback-based indexing), and others.
46
46
 
47
+ * A [recommended approach for using Blacklight](./guides/blacklight_approach.md) with search result view templates based on actual ActiveRecord models. It is totally optional to use Blacklight at all with kithe, or to use this approach if you do.
48
+
47
49
  ### Also
48
50
 
49
51
  * [Kithe::Parameters](./app/models/kithe/parameters.rb) provides some shortcuts around Rails "strong params" for attr_json serialized attributes.
@@ -9,7 +9,7 @@ module Kithe
9
9
  # For a complete overview, see the [Guide Documentation](../../../guides/solr_indexing.md)
10
10
  #
11
11
  # The Solr instance to send updates to is global configuration:
12
- # Kithe::Indexable.settings.solr_url = "http://localhost:8983/solr/collection_name"
12
+ # Kithe.indexable_settings.solr_url = "http://localhost:8983/solr/collection_name"
13
13
  #
14
14
  # To configure how a model is mapped to a Solr document, you create a `Kithe::Indexer` sub-class, which
15
15
  # can use our obj_extract method, as well as any other traject indexer code.
@@ -52,91 +52,6 @@ module Kithe
52
52
  module Indexable
53
53
  extend ActiveSupport::Concern
54
54
 
55
- class IndexableSettings
56
- attr_accessor :solr_url, :writer_class_name, :writer_settings, :model_name_solr_field, :disable_callbacks
57
- def initialize(solr_url:, writer_class_name:, writer_settings:, model_name_solr_field:, disable_callbacks: false)
58
- @solr_url = solr_url
59
- @writer_class_name = writer_class_name
60
- @writer_settings = writer_settings
61
- @model_name_solr_field = model_name_solr_field
62
- end
63
-
64
- # Use configured solr_url, and merge together with configured
65
- # writer_settings
66
- def writer_settings
67
- if solr_url
68
- { "solr.url" => solr_url }.merge(@writer_settings)
69
- else
70
- @writer_settings
71
- end
72
- end
73
-
74
- # Turn writer_class_name into an actual Class object.
75
- def writer_class
76
- writer_class_name.constantize
77
- end
78
-
79
- # Instantiate a new writer based on `writer_class_name` and `writer_settings`
80
- def writer_instance!(additional_settings = {})
81
- writer_class.new(writer_settings.merge(additional_settings))
82
- end
83
- end
84
-
85
- # Global Kithe::Indexable settings, actually a Kithe::Indexable::Settings
86
- # object, but you will generally use it as a simple value object with getters
87
- # and setters.
88
- #
89
- # * solr_url: Where to send to Solr when indexing, the base url
90
- #
91
- # Kithe::Indexable.settings.solr_url = "http://localhost:8983/solr/collection_name"
92
- #
93
- # * model_name_solr_field: If you'd like a custom solr field to store model class name in.
94
- #
95
- # Kithe::Indexable.settings.model_name_solr_field = "my_model_name_field"
96
- #
97
- # * writer_settings: Settings to be passed to the Traject writer, by default a
98
- # Traject::SolrJsonWriter. To maintain the default settings, best to merge
99
- # your new ones into defaults.
100
- #
101
- # Kithe::Indexable.settings.writer_settings.merge!(
102
- # # by default we send a softCommit on every update, maybe you
103
- # # want not to?
104
- # "solr_writer.solr_update_args" => {}
105
- # # extra long timeout?
106
- # "solr_writer.http_timeout" => 100
107
- # )
108
- #
109
- # * writer_class_name: By default Traject::SolrJsonWriter, but maybe
110
- # you want to set to some other Traject::Writer. The writer Kithe::Indexable
111
- # will send index add/remove requests to.
112
- #
113
- # Kithe::Indexable.settings.writer_class_name = "Traject::SomeOtherWriter"
114
- #
115
- # * disable_callbacks: set to true to globally disable automatic after_commit
116
- mattr_accessor :settings do
117
- # set up default settings
118
- IndexableSettings.new(
119
- solr_url: "http://localhost:8983/solr/default",
120
- model_name_solr_field: "model_name_ssi",
121
- writer_class_name: "Traject::SolrJsonWriter",
122
- writer_settings: {
123
- # as default we tell the solrjsonwriter to use no threads,
124
- # no batching. softCommit on every update. Least surprising
125
- # default configuration.
126
- "solr_writer.thread_pool" => 0,
127
- "solr_writer.batch_size" => 1,
128
- "solr_writer.solr_update_args" => { softCommit: true },
129
- "solr_writer.http_timeout" => 3,
130
- "logger" => Rails.logger,
131
-
132
- # MAYBE? no skippable exceptions please
133
- # "solr_writer.skippable_exceptions" => []
134
- },
135
- disable_callbacks: false
136
- )
137
- end
138
-
139
-
140
55
  # Set some indexing parameters for the block yielded. For instance, to batch updates:
141
56
  #
142
57
  # Kithe::Indexable.index_with(batching: true)
@@ -163,12 +78,12 @@ module Kithe
163
78
 
164
79
  # Are automatic after_commit callbacks currently enabled? Will check a number
165
80
  # of things to see, as we have a number of places these can be turned on/off.
166
- # * Globally in `Kithe::Indexable.settings.disable_callback`
81
+ # * Globally in `Kithe.indexable_settings.disable_callbacks`
167
82
  # * On class or instance using class_attribute `kithe_indexable_auto_callbacks`
168
83
  # * If no kithe_indexable_mapper is configured on record, then no callbacks.
169
84
  # * Using thread-current settings usually set by .index_with
170
85
  def self.auto_callbacks?(model)
171
- !Kithe::Indexable.settings.disable_callbacks &&
86
+ !Kithe.indexable_settings.disable_callbacks &&
172
87
  model.kithe_indexable_auto_callbacks &&
173
88
  model.kithe_indexable_mapper &&
174
89
  !ThreadSettings.current.disabled_callbacks?
@@ -68,7 +68,7 @@ module Kithe
68
68
  # Could be an explicit writer passed into #initialize, or a current thread-settings
69
69
  # writer, or a new writer created from global settings.
70
70
  def writer
71
- @writer ||= ThreadSettings.current.writer || Kithe::Indexable.settings.writer_instance!
71
+ @writer ||= ThreadSettings.current.writer || Kithe.indexable_settings.writer_instance!
72
72
  end
73
73
 
74
74
  # Is this record supposed to be represented in the solr index?
@@ -81,7 +81,7 @@ module Kithe
81
81
  @writer ||= begin
82
82
  if @batching
83
83
  @local_writer = true
84
- Kithe::Indexable.settings.writer_instance!("solr_writer.batch_size" => 100)
84
+ Kithe.indexable_settings.writer_instance!("solr_writer.batch_size" => 100)
85
85
  end
86
86
  end
87
87
  end
@@ -18,9 +18,14 @@ module Kithe
18
18
  # * A Kithe::Indexer will automatically index the source record #id to Solr object
19
19
  # #id, and the source record class name to Solr field `model_name_ssi`. (That uses
20
20
  # Blacklight conventions for dynamic field names, if you'd like to change the field name
21
- # used, set `Kithe::Indexable.settings.model_name_solr_field=`)
21
+ # used, set `Kithe.indexable_settings.model_name_solr_field=`)
22
22
  #
23
23
  # * ID and model_name are set, so the AR object can be easily fetched later from Solr results.
24
+ # * You can customize what Solr field the model_name is sent to with
25
+ # `Kithe.indexable_settings.model_name_solr_field=`, by default `model_name_ssi`, using
26
+ # a blacklight dynamic field template `*_ssi`.
27
+ # * You can customize what ActiveRecord model property is sent to Solr `id` field with
28
+ # `Kithe.indexable_settings.solr_id_value_attribute=`, by default the AR pk in model#id.
24
29
  #
25
30
  # Note that there are no built-in facilities for automatically sending every field of your model
26
31
  # to Solr, round-trippable or not. The expected usage pattern is sending to Solr only
@@ -43,9 +48,8 @@ module Kithe
43
48
  #
44
49
  # TODO We might not actually want to do these automatically, or allow it to be disabled?
45
50
  configure do
46
- # hard-coded id -> id for now. id is a UUID. Can be made configurable?
47
- to_field "id", obj_extract("id")
48
- to_field Kithe::Indexable.settings.model_name_solr_field, obj_extract("class", "name")
51
+ to_field "id", obj_extract(Kithe.indexable_settings.solr_id_value_attribute)
52
+ to_field Kithe.indexable_settings.model_name_solr_field, obj_extract("class", "name")
49
53
  end
50
54
  end
51
55
  end
@@ -16,7 +16,7 @@ module Kithe
16
16
  # delete(id)
17
17
  # end
18
18
  #
19
- # It is searching for any Solr object with a `Kithe::Indexable.settings.model_name_solr_field`
19
+ # It is searching for any Solr object with a `Kithe.indexable_settings.model_name_solr_field`
20
20
  # field (default `model_name_ssi`). Then, it takes the ID and makes sure it exists in
21
21
  # the database using Kithe::Model. At the moment we are assuming everything is in Kithe::Model,
22
22
  # rather than trying to use the `model_name_ssi` to fetch from different tables. Could
@@ -26,10 +26,12 @@ module Kithe
26
26
  #
27
27
  # A bit hacky implementation, it might be nice to support a progress bar, we
28
28
  # don't now.
29
- def self.solr_orphan_ids(batch_size: 100, solr_url: Kithe::Indexable.settings.solr_url)
29
+ def self.solr_orphan_ids(batch_size: 100, solr_url: Kithe.indexable_settings.solr_url)
30
30
  return enum_for(:solr_index_orphan_ids) unless block_given?
31
31
 
32
- model_name_solr_field = Kithe::Indexable.settings.model_name_solr_field
32
+ model_name_solr_field = Kithe.indexable_settings.model_name_solr_field
33
+ model_solr_id_attr = Kithe.indexable_settings.solr_id_value_attribute
34
+
33
35
  solr_page = -1
34
36
 
35
37
  rsolr = RSolr.connect :url => solr_url
@@ -46,14 +48,14 @@ module Kithe
46
48
 
47
49
  break if solr_ids.empty?
48
50
 
49
- (solr_ids - Kithe::Model.where(id: solr_ids).pluck(:id)).each do |orphaned_id|
51
+ (solr_ids - Kithe::Model.where(model_solr_id_attr => solr_ids).pluck(model_solr_id_attr)).each do |orphaned_id|
50
52
  yield orphaned_id
51
53
  end
52
54
  end
53
55
  end
54
56
 
55
57
  # Finds any Solr objects that have a `model_name_ssi` field
56
- # (or `Kithe::Indexable.settings.model_name_solr_field` if non-default), but don't
58
+ # (or `Kithe.indexable_settings.model_name_solr_field` if non-default), but don't
57
59
  # exist in the rdbms, and deletes them from Solr, then issues a commit.
58
60
  #
59
61
  # Under normal use, you shouldn't have to do this, but can if your Solr index
@@ -65,7 +67,7 @@ module Kithe
65
67
  # A bit hacky implementation, it might be nice to have a progress bar, we don't now.
66
68
  #
67
69
  # Does return an array of any IDs deleted.
68
- def self.delete_solr_orphans(batch_size: 100, solr_url: Kithe::Indexable.settings.solr_url)
70
+ def self.delete_solr_orphans(batch_size: 100, solr_url: Kithe.indexable_settings.solr_url)
69
71
  rsolr = RSolr.connect :url => solr_url
70
72
  deleted_ids = []
71
73
 
@@ -83,10 +85,20 @@ module Kithe
83
85
  # using Rsolr. Pretty trivial.
84
86
  #
85
87
  # Intended for dev/test instances, not really production.
86
- def self.delete_all(solr_url: Kithe::Indexable.settings.solr_url)
88
+ # @param commit :soft, :hard, or false. Default :hard
89
+ def self.delete_all(solr_url: Kithe.indexable_settings.solr_url, commit: :hard)
87
90
  rsolr = RSolr.connect :url => solr_url
88
- rsolr.delete_by_query("*:*")
89
- rsolr.commit
91
+
92
+ # RSolr is a bit under-doc'd, but this SEEMS to work to send a commit
93
+ # or softCommit instruction with the delete request.
94
+ params = {}
95
+ if commit == :hard
96
+ params[:commit] = true
97
+ elsif commit == :soft
98
+ params[:softCommit] = true
99
+ end
100
+
101
+ rsolr.delete_by_query("*:*", params: params)
90
102
  end
91
103
  end
92
104
  end
@@ -1,4 +1,5 @@
1
1
  require "kithe/engine"
2
+ require 'kithe/indexable_settings'
2
3
 
3
4
  module Kithe
4
5
  # for ruby-progressbar
@@ -15,4 +16,67 @@ module Kithe
15
16
  def self.railtie_namespace
16
17
  Kithe::Engine
17
18
  end
19
+
20
+ # Global Kithe::Indexable settings, actually a Kithe::IndexableSettings
21
+ # object, but you will generally use it as a simple value object with getters
22
+ # and setters.
23
+ #
24
+ # * solr_url: Where to send to Solr when indexing, the base url
25
+ #
26
+ # Kithe.indexable_settings.solr_url = "http://localhost:8983/solr/collection_name"
27
+ #
28
+ # * model_name_solr_field: If you'd like a custom solr field to store model class name in.
29
+ #
30
+ # Kithe.indexable_settings.model_name_solr_field = "my_model_name_field"
31
+ #
32
+ # * solr_id_value_attribute: What attribute from your AR models to send to Solr
33
+ # `id` uniqueKey field, default the AR `id` pk, you may wish to set to `friendlier_id`.
34
+ #
35
+ # * writer_settings: Settings to be passed to the Traject writer, by default a
36
+ # Traject::SolrJsonWriter. To maintain the default settings, best to merge
37
+ # your new ones into defaults.
38
+ #
39
+ # Kithe.indexable_settings.writer_settings.merge!(
40
+ # # by default we send a softCommit on every update, maybe you
41
+ # # want not to?
42
+ # "solr_writer.solr_update_args" => {},
43
+ # # extra long timeout?
44
+ # "solr_writer.http_timeout" => 100
45
+ # )
46
+ #
47
+ # * writer_class_name: By default Traject::SolrJsonWriter, but maybe
48
+ # you want to set to some other Traject::Writer. The writer Kithe::Indexable
49
+ # will send index add/remove requests to.
50
+ #
51
+ # Kithe.indexable_settings.writer_class_name = "Traject::SomeOtherWriter"
52
+ #
53
+ # * disable_callbacks: set to true to globally disable automatic after_commit callbacks
54
+ #
55
+ #
56
+ # The settings need to live here not in Kithe::Indexable, to avoid terrible
57
+ # Rails dev-mode class-reloading weirdnesses. This module is not reloaded.
58
+ mattr_accessor :indexable_settings do
59
+ # set up default settings
60
+ IndexableSettings.new(
61
+ solr_url: "http://localhost:8983/solr/default",
62
+ model_name_solr_field: "model_name_ssi",
63
+ solr_id_value_attribute: "id",
64
+ writer_class_name: "Traject::SolrJsonWriter",
65
+ writer_settings: {
66
+ # as default we tell the solrjsonwriter to use no threads,
67
+ # no batching. softCommit on every update. Least surprising
68
+ # default configuration.
69
+ "solr_writer.thread_pool" => 0,
70
+ "solr_writer.batch_size" => 1,
71
+ "solr_writer.solr_update_args" => { softCommit: true },
72
+ "solr_writer.http_timeout" => 3,
73
+ "logger" => Rails.logger,
74
+
75
+ # MAYBE? no skippable exceptions please
76
+ # "solr_writer.skippable_exceptions" => []
77
+ },
78
+ disable_callbacks: false
79
+ )
80
+ end
81
+
18
82
  end
@@ -0,0 +1,38 @@
1
+ require 'kithe/blacklight_tools/search_service_bulk_load'
2
+
3
module Kithe
  module BlacklightTools
    # Ready-made Blacklight::SearchService subclass whose only addition is
    # Kithe::BlacklightTools::SearchServiceBulkLoad.
    #
    # Use it when a stock Blacklight::SearchService plus bulk loading is all
    # you need. In your CatalogController:
    #
    #     require 'kithe/blacklight_tools/bulk_loading_search_service'
    #     class CatalogController < ApplicationController
    #       include Blacklight::Catalog
    #       # ...
    #
    #       self.search_service_class = Kithe::BlacklightTools::BulkLoadingSearchService
    #
    #       # ...
    #     end
    #
    # Do NOT sub-class this BulkLoadingSearchService in a local app or gem.
    # When you need a SearchService with further customization, write your own
    # SearchService subclass and
    # `include Kithe::BlacklightTools::SearchServiceBulkLoad` in it directly.
    # This class exists purely as a convenience for the nothing-else-needed case.
    #
    # Kithe devs: keep this class limited to
    # `include Kithe::BlacklightTools::SearchServiceBulkLoad`, so the statement
    # above stays true!
    #
    # Note: this file lives in `./lib` rather than `./app` so the app never
    # auto-loads it. Kithe does not require Blacklight, and loading this file
    # without Blacklight would raise an error -- hence the explicit "require"
    # shown above.
    class BulkLoadingSearchService < Blacklight::SearchService
      include Kithe::BlacklightTools::SearchServiceBulkLoad
    end
  end
end
@@ -0,0 +1,54 @@
1
module Kithe
  module BlacklightTools
    # Mix-in for a Blacklight::SearchService. After the search has run, it bulk
    # loads the actual AR records corresponding to the Solr hits and assigns each
    # one to the `model` attribute of its SolrDocument in the results.
    #
    # A very basic, rough implementation for basic use cases.
    #
    # * Assumes every document that comes back in the Solr results was indexed
    #   from a Kithe::Model, and that its Solr ID is the Kithe::Model `id` pk, or
    #   the value of the AR model attribute named in
    #   `Kithe.indexable_settings.solr_id_value_attribute`.
    #
    # * Requires your SolrDocument class to have a `model` attribute; you can
    #   just add `attr_accessor :model` to the local SolrDocument class BL
    #   generated in `./app/models/solr_document.rb`. Loaded models are stored
    #   there on your results.
    #
    # Just `include` this module in a Blacklight::SearchService subclass. If you
    # need no additional SearchService customization, only the standard
    # Blacklight::SearchService with this functionality, see the convenience
    # class Kithe::BlacklightTools::BulkLoadingSearchService.
    #
    # Class attributes added to the including class:
    #
    # * bulk_load_records: defaults to true; when falsy, search_results skips
    #   the bulk load entirely.
    # * bulk_load_scope: optional callable, instance_exec'd on the Kithe::Model
    #   relation before records are fetched.
    #
    # SORRY: No automated tests at present, too hard for us at the moment to
    # figure out how to test a Blacklight extension in a reliable and sane way.
    module SearchServiceBulkLoad
      extend ActiveSupport::Concern

      included do
        class_attribute :bulk_load_records, default: true
        class_attribute :bulk_load_scope
      end

      # Runs the inherited search, then (unless bulk loading is switched off)
      # sets `model` on every SolrDocument in the response that has a matching
      # database record. Solr documents with no matching record are reported in
      # a single warning log line.
      #
      # Returns the same two values the superclass produced, as a two-element array.
      def search_results
        response, documents = super
        return [response, documents] unless bulk_load_records

        id_attribute = Kithe.indexable_settings.solr_id_value_attribute
        docs_by_id = response.documents.each_with_object({}) do |solr_doc, lookup|
          lookup[solr_doc.id] = solr_doc
        end

        relation = Kithe::Model.where(id_attribute => docs_by_id.keys)
        relation = relation.instance_exec(&bulk_load_scope) if bulk_load_scope

        relation.find_each do |record|
          docs_by_id[record.send(id_attribute)].model = record
        end

        missing_docs = docs_by_id.values.select { |solr_doc| solr_doc.model.nil? }
        if missing_docs.present?
          Rails.logger.warn("Kithe::Blacklight::BulkLoading: Missing db records for solr doc id's: #{missing_docs.collect(&:id).join(' ')}")
        end

        [response, documents]
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
module Kithe
  # Value object holding the settings used when indexing to Solr.
  #
  # Attributes (all readable and writable):
  #
  # * solr_url: base URL of the Solr instance to send updates to; may be nil.
  # * writer_class_name: String name of the writer class to instantiate.
  # * writer_settings: Hash of settings handed to the writer (the reader is
  #   custom, see #writer_settings).
  # * model_name_solr_field: Solr field name used to store the model class name.
  # * solr_id_value_attribute: name of the model attribute sent to the Solr `id` field.
  # * disable_callbacks: true to globally disable automatic indexing callbacks.
  class IndexableSettings
    attr_accessor :solr_url, :writer_class_name, :model_name_solr_field,
                  :solr_id_value_attribute, :disable_callbacks

    # Only the writer is generated; the reader is defined by hand below, and
    # generating it here too would just be redefined.
    attr_writer :writer_settings

    # All keywords except `disable_callbacks` (default false) are required.
    def initialize(solr_url:, writer_class_name:, writer_settings:,
                   model_name_solr_field:, solr_id_value_attribute:, disable_callbacks: false)
      @solr_url = solr_url
      @writer_class_name = writer_class_name
      @writer_settings = writer_settings
      @model_name_solr_field = model_name_solr_field
      @solr_id_value_attribute = solr_id_value_attribute
      # Previously this argument was accepted but never stored, so passing
      # `disable_callbacks: true` had no effect.
      @disable_callbacks = disable_callbacks
    end

    # The settings to hand to the writer.
    #
    # When `solr_url` is set, returns a NEW Hash: `"solr.url" => solr_url`
    # with the configured writer settings merged on top, so an explicit
    # "solr.url" key in the configured settings wins. Mutating that returned
    # Hash does not change the stored configuration. When `solr_url` is not
    # set, the configured Hash itself is returned.
    def writer_settings
      return @writer_settings unless solr_url

      { "solr.url" => solr_url }.merge(@writer_settings)
    end

    # Turn writer_class_name into an actual Class object.
    def writer_class
      writer_class_name.constantize
    end

    # Instantiate a new writer of `writer_class`, passing it `writer_settings`
    # with `additional_settings` merged on top (additional settings win).
    def writer_instance!(additional_settings = {})
      writer_class.new(writer_settings.merge(additional_settings))
    end
  end
end