common_indexer 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e1a9fecdbf7f2fc69bc396780a18870cf644fbf57d3d2cc8a58d8ce288916660
4
- data.tar.gz: a0308434a2594a856fd864dabfbd45f6c3dd193f3373e37bc415b505071694c1
3
+ metadata.gz: 4461d22a5d4531a8ddf037db23a49802daf2809c997ad5004d1df38efd47b3ce
4
+ data.tar.gz: fe7b4728cb10b2241b8b45b241d08eb0fa34d5ee9904fd09d8f7665d1fc4f047
5
5
  SHA512:
6
- metadata.gz: c4ef48902ee9fcda97963a124914786ccc780a848cf875be093211418330a1e7227e9f1e978239c574bdedee69fbc550a5944a4369ed0313e0c64bf2adcc6bc1
7
- data.tar.gz: 755eb2c497f8a52098635f22f8c5018bcf170a35940ccbb7e71a51c9bb3d0f5647eb5cb25fc6998f128dfa2638c421f81293f62b1864af9e0dc3c75653c52489
6
+ metadata.gz: c402bf32e0fbb80a87a0204269f9c95a3b772f19e82342be4b35db952e3444398958527ec92b3a61082d0dd6c4c75e14285638941756f6efb02f003e68fc29c5
7
+ data.tar.gz: 75210f819de79a6fbcc569bcf4be859d04c1b1f4c8144bbd4783ec4a2d19cb83ddc26fc0e754f7abb8fa5807b2ec48e4278f572252b332c5a8a728804d8d436e
data/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  # CommonIndexer
2
2
  [![Build Status](https://travis-ci.com/nulib/common-indexer.svg?branch=master)](https://travis-ci.com/nulib/common-indexer)
3
3
 
4
- Indexes metadata into a central AWS Elasticsearch instance. The gem indexes based on a hash returned by the method `#to_common_index` defined in your model using an `after_save` hook. The indexer also sanitizes the input by only allowing pre-configured hash keys.
4
+ Indexes metadata into a central AWS Elasticsearch instance. The gem indexes based on a hash returned by the method `#to_common_index` defined in your model using an `after_save` hook.
5
5
 
6
6
  ## Installation
7
7
 
@@ -28,9 +28,6 @@ Add `common_indexer` to the appropriate Rails config file (e.g. `config/settings
28
28
  common_indexer:
29
29
  endpoint: http://localhost:9201/ # default 'http://localhost:9200'
30
30
  index_name: new-index # default 'common'
31
- allowed_keys: # default ['title']
32
- - title
33
- - creator
34
31
  ```
35
32
 
36
33
  Add an initializer to configure the CommonIndexer gem with the app settings:
@@ -40,13 +37,12 @@ Add an initializer to configure the CommonIndexer gem with the app settings:
40
37
  ::CommonIndexer.config do |config|
41
38
  config.endpoint = Settings.common_indexer.endpoint
42
39
  config.index_name = Settings.common_indexer.index_name
43
- config.allowed_keys = Settings.common_indexer.allowed_keys
44
40
  end
45
41
  ```
46
42
 
47
43
  ## Usage
48
44
 
49
- Include the CommonIndexer into your model that you want to index with `include ::CommonIndexer::Base`, and define a `#to_common_index` method in that model. `#to_common_index` should return a hash with metadata key/values that conform to the allowed keys defined in the local configuration or fall back to the defaults.
45
+ Include the CommonIndexer into your model that you want to index with `include ::CommonIndexer::Base`, and define a `#to_common_index` method in that model. `#to_common_index` should return a hash with metadata key/values that conform to the common index mapping.
50
46
 
51
47
  ```ruby
52
48
  class Example < ActiveFedora::Base
@@ -22,6 +22,8 @@ Gem::Specification.new do |spec|
22
22
 
23
23
  spec.add_dependency 'active-fedora'
24
24
  spec.add_dependency 'activesupport'
25
+ spec.add_dependency 'dry-configurable'
26
+ spec.add_dependency 'json'
25
27
  spec.add_dependency 'typhoeus'
26
28
  spec.add_dependency 'elasticsearch'
27
29
  spec.add_development_dependency 'bixby'
@@ -0,0 +1,154 @@
1
+ {
2
+ "properties": {
3
+ "full_text": {
4
+ "type": "text"
5
+ },
6
+ "abstract": {
7
+ "type": "text",
8
+ "copy_to": "full_text"
9
+ },
10
+ "admin_set": {
11
+ "type": "nested",
12
+ "properties": {
13
+ "id": {
14
+ "type": "keyword"
15
+ },
16
+ "title": {
17
+ "type": "text",
18
+ "copy_to": ["full_text", "admin_set_facet"]
19
+ }
20
+ }
21
+ },
22
+ "caption": {
23
+ "type": "text",
24
+ "copy_to": "full_text"
25
+ },
26
+ "collection": {
27
+ "type": "nested",
28
+ "properties": {
29
+ "id": {
30
+ "type": "keyword"
31
+ },
32
+ "title": {
33
+ "type": "text",
34
+ "copy_to": ["full_text", "collection_facet"]
35
+ }
36
+ }
37
+ },
38
+ "contributor": {
39
+ "type": "nested",
40
+ "properties": {
41
+ "role": {
42
+ "type": "keyword"
43
+ },
44
+ "uri": {
45
+ "type": "keyword"
46
+ },
47
+ "label": {
48
+ "type": "text",
49
+ "copy_to": ["full_text", "contributor_facet"]
50
+ }
51
+ }
52
+ },
53
+ "date": {
54
+ "type": "text"
55
+ },
56
+ "description": {
57
+ "type": "text"
58
+ },
59
+ "keyword": {
60
+ "type": "keyword",
61
+ "copy_to": "full_text"
62
+ },
63
+ "language": {
64
+ "type": "nested",
65
+ "properties": {
66
+ "uri": {
67
+ "type": "keyword"
68
+ },
69
+ "label": {
70
+ "type": "text",
71
+ "copy_to": ["full_text", "language_facet"]
72
+ }
73
+ }
74
+ },
75
+ "location": {
76
+ "type": "geo_point"
77
+ },
78
+ "permalink": {
79
+ "type": "keyword"
80
+ },
81
+ "provenance": {
82
+ "type": "text"
83
+ },
84
+ "publisher": {
85
+ "type": "text",
86
+ "copy_to": "publisher_facet"
87
+ },
88
+ "rights_holder": {
89
+ "type": "text"
90
+ },
91
+ "source": {
92
+ "type": "text"
93
+ },
94
+ "subject": {
95
+ "type": "nested",
96
+ "properties": {
97
+ "role": {
98
+ "type": "keyword"
99
+ },
100
+ "uri": {
101
+ "type": "keyword"
102
+ },
103
+ "label": {
104
+ "type": "text",
105
+ "copy_to": ["full_text", "subject_facet"]
106
+ }
107
+ }
108
+ },
109
+ "title": {
110
+ "type": "object",
111
+ "properties": {
112
+ "primary": {
113
+ "type": "text",
114
+ "copy_to": "full_text"
115
+ },
116
+ "alternate": {
117
+ "type": "text",
118
+ "copy_to": "full_text"
119
+ }
120
+ }
121
+ },
122
+ "visibility": {
123
+ "type": "keyword",
124
+ "copy_to": "visibility_facet"
125
+ },
126
+ "expanded_date": {
127
+ "type": "date"
128
+ },
129
+ "year": {
130
+ "type": "integer"
131
+ },
132
+ "extra_fields": {
133
+ "type": "nested"
134
+ }
135
+ },
136
+ "dynamic_templates": [
137
+ {
138
+ "facets": {
139
+ "match": "*_facet",
140
+ "mapping": {
141
+ "type": "keyword"
142
+ }
143
+ }
144
+ },
145
+ {
146
+ "nested": {
147
+ "match": "*_nested",
148
+ "mapping": {
149
+ "type": "nested"
150
+ }
151
+ }
152
+ }
153
+ ]
154
+ }
@@ -7,6 +7,7 @@ module CommonIndexer # :nodoc:
7
7
 
8
8
  included do
9
9
  after_save :update_common_index
10
+ after_destroy :delete_document
10
11
  end
11
12
 
12
13
  # Override to_common_index in the including model and return a hash of common index attributes
@@ -14,11 +15,21 @@ module CommonIndexer # :nodoc:
14
15
  raise CommonIndexer::Error, "Index Error: #{self.class.name} does not implement #to_common_index!"
15
16
  end
16
17
 
18
+ def delete_document
19
+ CommonIndexer.client.delete index: CommonIndexer.index_name,
20
+ id: id,
21
+ type: '_doc'
22
+ rescue Elasticsearch::Transport::Transport::Error => err
23
+ Rails.logger.warn("Common Index delete failure: #{err.message}")
24
+ end
25
+
17
26
  def update_common_index
18
27
  CommonIndexer.client.index index: CommonIndexer.index_name,
19
28
  id: id,
20
- type: self.class.name.underscore,
21
- body: CommonIndexer.sanitize(to_common_index)
29
+ type: '_doc',
30
+ body: to_common_index
31
+ rescue Elasticsearch::Transport::Transport::Error => err
32
+ Rails.logger.warn("Common Indexing failure: #{err.message}")
22
33
  end
23
34
  end
24
35
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module CommonIndexer
4
- VERSION = '0.1.1'
4
+ VERSION = '0.2.0'
5
5
  end
@@ -1,30 +1,45 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'active_support/core_ext/module/delegation'
4
+ require 'dry-configurable'
5
+ require 'json'
3
6
  require 'typhoeus'
4
7
  require 'typhoeus/adapters/faraday'
5
8
  require 'elasticsearch'
6
9
  require 'common_indexer/version'
7
10
  require 'common_indexer/base'
8
- require 'common_indexer/config'
9
11
 
10
12
  module CommonIndexer # :nodoc:
11
13
  class Error < StandardError; end
12
14
 
13
- def self.client
14
- @client ||= Elasticsearch::Client.new(hosts: config.endpoint)
15
- end
15
+ extend Dry::Configurable
16
16
 
17
- def self.index_name
18
- config.index_name
19
- end
17
+ setting :endpoint, 'http://localhost:9200/'
18
+ setting :index_name, 'common'
19
+ setting :schema, JSON.parse(File.read(File.expand_path('../../config/schema.json', __FILE__)))
20
20
 
21
- def self.sanitize(input)
22
- input.select do |key, _value|
23
- config.allowed_keys.include?(key.to_s)
21
+ class << self
22
+ delegate :index_name, to: :config
23
+
24
+ def client
25
+ @client ||= Elasticsearch::Client.new(hosts: config.endpoint)
24
26
  end
25
- end
26
27
 
27
- def self.config
28
- (@config ||= Config.new).tap { |c| yield c if block_given? }
28
+ def configure_index!
29
+ new_index = [index_name, Time.now.utc.strftime('%Y%m%d%H%M%S%3N')].join('_')
30
+ client.indices.create(index: new_index)
31
+ client.indices.put_mapping(index: new_index, type: '_doc', body: { _doc: config.schema })
32
+ reindex_into(new_index) if client.indices.exists(index: index_name)
33
+ client.indices.put_alias(index: new_index, name: index_name)
34
+ end
35
+
36
+ private
37
+
38
+ def reindex_into(new_index)
39
+ existing_index = client.indices.get(index: index_name).keys.first
40
+ raise ArgumentError, "Cannot reindex #{index_name} into itself" if existing_index == index_name
41
+ client.reindex(body: { source: { index: index_name }, dest: { index: new_index } })
42
+ client.indices.delete(index: existing_index)
43
+ end
29
44
  end
30
45
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: common_indexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Klein
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2018-07-26 00:00:00.000000000 Z
12
+ date: 2018-08-23 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: active-fedora
@@ -39,6 +39,34 @@ dependencies:
39
39
  - - ">="
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: dry-configurable
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :runtime
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ - !ruby/object:Gem::Dependency
57
+ name: json
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
42
70
  - !ruby/object:Gem::Dependency
43
71
  name: typhoeus
44
72
  requirement: !ruby/object:Gem::Requirement
@@ -156,9 +184,9 @@ files:
156
184
  - bin/console
157
185
  - bin/setup
158
186
  - common_indexer.gemspec
187
+ - config/schema.json
159
188
  - lib/common_indexer.rb
160
189
  - lib/common_indexer/base.rb
161
- - lib/common_indexer/config.rb
162
190
  - lib/common_indexer/version.rb
163
191
  homepage: https://github.com/nulib/common-indexer
164
192
  licenses:
@@ -1,13 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module CommonIndexer
4
- DEFAULT_KEYS = ['title'].freeze
5
- DEFAULT_ENDPOINT = 'http://localhost:9200/'
6
- DEFAULT_INDEX_NAME = 'common'
7
-
8
- Config = Struct.new(:endpoint, :index_name, :allowed_keys) do
9
- def initialize(endpoint = DEFAULT_ENDPOINT, index_name = DEFAULT_INDEX_NAME, allowed_keys = DEFAULT_KEYS)
10
- super
11
- end
12
- end
13
- end