active_triples-solrizer 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.coveralls.yml +1 -0
- data/.gitignore +27 -0
- data/.travis.yml +14 -0
- data/AUTHORS.md +3 -0
- data/Gemfile +4 -0
- data/Guardfile +9 -0
- data/LICENSE +12 -0
- data/README.md +253 -0
- data/active_triples-solrizer.gemspec +36 -0
- data/lib/active_triples/solrizer.rb +38 -0
- data/lib/active_triples/solrizer/configuration.rb +54 -0
- data/lib/active_triples/solrizer/indexing_service.rb +52 -0
- data/lib/active_triples/solrizer/profile_indexing_service.rb +53 -0
- data/lib/active_triples/solrizer/properties_indexing_service.rb +139 -0
- data/lib/active_triples/solrizer/solr_service.rb +141 -0
- data/lib/active_triples/solrizer/version.rb +5 -0
- data/solr/schema.xml +438 -0
- data/solr/solrconfig.xml +179 -0
- data/spec/active_triples/solrizer/indexing_service_spec.rb +79 -0
- data/spec/active_triples/solrizer/profile_indexing_service_spec.rb +149 -0
- data/spec/active_triples/solrizer/properties_indexing_service_spec.rb +78 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/support/dummy_resource.rb +1125 -0
- metadata +209 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7557fd15f57f7f52c0adbead308b52ed7d8f0308
|
4
|
+
data.tar.gz: d7994692b486842fa27216f29e9322ebef3b47c7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c62b03d3068b7f2b972c9527bf7098ba83e976e812ace4eea043fcffc93601dd3f121796fb6273a91e0673232b12d699dde9043be1cd473e0a5b674a57bda1f7
|
7
|
+
data.tar.gz: 2fbde62084e942428cb4ea155e21598511a0fdd919a8a5f833b45311801da4503d0ce8224acc1418c834dffb06e147a0bc66f46ab90d0d2c38948b08b41e45b9
|
data/.coveralls.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
service_name: travis-ci
|
data/.gitignore
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
log/*.log
|
2
|
+
.sass-cache
|
3
|
+
*.gem
|
4
|
+
*.rbc
|
5
|
+
.bundle
|
6
|
+
.config
|
7
|
+
.yardoc
|
8
|
+
Gemfile.lock
|
9
|
+
InstalledFiles
|
10
|
+
_yardoc
|
11
|
+
coverage
|
12
|
+
doc/
|
13
|
+
lib/bundler/man
|
14
|
+
pkg
|
15
|
+
rdoc
|
16
|
+
spec/reports
|
17
|
+
test/tmp
|
18
|
+
test/version_tmp
|
19
|
+
tmp
|
20
|
+
*.bundle
|
21
|
+
*.so
|
22
|
+
*.o
|
23
|
+
*.a
|
24
|
+
mkmf.log
|
25
|
+
.idea
|
26
|
+
.ruby-gemset
|
27
|
+
.ruby-version
|
data/.travis.yml
ADDED
data/AUTHORS.md
ADDED
@@ -0,0 +1,3 @@
|
|
1
|
+
* E. Lynette Rayle (elr37@cornell.edu)
|
2
|
+
|
3
|
+
Additional Contributors: Portions of the code were copied and/or modified from [ActiveFedora](https://github.com/projecthydra/active_fedora). See the [contributors list](https://github.com/projecthydra/active_fedora/blob/master/CONTRIBUTORS.md) for that project.
|
data/Gemfile
ADDED
data/Guardfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
##########################################################################
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
3
|
+
# you may not use this file except in compliance with the License.
|
4
|
+
# You may obtain a copy of the License at
|
5
|
+
#
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
7
|
+
#
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
11
|
+
# See the License for the specific language governing permissions and
|
12
|
+
# limitations under the License.
|
data/README.md
ADDED
@@ -0,0 +1,253 @@
|
|
1
|
+
# ActiveTriples::Solrizer
|
2
|
+
|
3
|
+
[Build Status](https://travis-ci.org/ActiveTriples/active_triples-solrizer)
|
4
|
+
[Coverage Status](https://coveralls.io/github/ActiveTriples/active_triples-solrizer?branch=master)
|
5
|
+
[Gem Version](http://badge.fury.io/rb/active_triples-solrizer)
|
6
|
+
|
7
|
+
Provides a default solr implementation under the [ActiveTriples](https://github.com/ActiveTriples/ActiveTriples) framework.
|
8
|
+
|
9
|
+
|
10
|
+
## Installation
|
11
|
+
|
12
|
+
Add this line to your application's Gemfile:
|
13
|
+
|
14
|
+
gem 'active_triples-solrizer'
|
15
|
+
|
16
|
+
And then execute:
|
17
|
+
|
18
|
+
$ bundle install
|
19
|
+
|
20
|
+
Or install it yourself as:
|
21
|
+
|
22
|
+
$ gem install active_triples-solrizer
|
23
|
+
|
24
|
+
|
25
|
+
## Usage
|
26
|
+
|
27
|
+
Property definitions for ActiveTriples resources can be extended by adding a block to define indexing data type and modifiers (see table of supported values below).
|
28
|
+
|
29
|
+
```
|
30
|
+
property :title, :predicate => RDF::SCHEMA.title do |index|
|
31
|
+
index.data_type = :text # specify the data type of the field in solr. See solr/schema.xml (https://github.com/elrayle/active_triples-solrizer/blob/master/solr/schema.xml) for field type definitions.
|
32
|
+
index.as :indexed, :sortable # specify modifiers for the solr field
|
33
|
+
end
|
34
|
+
```
|
35
|
+
|
36
|
+
| data_type | Notes |
|
37
|
+
| ----------- | ----- |
|
38
|
+
| :text | tokenized text |
|
39
|
+
| :text_en | tokenized English text |
|
40
|
+
| :string | non-tokenized string |
|
41
|
+
| :integer | |
|
42
|
+
| :long | |
|
43
|
+
| :double | |
|
44
|
+
| :float | |
|
45
|
+
| :boolean | |
|
46
|
+
| :date | format for this date field is of the form 1995-12-31T23:59:59Z; Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z |
|
47
|
+
| :coordinate | TBA - used to index the lat and long components for the "location" |
|
48
|
+
| :location | TBA - latitude/longitude|
|
49
|
+
| :guess | allow guessing of the type based on the type of the property value; NOTE: only the type of the first value is checked when there are multiple values |
|
50
|
+
|
51
|
+
|
52
|
+
| index.as modifiers | works with types | Notes |
|
53
|
+
| ------------------ | ---------------- | ----- |
|
54
|
+
| :indexed | all types except :coordinate | searchable, but not returned in solr doc unless also has :stored modifier |
|
55
|
+
| :stored | all types except :coordinate | returned in solr doc, but not searchable unless also has :indexed modifier |
|
56
|
+
| :multiValued | all types except :boolean, :coordinate | NOTE: if not specified and multiple values exist, only the first value is included in the solr doc |
|
57
|
+
| :sortable | all types except :boolean, :coordinate, :location | numbers are stored as trie version of numeric type; :string, :text, :text_XX have an extra alphaSort field |
|
58
|
+
| :range | all numeric types including :integer, :long, :float, :double, :date | optimize for range queries |
|
59
|
+
| :vectored | valid for :text, :text_XX only | |
|
60
|
+
|
61
|
+
NOTE: Modifiers placed on types that do not support the modifier are ignored.
|
62
|
+
|
63
|
+
|
64
|
+
## Examples
|
65
|
+
|
66
|
+
Common prep code for all examples:
|
67
|
+
```ruby
|
68
|
+
require 'active_triples'
|
69
|
+
require 'active_triples/solrizer'
|
70
|
+
|
71
|
+
# create an in-memory repository for ad-hoc testing
|
72
|
+
ActiveTriples::Repositories.add_repository :default, RDF::Repository.new
|
73
|
+
|
74
|
+
# configure the solr url
|
75
|
+
ActiveTriples::Solrizer.configure do |config|
|
76
|
+
config.solr_uri = "http://localhost:8983/solr/#/~cores/active_triples"
|
77
|
+
end
|
78
|
+
|
79
|
+
# create a DummyResource for ad-hoc testing
|
80
|
+
class DummyResource < ActiveTriples::Resource
|
81
|
+
configure :type => RDF::URI('http://example.org/SomeClass')
|
82
|
+
property :title, :predicate => RDF::SCHEMA.title do |index|
|
83
|
+
index.data_type = :text
|
84
|
+
index.as :indexed, :sortable
|
85
|
+
end
|
86
|
+
property :description_si, :predicate => RDF::SCHEMA.description do |index|
|
87
|
+
index.data_type = :text
|
88
|
+
index.as :stored, :indexed
|
89
|
+
end
|
90
|
+
property :borrower_uri_i, :predicate => RDF::SCHEMA.borrower do |index|
|
91
|
+
index.data_type = :string
|
92
|
+
index.as :indexed
|
93
|
+
end
|
94
|
+
property :clip_number_simr, :predicate => RDF::SCHEMA.clipNumber do |index|
|
95
|
+
index.data_type = :integer
|
96
|
+
index.as :stored, :indexed, :multiValued, :range
|
97
|
+
end
|
98
|
+
property :price_s, :predicate => RDF::SCHEMA.price do |index|
|
99
|
+
index.data_type = :float
|
100
|
+
index.as :stored
|
101
|
+
end
|
102
|
+
property :bookEdition, :predicate => RDF::SCHEMA.bookEdition # non-indexed property
|
103
|
+
end
|
104
|
+
|
105
|
+
|
106
|
+
# initialize solr service with defaults
|
107
|
+
ActiveTriples::Solrizer::SolrService.register
|
108
|
+
```
|
109
|
+
|
110
|
+
### Example: Indexing Service to create solr document
|
111
|
+
|
112
|
+
```ruby
|
113
|
+
# create a new resource
|
114
|
+
dr = DummyResource.new('http://www.example.org/dr')
|
115
|
+
dr.title = 'Test Title'
|
116
|
+
dr.description_si = 'Test text description stored and indexed.'
|
117
|
+
dr.borrower_uri_i = 'http://example.org/i/b2'
|
118
|
+
dr.clip_number_simr = [7,8,9,10]
|
119
|
+
dr.price_s = 789.01
|
120
|
+
dr.bookEdition = 'Ed. 2'
|
121
|
+
dr
|
122
|
+
|
123
|
+
# get solr doc
|
124
|
+
doc = ActiveTriples::Solrizer::IndexingService.new(dr).generate_solr_document
|
125
|
+
# => {
|
126
|
+
# :id=>"http://www.example.org/dr",
|
127
|
+
# :at_model_ssi=>"DummyResource",
|
128
|
+
# :object_profile_ss=>expected_object_profile_short_all_values,
|
129
|
+
# :title_ti=>"Test Title",
|
130
|
+
# :title_ssort=>"Test Title",
|
131
|
+
# :description_si_tsi=>"Test text description stored and indexed.",
|
132
|
+
# :borrower_uri_i_si=>"http://example.org/i/b2",
|
133
|
+
# :clip_number_simr_itsim=>[7,8,9,10],
|
134
|
+
# :price_s_fs=>789.01
|
135
|
+
# }
|
136
|
+
|
137
|
+
|
138
|
+
# persist doc to solr
|
139
|
+
ActiveTriples::Solrizer::SolrService.add(doc)
|
140
|
+
ActiveTriples::Solrizer::SolrService.commit
|
141
|
+
```
|
142
|
+
|
143
|
+
### Example: Profile Indexing Service to serialize/deserialize resource
|
144
|
+
|
145
|
+
```ruby
|
146
|
+
# create a new resource with all properties having values
|
147
|
+
dr1 = DummyResource.new('http://www.example.org/dr1')
|
148
|
+
dr1.title = 'Test Title'
|
149
|
+
dr1.description_si = 'Test text description stored and indexed.'
|
150
|
+
dr1.borrower_uri_i = 'http://example.org/i/b2'
|
151
|
+
dr1.clip_number_simr = [7,8,9,10]
|
152
|
+
dr1.price_s = 789.01
|
153
|
+
dr1.bookEdition = 'Ed. 2'
|
154
|
+
dr1
|
155
|
+
|
156
|
+
# serialize resource into object profile
|
157
|
+
object_profile1 = ActiveTriples::Solrizer::ProfileIndexingService.new(dr1).export
|
158
|
+
# => '{"id":"http://www.example.org/dr1",'\
|
159
|
+
# '"title":["Test Title"],'\
|
160
|
+
# '"description_si":["Test text description stored and indexed."],'\
|
161
|
+
# '"borrower_uri_i":["http://example.org/i/b2"],'\
|
162
|
+
# '"clip_number_simr":[7,8,9,10],'\
|
163
|
+
# '"price_s":[789.01],'\
|
164
|
+
# '"bookEdition":["Ed. 2"]}'
|
165
|
+
|
166
|
+
# deserialize resource from object profile
|
167
|
+
dr1_filled = ActiveTriples::Solrizer::ProfileIndexingService.new().import( object_profile1, DummyResource )
|
168
|
+
dr1_filled.attributes
|
169
|
+
# => {"id"=>"http://www.example.org/dr1",
|
170
|
+
# "title"=>["Test Title"],
|
171
|
+
# "description_si"=>["Test text description stored and indexed."],
|
172
|
+
# "borrower_uri_i"=>["http://example.org/i/b2"],
|
173
|
+
# "clip_number_simr"=>[7, 8, 9, 10],
|
174
|
+
# "borrower_uri_i"=>[],
|
175
|
+
# "clip_number_simr"=>[],
|
176
|
+
# "price_s"=>[789.01],
|
177
|
+
# "bookEdition"=>["Ed. 2"]}
|
178
|
+
|
179
|
+
# create a new resource with some properties with unset values
|
180
|
+
dr2 = DummyResource.new('http://www.example.org/dr2')
|
181
|
+
dr2.title = 'Test Title'
|
182
|
+
dr2.description_si = 'Test text description stored and indexed.'
|
183
|
+
dr2.price_s = 789.01
|
184
|
+
dr2.bookEdition = 'Ed. 2'
|
185
|
+
dr2
|
186
|
+
|
187
|
+
# serialize resource into object profile
|
188
|
+
object_profile2 = ActiveTriples::Solrizer::ProfileIndexingService.new(dr2).export
|
189
|
+
# => '{"id":"http://www.example.org/dr2",'\
|
190
|
+
# '"title":["Test Title"],'\
|
191
|
+
# '"description_si":["Test text description stored and indexed."],'\
|
192
|
+
# '"borrower_uri_i":[],'\
|
193
|
+
# '"clip_number_simr":[],'\
|
194
|
+
# '"price_s":[789.01],'\
|
195
|
+
# '"bookEdition":["Ed. 2"]}'
|
196
|
+
|
197
|
+
# deserialize resource from object profile
|
198
|
+
dr2_filled = ActiveTriples::Solrizer::ProfileIndexingService.new().import( object_profile2, DummyResource )
|
199
|
+
dr2_filled.attributes
|
200
|
+
# => {"id"=>"http://www.example.org/dr2",
|
201
|
+
# "title"=>["Test Title"],
|
202
|
+
# "description_si"=>["Test text description stored and indexed."],
|
203
|
+
# "borrower_uri_i"=>[],
|
204
|
+
# "clip_number_simr"=>[],
|
205
|
+
# "price_s"=>[789.01],
|
206
|
+
# "bookEdition"=>["Ed. 2"]}
|
207
|
+
```
|
208
|
+
|
209
|
+
|
210
|
+
### Example: Properties Indexing Service to generate solr fields based on property definitions
|
211
|
+
|
212
|
+
```ruby
|
213
|
+
# NOTE re-use dr1 and dr2 from object profile examples
|
214
|
+
|
215
|
+
# generate property fields
|
216
|
+
property_fields1 = ActiveTriples::Solrizer::PropertiesIndexingService.new(dr1).export
|
217
|
+
# => {
|
218
|
+
# :title_ti=>"Test Title",
|
219
|
+
# :title_ssort=>"Test Title",
|
220
|
+
# :description_si_tsi=>"Test text description stored and indexed.",
|
221
|
+
# :borrower_uri_i_si=>"http://example.org/i/b2",
|
222
|
+
# :clip_number_simr_itsim=>[7,8,9,10],
|
223
|
+
# :price_s_fs=>789.01
|
224
|
+
# }
|
225
|
+
|
226
|
+
# generate property fields
|
227
|
+
property_fields2 = ActiveTriples::Solrizer::PropertiesIndexingService.new(dr2).export
|
228
|
+
# => {
|
229
|
+
# :title_ti=>"Test Title",
|
230
|
+
# :title_ssort=>"Test Title",
|
231
|
+
# :description_si_tsi=>"Test text description stored and indexed.",
|
232
|
+
# :price_s_fs=>789.01
|
233
|
+
# }
|
234
|
+
```
|
235
|
+
|
236
|
+
## Development Notes:
|
237
|
+
|
238
|
+
* I would like to see this expand to support specification of facets.
|
239
|
+
* The location and coordinate field types have not been tested and do not have examples.
|
240
|
+
* Some of the code in solr_service.rb is untested. It was copied from ActiveFedora as is. References to querying in that code have not been tested, and the query code itself had not been copied at the time this document was written.
|
241
|
+
|
242
|
+
|
243
|
+
## Contributing
|
244
|
+
|
245
|
+
Please observe the following guidelines:
|
246
|
+
|
247
|
+
- Do your work in a feature branch based on ```master``` and rebase before submitting a pull request.
|
248
|
+
- Write tests for your contributions.
|
249
|
+
- Document every method you add using YARD annotations. (_Note: Annotations are sparse in the existing codebase, help us fix that!_)
|
250
|
+
- Organize your commits into logical units.
|
251
|
+
- Don't leave trailing whitespace (i.e. run ```git diff --check``` before committing).
|
252
|
+
- Use [well formed](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html) commit messages.
|
253
|
+
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "active_triples/solrizer/version"
|
4
|
+
|
5
|
+
# Gem specification for active_triples-solrizer.
#
# The version comes from ActiveTriples::Solrizer::VERSION, and the packaged
# file lists are taken from the files tracked by git, so this must be
# evaluated inside a git checkout.
Gem::Specification.new do |s|
  s.name        = "active_triples-solrizer"
  s.version     = ActiveTriples::Solrizer::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["E. Lynette Rayle"]
  s.homepage    = 'https://github.com/ActiveTriples/active_triples-solrizer'
  s.email       = 'elr37@cornell.edu'
  s.summary     = %q{Provide default solrizer implementation for ActiveTriples.}
  s.description = %q{active_triples-solrizer provides a default solr indexing implementation for ActiveTriples.}
  # SPDX identifier for the Apache License, Version 2.0 named in LICENSE.
  # The previous value "APACHE2" is not a recognised SPDX identifier.
  s.license     = "Apache-2.0"
  s.required_ruby_version = '>= 2.2.0'

  # Runtime dependencies.
  s.add_dependency 'rsolr'
  s.add_dependency('active-triples')
  s.add_dependency('solrizer')
  s.add_dependency('json')

  # Development-only dependencies.
  s.add_development_dependency('pry')
  s.add_development_dependency('pry-byebug')
  s.add_development_dependency('rdoc')
  s.add_development_dependency('rspec')
  s.add_development_dependency('coveralls')
  s.add_development_dependency('guard-rspec')

  s.files      = `git ls-files`.split("\n")
  # A single-element brace group ("{spec}/*") is not expanded by the shell, so
  # the previous pathspec matched nothing; list the spec directory directly.
  s.test_files = `git ls-files -- spec`.split("\n")

  s.extra_rdoc_files = [
    "LICENSE",
    "README.md"
  ]
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'active_triples/solrizer/version'
|
2
|
+
require 'active_support'
|
3
|
+
require 'solrizer'
|
4
|
+
|
5
|
+
# Name of the solr document id field, taken from Solrizer's default field
# mapper. An already-defined SOLR_DOCUMENT_ID is left untouched.
unless defined?(SOLR_DOCUMENT_ID)
  SOLR_DOCUMENT_ID = Solrizer.default_field_mapper.id_field
end
|
6
|
+
|
7
|
+
module ActiveTriples
  # Namespace for the solr indexing implementation for ActiveTriples.
  #
  # Registers the gem's components for autoloading and exposes a single,
  # lazily created Configuration object shared through module-level methods.
  module Solrizer
    extend ActiveSupport::Autoload
    eager_autoload do
      autoload :Configuration
      autoload :IndexingService
      autoload :ProfileIndexingService
      autoload :PropertiesIndexingService
      autoload :SolrService
    end

    # Methods for configuring the GEM
    class << self
      # Only the writer is generated here. The reader is defined explicitly
      # below so that it can build a default Configuration on first use
      # (generating it with attr_accessor would just be redefined).
      attr_writer :configuration

      # @return [Configuration] the current configuration, created with
      #   default values the first time it is requested
      def configuration
        @configuration ||= Configuration.new
      end

      # Discards the current configuration and replaces it with a new one
      # holding default values.
      #
      # @return [Configuration] the new configuration
      def reset
        @configuration = Configuration.new
      end

      # Yields the current configuration so that it can be modified.
      # A block is required.
      #
      # @yieldparam config [Configuration] the current configuration
      # @return [Object] the value returned by the block
      def configure
        yield(configuration)
      end
    end
  end
end
|
38
|
+
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# Nested (rather than compact) module definition so that this file can be
# loaded even when the parent modules have not been defined yet.
module ActiveTriples
  module Solrizer
    # Holds the solr connection settings used by this gem: the solr URI and
    # the read/open timeouts. Every setting starts at its default value, can
    # be overwritten through its writer, and can be put back to the default
    # with the matching reset_* method.
    class Configuration
      # Default values applied by #initialize and the reset_* methods.
      DEFAULT_SOLR_URI     = "http://localhost:8983/solr/".freeze
      # NOTE(review): timeout units are not stated in this file; presumably
      # seconds - confirm against the code that consumes them.
      DEFAULT_READ_TIMEOUT = 120
      DEFAULT_OPEN_TIMEOUT = 120

      # @return [String] URI of the solr instance
      attr_accessor :solr_uri
      # @return [Integer] read timeout
      attr_accessor :read_timeout
      # @return [Integer] open timeout
      attr_accessor :open_timeout

      class << self
        private

        # @return [String] the default (frozen) solr URI
        def default_solr_uri
          DEFAULT_SOLR_URI
        end

        # @return [Integer] the default read timeout
        def default_read_timeout
          DEFAULT_READ_TIMEOUT
        end

        # @return [Integer] the default open timeout
        def default_open_timeout
          DEFAULT_OPEN_TIMEOUT
        end
      end

      # Creates a configuration with every setting at its default value.
      def initialize
        reset_solr_uri
        reset_read_timeout
        reset_open_timeout
      end

      # Restores the default solr URI.
      #
      # @return [String] the default solr URI
      def reset_solr_uri
        # The defaults are private class methods, hence the use of send.
        @solr_uri = self.class.send(:default_solr_uri)
      end

      # Restores the default read timeout.
      #
      # @return [Integer] the default read timeout
      def reset_read_timeout
        @read_timeout = self.class.send(:default_read_timeout)
      end

      # Restores the default open timeout.
      #
      # @return [Integer] the default open timeout
      def reset_open_timeout
        @open_timeout = self.class.send(:default_open_timeout)
      end
    end
  end
end
|