active_triples-solrizer 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7557fd15f57f7f52c0adbead308b52ed7d8f0308
4
+ data.tar.gz: d7994692b486842fa27216f29e9322ebef3b47c7
5
+ SHA512:
6
+ metadata.gz: c62b03d3068b7f2b972c9527bf7098ba83e976e812ace4eea043fcffc93601dd3f121796fb6273a91e0673232b12d699dde9043be1cd473e0a5b674a57bda1f7
7
+ data.tar.gz: 2fbde62084e942428cb4ea155e21598511a0fdd919a8a5f833b45311801da4503d0ce8224acc1418c834dffb06e147a0bc66f46ab90d0d2c38948b08b41e45b9
@@ -0,0 +1 @@
1
+ service_name: travis-ci
@@ -0,0 +1,27 @@
1
+ log/*.log
2
+ .sass-cache
3
+ *.gem
4
+ *.rbc
5
+ .bundle
6
+ .config
7
+ .yardoc
8
+ Gemfile.lock
9
+ InstalledFiles
10
+ _yardoc
11
+ coverage
12
+ doc/
13
+ lib/bundler/man
14
+ pkg
15
+ rdoc
16
+ spec/reports
17
+ test/tmp
18
+ test/version_tmp
19
+ tmp
20
+ *.bundle
21
+ *.so
22
+ *.o
23
+ *.a
24
+ mkmf.log
25
+ .idea
26
+ .ruby-gemset
27
+ .ruby-version
@@ -0,0 +1,14 @@
1
+ language: ruby
2
+ bundler_args: --without debug
3
+ script: "bundle exec rspec spec"
4
+ sudo: false
5
+ cache: bundler
6
+ rvm:
7
+ - 2.2.4
8
+ - 2.3.0
9
+ - jruby-9.0.4.0
10
+ - rbx-2
11
+ matrix:
12
+ allow_failures:
13
+ - rvm: jruby-9.0.4.0
14
+ - rvm: rbx-2
@@ -0,0 +1,3 @@
1
+ * E. Lynette Rayle (elr37@cornell.edu)
2
+
3
+ Additional Contributors: Portions of the code were copied and/or modified from [ActiveFedora](https://github.com/projecthydra/active_fedora). See the [contributors list](https://github.com/projecthydra/active_fedora/blob/master/CONTRIBUTORS.md) for that project.
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ gemspec
4
+
@@ -0,0 +1,9 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ guard :rspec do
5
+ watch(%r{^spec/.+_spec\.rb$})
6
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
7
+ watch('spec/spec_helper.rb') { "spec" }
8
+ end
9
+
data/LICENSE ADDED
@@ -0,0 +1,12 @@
1
+ ##########################################################################
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
@@ -0,0 +1,253 @@
1
+ # ActiveTriples::Solrizer
2
+
3
+ [![Build Status](https://travis-ci.org/ActiveTriples/active_triples-solrizer.png?branch=master)](https://travis-ci.org/ActiveTriples/active_triples-solrizer)
4
+ [![Coverage Status](https://coveralls.io/repos/ActiveTriples/active_triples-solrizer/badge.svg?branch=master&service=github)](https://coveralls.io/github/ActiveTriples/active_triples-solrizer?branch=master)
5
+ [![Gem Version](https://badge.fury.io/rb/active_triples-solrizer.svg)](http://badge.fury.io/rb/active_triples-solrizer)
6
+
7
+ Provides a default solr implementation under the [ActiveTriples](https://github.com/ActiveTriples/ActiveTriples) framework.
8
+
9
+
10
+ ## Installation
11
+
12
+ Add this line to your application's Gemfile:
13
+
14
+ gem 'active_triples-solrizer'
15
+
16
+ And then execute:
17
+
18
+ $ bundle install
19
+
20
+ Or install it yourself as:
21
+
22
+ $ gem install active_triples-solrizer
23
+
24
+
25
+ ## Usage
26
+
27
+ Property definitions for ActiveTriples resources can be extended by adding a block to define indexing data type and modifiers (see table of supported values below).
28
+
29
+ ```
30
+ property :title, :predicate => RDF::SCHEMA.title do |index|
31
+ index.data_type = :text # specify the data type of the field in solr. See [solr/schema.xml](https://github.com/elrayle/active_triples-solrizer/blob/master/solr/schema.xml) for field type definitions.
32
+ index.as :indexed, :sortable # specify modifiers for the solr field
33
+ end
34
+ ```
35
+
36
+ | data_type | Notes |
37
+ | ----------- | ----- |
38
+ | :text | tokenized text |
39
+ | :text_en | tokenized English text |
40
+ | :string | non-tokenized string |
41
+ | :integer | |
42
+ | :long | |
43
+ | :double | |
44
+ | :float | |
45
+ | :boolean | |
46
+ | :date | format for this date field is of the form 1995-12-31T23:59:59Z; Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z |
47
+ | :coordinate | TBA - used to index the lat and long components for the "location" |
48
+ | :location | TBA - latitude/longitude|
49
+ | :guess | allow guessing of the type based on the type of the property value; NOTE: only checks the type of the first value when multiple values |
50
+
51
+
52
+ | index.as modifiers | works with types | Notes |
53
+ | ------------------ | ---------------- | ----- |
54
+ | :indexed | all types except :coordinate | searchable, but not returned in solr doc unless also has :stored modifier |
55
+ | :stored | all types except :coordinate | returned in solr doc, but not searchable unless also has :indexed modifier |
56
+ | :multiValued | all types except :boolean, :coordinate | NOTE: if not specified and multiple values exist, only the first value is included in the solr doc |
57
+ | :sortable | all types except :boolean, :coordinate, :location | numbers are stored as trie version of numeric type; :string, :text, :text_XX have an extra alphaSort field |
58
+ | :range | all numeric types including :integer, :long, :float, :double, :date | optimize for range queries |
59
+ | :vectored | valid for :text, :text_XX only | |
60
+
61
+ NOTE: Modifiers placed on types that do not support the modifier are ignored.
62
+
63
+
64
+ ## Examples
65
+
66
+ Common prep code for all examples:
67
+ ```ruby
68
+ require 'active_triples'
69
+ require 'active_triples/solrizer'
70
+
71
+ # create an in-memory repository for ad-hoc testing
72
+ ActiveTriples::Repositories.add_repository :default, RDF::Repository.new
73
+
74
+ # configure the solr url
75
+ ActiveTriples::Solrizer.configure do |config|
76
+ config.solr_uri = "http://localhost:8983/solr/active_triples"
77
+ end
78
+
79
+ # create a DummyResource for ad-hoc testing
80
+ class DummyResource < ActiveTriples::Resource
81
+ configure :type => RDF::URI('http://example.org/SomeClass')
82
+ property :title, :predicate => RDF::SCHEMA.title do |index|
83
+ index.data_type = :text
84
+ index.as :indexed, :sortable
85
+ end
86
+ property :description_si, :predicate => RDF::SCHEMA.description do |index|
87
+ index.data_type = :text
88
+ index.as :stored, :indexed
89
+ end
90
+ property :borrower_uri_i, :predicate => RDF::SCHEMA.borrower do |index|
91
+ index.data_type = :string
92
+ index.as :indexed
93
+ end
94
+ property :clip_number_simr, :predicate => RDF::SCHEMA.clipNumber do |index|
95
+ index.data_type = :integer
96
+ index.as :stored, :indexed, :multiValued, :range
97
+ end
98
+ property :price_s, :predicate => RDF::SCHEMA.price do |index|
99
+ index.data_type = :float
100
+ index.as :stored
101
+ end
102
+ property :bookEdition, :predicate => RDF::SCHEMA.bookEdition # non-indexed property
103
+ end
104
+
105
+
106
+ # initialize solr service with defaults
107
+ ActiveTriples::Solrizer::SolrService.register
108
+ ```
109
+
110
+ ### Example: Indexing Service to create solr document
111
+
112
+ ```ruby
113
+ # create a new resource
114
+ dr = DummyResource.new('http://www.example.org/dr')
115
+ dr.title = 'Test Title'
116
+ dr.description_si = 'Test text description stored and indexed.'
117
+ dr.borrower_uri_i = 'http://example.org/i/b2'
118
+ dr.clip_number_simr = [7,8,9,10]
119
+ dr.price_s = 789.01
120
+ dr.bookEdition = 'Ed. 2'
121
+ dr
122
+
123
+ # get solr doc
124
+ doc = ActiveTriples::Solrizer::IndexingService.new(dr).generate_solr_document
125
+ # => {
126
+ # :id=>"http://www.example.org/dr",
127
+ # :at_model_ssi=>"DummyResource",
128
+ # :object_profile_ss=>expected_object_profile_short_all_values,
129
+ # :title_ti=>"Test Title",
130
+ # :title_ssort=>"Test Title",
131
+ # :description_si_tsi=>"Test text description stored and indexed.",
132
+ # :borrower_uri_i_si=>"http://example.org/i/b2",
133
+ # :clip_number_simr_itsim=>[7,8,9,10],
134
+ # :price_s_fs=>789.01
135
+ # }
136
+
137
+
138
+ # persist doc to solr
139
+ ActiveTriples::Solrizer::SolrService.add(doc)
140
+ ActiveTriples::Solrizer::SolrService.commit
141
+ ```
142
+
143
+ ### Example: Profile Indexing Service to serialize/deserialize resource
144
+
145
+ ```ruby
146
+ # create a new resource with all properties having values
147
+ dr1 = DummyResource.new('http://www.example.org/dr1')
148
+ dr1.title = 'Test Title'
149
+ dr1.description_si = 'Test text description stored and indexed.'
150
+ dr1.borrower_uri_i = 'http://example.org/i/b2'
151
+ dr1.clip_number_simr = [7,8,9,10]
152
+ dr1.price_s = 789.01
153
+ dr1.bookEdition = 'Ed. 2'
154
+ dr1
155
+
156
+ # serialize resource into object profile
157
+ object_profile1 = ActiveTriples::Solrizer::ProfileIndexingService.new(dr1).export
158
+ # => '{"id":"http://www.example.org/dr1",'\
159
+ # '"title":["Test Title"],'\
160
+ # '"description_si":["Test text description stored and indexed."],'\
161
+ # '"borrower_uri_i":["http://example.org/i/b2"],'\
162
+ # '"clip_number_simr":[7,8,9,10],'\
163
+ # '"price_s":[789.01],'\
164
+ # '"bookEdition":["Ed. 2"]}'
165
+
166
+ # deserialize resource from object profile
167
+ dr1_filled = ActiveTriples::Solrizer::ProfileIndexingService.new().import( object_profile1, DummyResource )
168
+ dr1_filled.attributes
169
+ # => {"id"=>"http://www.example.org/dr1",
170
+ # "title"=>["Test Title"],
171
+ # "description_si"=>["Test text description stored and indexed."],
172
+ # "borrower_uri_i"=>["http://example.org/i/b2"],
173
+ # "clip_number_simr"=>[7, 8, 9, 10],
176
+ # "price_s"=>[789.01],
177
+ # "bookEdition"=>["Ed. 2"]}
178
+
179
+ # create a new resource with some properties with unset values
180
+ dr2 = DummyResource.new('http://www.example.org/dr2')
181
+ dr2.title = 'Test Title'
182
+ dr2.description_si = 'Test text description stored and indexed.'
183
+ dr2.price_s = 789.01
184
+ dr2.bookEdition = 'Ed. 2'
185
+ dr2
186
+
187
+ # serialize resource into object profile
188
+ object_profile2 = ActiveTriples::Solrizer::ProfileIndexingService.new(dr2).export
189
+ # => '{"id":"http://www.example.org/dr2",'\
190
+ # '"title":["Test Title"],'\
191
+ # '"description_si":["Test text description stored and indexed."],'\
192
+ # '"borrower_uri_i":[],'\
193
+ # '"clip_number_simr":[],'\
194
+ # '"price_s":[789.01],'\
195
+ # '"bookEdition":["Ed. 2"]}'
196
+
197
+ # deserialize resource from object profile
198
+ dr2_filled = ActiveTriples::Solrizer::ProfileIndexingService.new().import( object_profile2, DummyResource )
199
+ dr2_filled.attributes
200
+ # => {"id"=>"http://www.example.org/dr2",
201
+ # "title"=>["Test Title"],
202
+ # "description_si"=>["Test text description stored and indexed."],
203
+ # "borrower_uri_i"=>[],
204
+ # "clip_number_simr"=>[],
205
+ # "price_s"=>[789.01],
206
+ # "bookEdition"=>["Ed. 2"]}
207
+ ```
208
+
209
+
210
+ ### Example: Properties Indexing Service to generate solr fields based on property definitions
211
+
212
+ ```ruby
213
+ # NOTE re-use dr1 and dr2 from object profile examples
214
+
215
+ # generate property fields
216
+ property_fields1 = ActiveTriples::Solrizer::PropertiesIndexingService.new(dr1).export
217
+ # => {
218
+ # :title_ti=>"Test Title",
219
+ # :title_ssort=>"Test Title",
220
+ # :description_si_tsi=>"Test text description stored and indexed.",
221
+ # :borrower_uri_i_si=>"http://example.org/i/b2",
222
+ # :clip_number_simr_itsim=>[7,8,9,10],
223
+ # :price_s_fs=>789.01
224
+ # }
225
+
226
+ # generate property fields
227
+ property_fields2 = ActiveTriples::Solrizer::PropertiesIndexingService.new(dr2).export
228
+ # => {
229
+ # :title_ti=>"Test Title",
230
+ # :title_ssort=>"Test Title",
231
+ # :description_si_tsi=>"Test text description stored and indexed.",
232
+ # :price_s_fs=>789.01
233
+ # }
234
+ ```
235
+
236
+ ## Development Notes:
237
+
238
+ * I would like to see this expand to support specification of facets.
239
+ * The location and coordinate field types have not been tested and do not have examples.
240
+ * Some of the code in solr_service.rb is untested. It was copied from ActiveFedora as is. References to querying in that code have not been tested. Query code was not copied at the time this document was written.
241
+
242
+
243
+ ## Contributing
244
+
245
+ Please observe the following guidelines:
246
+
247
+ - Do your work in a feature branch based on ```master``` and rebase before submitting a pull request.
248
+ - Write tests for your contributions.
249
+ - Document every method you add using YARD annotations. (_Note: Annotations are sparse in the existing codebase, help us fix that!_)
250
+ - Organize your commits into logical units.
251
+ - Don't leave trailing whitespace (i.e. run ```git diff --check``` before committing).
252
+ - Use [well formed](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html) commit messages.
253
+
@@ -0,0 +1,36 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "active_triples/solrizer/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "active_triples-solrizer"
7
+ s.version = ActiveTriples::Solrizer::VERSION
8
+ s.platform = Gem::Platform::RUBY
9
+ s.authors = ["E. Lynette Rayle"]
10
+ s.homepage = 'https://github.com/ActiveTriples/active_triples-solrizer'
11
+ s.email = 'elr37@cornell.edu'
12
+ s.summary = %q{Provide default solrizer implementation for ActiveTriples.}
13
+ s.description = %q{active_triples-solrizer provides a default solr indexing implementation for ActiveTriples.}
14
+ s.license = "APACHE2"
15
+ s.required_ruby_version = '>= 2.2.0'
16
+
17
+ s.add_dependency 'rsolr'
18
+ s.add_dependency('active-triples')
19
+ s.add_dependency('solrizer')
20
+ s.add_dependency('json')
21
+
22
+ s.add_development_dependency('pry')
23
+ s.add_development_dependency('pry-byebug')
24
+ s.add_development_dependency('rdoc')
25
+ s.add_development_dependency('rspec')
26
+ s.add_development_dependency('coveralls')
27
+ s.add_development_dependency('guard-rspec')
28
+
29
+ s.files = `git ls-files`.split("\n")
30
+ s.test_files = `git ls-files -- {spec}/*`.split("\n")
31
+
32
+ s.extra_rdoc_files = [
33
+ "LICENSE",
34
+ "README.md"
35
+ ]
36
+ end
@@ -0,0 +1,38 @@
1
+ require 'active_triples/solrizer/version'
2
+ require 'active_support'
3
+ require 'solrizer'
4
+
5
+ SOLR_DOCUMENT_ID = Solrizer.default_field_mapper.id_field unless defined?(SOLR_DOCUMENT_ID)
6
+
7
+ module ActiveTriples
8
+ module Solrizer
9
+ extend ActiveSupport::Autoload
10
+ eager_autoload do
11
+ autoload :Configuration
12
+ autoload :IndexingService
13
+ autoload :ProfileIndexingService
14
+ autoload :PropertiesIndexingService
15
+ autoload :SolrService
16
+ end
17
+
18
+
19
+ # Methods for configuring the GEM
20
+ class << self
21
+ attr_accessor :configuration
22
+ end
23
+
24
+ def self.configuration
25
+ @configuration ||= Configuration.new
26
+ end
27
+
28
+ def self.reset
29
+ @configuration = Configuration.new
30
+ end
31
+
32
+ def self.configure
33
+ yield(configuration)
34
+ end
35
+
36
+ end
37
+ end
38
+
@@ -0,0 +1,54 @@
1
+ module ActiveTriples::Solrizer
2
+ class Configuration
3
+
4
+ attr_reader :solr_uri
5
+ attr_reader :read_timeout
6
+ attr_reader :open_timeout
7
+
8
+
9
+ def self.default_solr_uri
10
+ @default_solr_uri = "http://localhost:8983/solr/".freeze
11
+ end
12
+ private_class_method :default_solr_uri
13
+
14
+ def self.default_read_timeout
15
+ @default_read_timeout = 120
16
+ end
17
+ private_class_method :default_read_timeout
18
+
19
+ def self.default_open_timeout
20
+ @default_open_timeout = 120
21
+ end
22
+ private_class_method :default_open_timeout
23
+
24
+ def initialize
25
+ @solr_uri = self.class.send(:default_solr_uri)
26
+ @read_timeout = self.class.send(:default_read_timeout)
27
+ @open_timeout = self.class.send(:default_open_timeout)
28
+ end
29
+
30
+ def solr_uri=(new_solr_uri)
31
+ @solr_uri = new_solr_uri
32
+ end
33
+
34
+ def reset_solr_uri
35
+ @solr_uri = self.class.send(:default_solr_uri)
36
+ end
37
+
38
+ def read_timeout=(new_read_timeout)
39
+ @read_timeout = new_read_timeout
40
+ end
41
+
42
+ def reset_read_timeout
43
+ @read_timeout = self.class.send(:default_read_timeout)
44
+ end
45
+
46
+ def open_timeout=(new_open_timeout)
47
+ @open_timeout = new_open_timeout
48
+ end
49
+
50
+ def reset_open_timeout
51
+ @open_timeout = self.class.send(:default_open_timeout)
52
+ end
53
+ end
54
+ end