active_triples-solrizer 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7557fd15f57f7f52c0adbead308b52ed7d8f0308
4
+ data.tar.gz: d7994692b486842fa27216f29e9322ebef3b47c7
5
+ SHA512:
6
+ metadata.gz: c62b03d3068b7f2b972c9527bf7098ba83e976e812ace4eea043fcffc93601dd3f121796fb6273a91e0673232b12d699dde9043be1cd473e0a5b674a57bda1f7
7
+ data.tar.gz: 2fbde62084e942428cb4ea155e21598511a0fdd919a8a5f833b45311801da4503d0ce8224acc1418c834dffb06e147a0bc66f46ab90d0d2c38948b08b41e45b9
@@ -0,0 +1 @@
1
+ service_name: travis-ci
@@ -0,0 +1,27 @@
1
+ log/*.log
2
+ .sass-cache
3
+ *.gem
4
+ *.rbc
5
+ .bundle
6
+ .config
7
+ .yardoc
8
+ Gemfile.lock
9
+ InstalledFiles
10
+ _yardoc
11
+ coverage
12
+ doc/
13
+ lib/bundler/man
14
+ pkg
15
+ rdoc
16
+ spec/reports
17
+ test/tmp
18
+ test/version_tmp
19
+ tmp
20
+ *.bundle
21
+ *.so
22
+ *.o
23
+ *.a
24
+ mkmf.log
25
+ .idea
26
+ .ruby-gemset
27
+ .ruby-version
@@ -0,0 +1,14 @@
1
+ language: ruby
2
+ bundler_args: --without debug
3
+ script: "bundle exec rspec spec"
4
+ sudo: false
5
+ cache: bundler
6
+ rvm:
7
+ - 2.2.4
8
+ - 2.3.0
9
+ - jruby-9.0.4.0
10
+ - rbx-2
11
+ matrix:
12
+ allow_failures:
13
+ - rvm: jruby-9.0.4.0
14
+ - rvm: rbx-2
@@ -0,0 +1,3 @@
1
+ * E. Lynette Rayle (elr37@cornell.edu)
2
+
3
+ Additional Contributors: Portions of the code were copied and/or modified from [ActiveFedora](https://github.com/projecthydra/active_fedora). See the [contributors list](https://github.com/projecthydra/active_fedora/blob/master/CONTRIBUTORS.md) for that project.
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ gemspec
4
+
@@ -0,0 +1,9 @@
1
+ # Guardfile: tells Guard which file changes should trigger an RSpec run.
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ guard :rspec do
5
+ watch(%r{^spec/.+_spec\.rb$}) # any file under spec/ whose name ends in _spec.rb
6
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" } # lib/<path>.rb maps to spec/<path>_spec.rb
7
+ watch('spec/spec_helper.rb') { "spec" } # a spec_helper change maps to the whole spec directory
8
+ end
9
+
data/LICENSE ADDED
@@ -0,0 +1,12 @@
1
+ ##########################################################################
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
@@ -0,0 +1,253 @@
1
+ # ActiveTriples::Solrizer
2
+
3
+ [![Build Status](https://travis-ci.org/ActiveTriples/active_triples-solrizer.png?branch=master)](https://travis-ci.org/ActiveTriples/active_triples-solrizer)
4
+ [![Coverage Status](https://coveralls.io/repos/ActiveTriples/active_triples-solrizer/badge.svg?branch=master&service=github)](https://coveralls.io/github/ActiveTriples/active_triples-solrizer?branch=master)
5
+ [![Gem Version](https://badge.fury.io/rb/active_triples-solrizer.svg)](http://badge.fury.io/rb/active_triples-solrizer)
6
+
7
+ Provides a default solr implementation under the [ActiveTriples](https://github.com/ActiveTriples/ActiveTriples) framework.
8
+
9
+
10
+ ## Installation
11
+
12
+ Add this line to your application's Gemfile:
13
+
14
+ gem 'active_triples-solrizer'
15
+
16
+ And then execute:
17
+
18
+ $ bundle install
19
+
20
+ Or install it yourself as:
21
+
22
+ $ gem install active_triples-solrizer
23
+
24
+
25
+ ## Usage
26
+
27
+ Property definitions for ActiveTriples resources can be extended by adding a block to define indexing data type and modifiers (see table of supported values below).
28
+
29
+ ```
30
+ property :title, :predicate => RDF::SCHEMA.title do |index|
31
+ index.data_type = :text # specify the data type of the field in solr. See [solr/schema.xml](https://github.com/elrayle/active_triples-solrizer/blob/master/solr/schema.xml) for field type definitions.
32
+ index.as :indexed, :sortable # specify modifiers for the solr field
33
+ end
34
+ ```
35
+
36
+ | data_type | Notes |
37
+ | ----------- | ----- |
38
+ | :text | tokenized text |
39
+ | :text_en | tokenized English text |
40
+ | :string | non-tokenized string |
41
+ | :integer | |
42
+ | :long | |
43
+ | :double | |
44
+ | :float | |
45
+ | :boolean | |
46
+ | :date | format for this date field is of the form 1995-12-31T23:59:59Z; Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z |
47
+ | :coordinate | TBA - used to index the lat and long components for the "location" |
48
+ | :location | TBA - latitude/longitude|
49
+ | :guess | allow guessing of the type based on the type of the property value; NOTE: only the type of the first value is checked when there are multiple values |
50
+
51
+
52
+ | index.as modifiers | works with types | Notes |
53
+ | ------------------ | ---------------- | ----- |
54
+ | :indexed | all types except :coordinate | searchable, but not returned in solr doc unless also has :stored modifier |
55
+ | :stored | all types except :coordinate | returned in solr doc, but not searchable unless also has :indexed modifier |
56
+ | :multiValued | all types except :boolean, :coordinate | NOTE: if not specified and multiple values exist, only the first value is included in the solr doc |
57
+ | :sortable | all types except :boolean, :coordinate, :location | numbers are stored as trie version of numeric type; :string, :text, :text_XX have an extra alphaSort field |
58
+ | :range | all numeric types including :integer, :long, :float, :double, :date | optimize for range queries |
59
+ | :vectored | valid for :text, :text_XX only | |
60
+
61
+ NOTE: Modifiers placed on types that do not support the modifier are ignored.
62
+
63
+
64
+ ## Examples
65
+
66
+ Common prep code for all examples:
67
+ ```ruby
68
+ require 'active_triples'
69
+ require 'active_triples/solrizer'
70
+
71
+ # create an in-memory repository for ad-hoc testing
72
+ ActiveTriples::Repositories.add_repository :default, RDF::Repository.new
73
+
74
+ # configure the solr url
75
+ ActiveTriples::Solrizer.configure do |config|
76
+ config.solr_uri = "http://localhost:8983/solr/#/~cores/active_triples"
77
+ end
78
+
79
+ # create a DummyResource for ad-hoc testing
80
+ class DummyResource < ActiveTriples::Resource
81
+ configure :type => RDF::URI('http://example.org/SomeClass')
82
+ property :title, :predicate => RDF::SCHEMA.title do |index|
83
+ index.data_type = :text
84
+ index.as :indexed, :sortable
85
+ end
86
+ property :description_si, :predicate => RDF::SCHEMA.description do |index|
87
+ index.data_type = :text
88
+ index.as :stored, :indexed
89
+ end
90
+ property :borrower_uri_i, :predicate => RDF::SCHEMA.borrower do |index|
91
+ index.data_type = :string
92
+ index.as :indexed
93
+ end
94
+ property :clip_number_simr, :predicate => RDF::SCHEMA.clipNumber do |index|
95
+ index.data_type = :integer
96
+ index.as :stored, :indexed, :multiValued, :range
97
+ end
98
+ property :price_s, :predicate => RDF::SCHEMA.price do |index|
99
+ index.data_type = :float
100
+ index.as :stored
101
+ end
102
+ property :bookEdition, :predicate => RDF::SCHEMA.bookEdition # non-indexed property
103
+ end
104
+
105
+
106
+ # initialize solr service with defaults
107
+ ActiveTriples::Solrizer::SolrService.register
108
+ ```
109
+
110
+ ### Example: Indexing Service to create solr document
111
+
112
+ ```ruby
113
+ # create a new resource
114
+ dr = DummyResource.new('http://www.example.org/dr')
115
+ dr.title = 'Test Title'
116
+ dr.description_si = 'Test text description stored and indexed.'
117
+ dr.borrower_uri_i = 'http://example.org/i/b2'
118
+ dr.clip_number_simr = [7,8,9,10]
119
+ dr.price_s = 789.01
120
+ dr.bookEdition = 'Ed. 2'
121
+ dr
122
+
123
+ # get solr doc
124
+ doc = ActiveTriples::Solrizer::IndexingService.new(dr).generate_solr_document
125
+ # => {
126
+ # :id=>"http://www.example.org/dr",
127
+ # :at_model_ssi=>"DummyResource",
128
+ # :object_profile_ss=>expected_object_profile_short_all_values,
129
+ # :title_ti=>"Test Title",
130
+ # :title_ssort=>"Test Title",
131
+ # :description_si_tsi=>"Test text description stored and indexed.",
132
+ # :borrower_uri_i_si=>"http://example.org/i/b2",
133
+ # :clip_number_simr_itsim=>[7,8,9,10],
134
+ # :price_s_fs=>789.01
135
+ # }
136
+
137
+
138
+ # persist doc to solr
139
+ ActiveTriples::Solrizer::SolrService.add(doc)
140
+ ActiveTriples::Solrizer::SolrService.commit
141
+ ```
142
+
143
+ ### Example: Profile Indexing Service to serialize/deserialize resource
144
+
145
+ ```ruby
146
+ # create a new resource with all properties having values
147
+ dr1 = DummyResource.new('http://www.example.org/dr1')
148
+ dr1.title = 'Test Title'
149
+ dr1.description_si = 'Test text description stored and indexed.'
150
+ dr1.borrower_uri_i = 'http://example.org/i/b2'
151
+ dr1.clip_number_simr = [7,8,9,10]
152
+ dr1.price_s = 789.01
153
+ dr1.bookEdition = 'Ed. 2'
154
+ dr1
155
+
156
+ # serialize resource into object profile
157
+ object_profile1 = ActiveTriples::Solrizer::ProfileIndexingService.new(dr1).export
158
+ # => '{"id":"http://www.example.org/dr1",'\
159
+ # '"title":["Test Title"],'\
160
+ # '"description_si":["Test text description stored and indexed."],'\
161
+ # '"borrower_uri_i":["http://example.org/i/b2"],'\
162
+ # '"clip_number_simr":[7,8,9,10],'\
163
+ # '"price_s":[789.01],'\
164
+ # '"bookEdition":["Ed. 2"]}'
165
+
166
+ # deserialize resource from object profile
167
+ dr1_filled = ActiveTriples::Solrizer::ProfileIndexingService.new().import( object_profile1, DummyResource )
168
+ dr1_filled.attributes
169
+ # => {"id"=>"http://www.example.org/dr1",
170
+ # "title"=>["Test Title"],
171
+ # "description_si"=>["Test text description stored and indexed."],
172
+ # "borrower_uri_i"=>["http://example.org/i/b2"],
173
+ # "clip_number_simr"=>[7, 8, 9, 10],
174
+ # "borrower_uri_i"=>[],
175
+ # "clip_number_simr"=>[],
176
+ # "price_s"=>[789.01],
177
+ # "bookEdition"=>["Ed. 2"]}
178
+
179
+ # create a new resource with some properties with unset values
180
+ dr2 = DummyResource.new('http://www.example.org/dr2')
181
+ dr2.title = 'Test Title'
182
+ dr2.description_si = 'Test text description stored and indexed.'
183
+ dr2.price_s = 789.01
184
+ dr2.bookEdition = 'Ed. 2'
185
+ dr2
186
+
187
+ # serialize resource into object profile
188
+ object_profile2 = ActiveTriples::Solrizer::ProfileIndexingService.new(dr2).export
189
+ # => '{"id":"http://www.example.org/dr2",'\
190
+ # '"title":["Test Title"],'\
191
+ # '"description_si":["Test text description stored and indexed."],'\
192
+ # '"borrower_uri_i":[],'\
193
+ # '"clip_number_simr":[],'\
194
+ # '"price_s":[789.01],'\
195
+ # '"bookEdition":["Ed. 2"]}'
196
+
197
+ # deserialize resource from object profile
198
+ dr2_filled = ActiveTriples::Solrizer::ProfileIndexingService.new().import( object_profile2, DummyResource )
199
+ dr2_filled.attributes
200
+ # => {"id"=>"http://www.example.org/dr2",
201
+ # "title"=>["Test Title"],
202
+ # "description_si"=>["Test text description stored and indexed."],
203
+ # "borrower_uri_i"=>[],
204
+ # "clip_number_simr"=>[],
205
+ # "price_s"=>[789.01],
206
+ # "bookEdition"=>["Ed. 2"]}
207
+ ```
208
+
209
+
210
+ ### Example: Properties Indexing Service to generate solr fields based on property definitions
211
+
212
+ ```ruby
213
+ # NOTE re-use dr1 and dr2 from object profile examples
214
+
215
+ # generate property fields
216
+ property_fields1 = ActiveTriples::Solrizer::PropertiesIndexingService.new(dr1).export
217
+ # => {
218
+ # :title_ti=>"Test Title",
219
+ # :title_ssort=>"Test Title",
220
+ # :description_si_tsi=>"Test text description stored and indexed.",
221
+ # :borrower_uri_i_si=>"http://example.org/i/b2",
222
+ # :clip_number_simr_itsim=>[7,8,9,10],
223
+ # :price_s_fs=>789.01
224
+ # }
225
+
226
+ # generate property fields
227
+ property_fields2 = ActiveTriples::Solrizer::PropertiesIndexingService.new(dr2).export
228
+ # => {
229
+ # :title_ti=>"Test Title",
230
+ # :title_ssort=>"Test Title",
231
+ # :description_si_tsi=>"Test text description stored and indexed.",
232
+ # :price_s_fs=>789.01
233
+ # }
234
+ ```
235
+
236
+ ## Development Notes:
237
+
238
+ * I would like to see this expand to support specification of facets.
239
+ * The location and coordinate field types have not been tested and do not have examples.
240
+ * Some of the code in solr_service.rb is untested. It was copied from ActiveFedora as is. References to querying in that code have not been tested; the query code itself had not been copied at the time this document was written.
241
+
242
+
243
+ ## Contributing
244
+
245
+ Please observe the following guidelines:
246
+
247
+ - Do your work in a feature branch based on ```master``` and rebase before submitting a pull request.
248
+ - Write tests for your contributions.
249
+ - Document every method you add using YARD annotations. (_Note: Annotations are sparse in the existing codebase; help us fix that!_)
250
+ - Organize your commits into logical units.
251
+ - Don't leave trailing whitespace (i.e. run ```git diff --check``` before committing).
252
+ - Use [well formed](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html) commit messages.
253
+
@@ -0,0 +1,36 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for active_triples-solrizer.

# Put the gem's own lib/ on the load path so the version file can be required.
$:.push File.expand_path("../lib", __FILE__)
require "active_triples/solrizer/version"

Gem::Specification.new do |s|
  s.name        = "active_triples-solrizer"
  s.version     = ActiveTriples::Solrizer::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["E. Lynette Rayle"]
  s.homepage    = 'https://github.com/ActiveTriples/active_triples-solrizer'
  s.email       = 'elr37@cornell.edu'
  s.summary     = %q{Provide default solrizer implementation for ActiveTriples.}
  s.description = %q{active_triples-solrizer provides a default solr indexing implementation for ActiveTriples.}
  # RubyGems expects an SPDX license identifier; "Apache-2.0" is the SPDX id
  # for the Apache License, Version 2.0 named in the LICENSE file.
  s.license     = "Apache-2.0"
  s.required_ruby_version = '>= 2.2.0'

  # Runtime dependencies
  s.add_dependency 'rsolr'
  s.add_dependency('active-triples')
  s.add_dependency('solrizer')
  s.add_dependency('json')

  # Development-only dependencies
  s.add_development_dependency('pry')
  s.add_development_dependency('pry-byebug')
  s.add_development_dependency('rdoc')
  s.add_development_dependency('rspec')
  s.add_development_dependency('coveralls')
  s.add_development_dependency('guard-rspec')

  # Package every file tracked by git.
  s.files      = `git ls-files`.split("\n")
  # Use a plain "spec" pathspec: a single-element brace group such as {spec}
  # is not expanded by the shell and would match no tracked files.
  s.test_files = `git ls-files -- spec`.split("\n")

  s.extra_rdoc_files = [
    "LICENSE",
    "README.md"
  ]
end
@@ -0,0 +1,38 @@
1
+ require 'active_triples/solrizer/version'
2
+ require 'active_support'
3
+ require 'solrizer'
4
+
5
+ SOLR_DOCUMENT_ID = Solrizer.default_field_mapper.id_field unless defined?(SOLR_DOCUMENT_ID)
6
+
7
module ActiveTriples
  # Top-level namespace of the gem. Registers the autoloaded service classes
  # and exposes the gem-wide configuration object.
  module Solrizer
    extend ActiveSupport::Autoload
    eager_autoload do
      autoload :Configuration
      autoload :IndexingService
      autoload :ProfileIndexingService
      autoload :PropertiesIndexingService
      autoload :SolrService
    end

    # Methods for configuring the GEM
    class << self
      # Only the writer is generated here. The reader is defined explicitly
      # below so it can lazily build a default; generating it as well would
      # just be redefined (and warn under `ruby -w`).
      attr_writer :configuration
    end

    # @return [Configuration] the current configuration, created with
    #   defaults on first access
    def self.configuration
      @configuration ||= Configuration.new
    end

    # Replace the current configuration with a fresh default one.
    #
    # @return [Configuration] the new configuration
    def self.reset
      @configuration = Configuration.new
    end

    # Yield the current configuration so the caller can adjust it.
    #
    # @yieldparam configuration [Configuration]
    def self.configure
      yield(configuration)
    end
  end
end
38
+
@@ -0,0 +1,54 @@
1
# Nested module definitions (rather than `module ActiveTriples::Solrizer`) so
# this file can be loaded even when the parent namespace is not defined yet.
module ActiveTriples
  module Solrizer
    # Holds the solr connection settings for the gem: the solr URI plus the
    # read and open timeouts. Each setting starts at a built-in default, can
    # be overwritten through its writer, and can be restored through the
    # matching reset_* method.
    class Configuration
      # solr_uri     - String URI of the solr endpoint
      # read_timeout - read timeout (presumably seconds; confirm against the solr client)
      # open_timeout - open timeout (presumably seconds; confirm against the solr client)
      attr_accessor :solr_uri, :read_timeout, :open_timeout

      # @return [String] the built-in solr URI (frozen)
      def self.default_solr_uri
        'http://localhost:8983/solr/'.freeze
      end
      private_class_method :default_solr_uri

      # @return [Integer] the built-in read timeout
      def self.default_read_timeout
        120
      end
      private_class_method :default_read_timeout

      # @return [Integer] the built-in open timeout
      def self.default_open_timeout
        120
      end
      private_class_method :default_open_timeout

      # Build a configuration with every setting at its default.
      def initialize
        reset_solr_uri
        reset_read_timeout
        reset_open_timeout
      end

      # Restore the default solr URI.
      #
      # @return [String] the default solr URI
      def reset_solr_uri
        # `send` is required because the default_* helpers are private class methods.
        @solr_uri = self.class.send(:default_solr_uri)
      end

      # Restore the default read timeout.
      #
      # @return [Integer] the default read timeout
      def reset_read_timeout
        @read_timeout = self.class.send(:default_read_timeout)
      end

      # Restore the default open timeout.
      #
      # @return [Integer] the default open timeout
      def reset_open_timeout
        @open_timeout = self.class.send(:default_open_timeout)
      end
    end
  end
end