solrizer 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +44 -0
- data/History.txt +8 -0
- data/Rakefile +10 -3
- data/VERSION +1 -1
- data/config/solr_mappings.yml +16 -13
- data/config/solr_mappings_af_0.1.yml +18 -0
- data/lib/solrizer/extractor.rb +31 -72
- data/lib/solrizer/field_mapper.rb +351 -0
- data/lib/solrizer/field_name_mapper.rb +37 -51
- data/lib/solrizer/html/extractor.rb +36 -0
- data/lib/solrizer/html.rb +7 -0
- data/lib/solrizer/xml/extractor.rb +31 -0
- data/lib/solrizer/xml/terminology_based_solrizer.rb +25 -29
- data/lib/solrizer/xml.rb +4 -1
- data/lib/solrizer.rb +2 -113
- data/lib/tasks/solrizer.rake +7 -27
- data/solrizer.gemspec +46 -26
- data/spec/{spec.opts → .rspec} +0 -0
- data/spec/fixtures/test_solr_mappings.yml +16 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/units/extractor_spec.rb +43 -34
- data/spec/units/field_mapper_spec.rb +227 -0
- data/spec/units/field_name_mapper_spec.rb +16 -29
- data/spec/units/xml_extractor_spec.rb +28 -0
- data/spec/units/xml_terminology_based_solrizer_spec.rb +18 -5
- metadata +128 -35
- data/lib/solrizer/configuration.rb +0 -8
- data/lib/solrizer/indexer.rb +0 -261
- data/lib/solrizer/main.rb +0 -17
- data/lib/solrizer/replicator.rb +0 -143
- data/lib/solrizer/repository.rb +0 -54
- data/spec/fixtures/rels_ext_cmodel.xml +0 -8
- data/spec/fixtures/solr_mappings_af_0.1.yml +0 -16
- data/spec/integration/indexer_spec.rb +0 -18
- data/spec/units/indexer_spec.rb +0 -127
- data/spec/units/shelver_spec.rb +0 -42
data/.gitignore
CHANGED
data/Gemfile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
source "http://rubygems.org"
|
2
|
+
|
3
|
+
gem "solr-ruby"
|
4
|
+
gem "nokogiri"
|
5
|
+
gem "om", ">= 1.0.0" # only required by xml/terminology_based_solrizer ...
|
6
|
+
gem "mediashelf-loggable"
|
7
|
+
|
8
|
+
group :development, :test do
|
9
|
+
gem "jeweler"
|
10
|
+
gem 'ruby-debug'
|
11
|
+
gem 'ruby-debug-base'
|
12
|
+
gem 'rspec', '<2.0.0'
|
13
|
+
gem 'mocha'
|
14
|
+
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
columnize (0.3.1)
|
5
|
+
facets (2.9.0)
|
6
|
+
gemcutter (0.6.1)
|
7
|
+
git (1.2.5)
|
8
|
+
jeweler (1.4.0)
|
9
|
+
gemcutter (>= 0.1.0)
|
10
|
+
git (>= 1.2.5)
|
11
|
+
rubyforge (>= 2.0.0)
|
12
|
+
json_pure (1.4.6)
|
13
|
+
linecache (0.43)
|
14
|
+
mediashelf-loggable (0.4.0)
|
15
|
+
mocha (0.9.9)
|
16
|
+
rake
|
17
|
+
nokogiri (1.4.3.1)
|
18
|
+
om (1.0.0)
|
19
|
+
facets
|
20
|
+
nokogiri (>= 1.4.2)
|
21
|
+
rake (0.8.7)
|
22
|
+
rspec (1.3.1)
|
23
|
+
ruby-debug (0.10.3)
|
24
|
+
columnize (>= 0.1)
|
25
|
+
ruby-debug-base (~> 0.10.3.0)
|
26
|
+
ruby-debug-base (0.10.3)
|
27
|
+
linecache (>= 0.3)
|
28
|
+
rubyforge (2.0.4)
|
29
|
+
json_pure (>= 1.1.7)
|
30
|
+
solr-ruby (0.0.8)
|
31
|
+
|
32
|
+
PLATFORMS
|
33
|
+
ruby
|
34
|
+
|
35
|
+
DEPENDENCIES
|
36
|
+
jeweler
|
37
|
+
mediashelf-loggable
|
38
|
+
mocha
|
39
|
+
nokogiri
|
40
|
+
om (>= 1.0.0)
|
41
|
+
rspec (< 2.0.0)
|
42
|
+
ruby-debug
|
43
|
+
ruby-debug-base
|
44
|
+
solr-ruby
|
data/History.txt
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
h2. 0.3.0
|
2
|
+
|
3
|
+
HYDRA-286 Re-structure Solrizer to separate solrizer base from fedora-solrizer
|
4
|
+
|
5
|
+
Added TerminologyBasedSolrizer
|
6
|
+
Added Extremely Configurable FieldMapper
|
7
|
+
Updated FieldNameMapper to use new FieldMapper
|
8
|
+
|
1
9
|
h2. 0.1.2
|
2
10
|
|
3
11
|
Minor: switched active-fedora gem requirement to >= 1.1.5 instead of = 1.1.5 (was breaking apps that use later versions of active-fedora)
|
data/Rakefile
CHANGED
@@ -10,9 +10,16 @@ begin
|
|
10
10
|
gem.email = "matt.zumwalt@yourmediashelf.com"
|
11
11
|
gem.homepage = "http://github.com/projecthydra/solrizer"
|
12
12
|
gem.authors = ["Matt Zumwalt"]
|
13
|
-
gem.add_dependency "
|
14
|
-
gem.add_dependency "
|
15
|
-
gem.
|
13
|
+
gem.add_dependency "solr-ruby"
|
14
|
+
gem.add_dependency "nokogiri"
|
15
|
+
gem.add_dependency "om"
|
16
|
+
gem.add_dependency "nokogiri"
|
17
|
+
gem.add_dependency "mediashelf-loggable"
|
18
|
+
gem.add_development_dependency "jeweler"
|
19
|
+
gem.add_development_dependency 'ruby-debug'
|
20
|
+
gem.add_development_dependency 'ruby-debug-base'
|
21
|
+
gem.add_development_dependency 'rspec', '<2.0.0'
|
22
|
+
gem.add_development_dependency 'mocha'
|
16
23
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
17
24
|
end
|
18
25
|
Jeweler::GemcutterTasks.new
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.3.0
|
data/config/solr_mappings.yml
CHANGED
@@ -1,14 +1,17 @@
|
|
1
1
|
id: id
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
2
|
+
default: searchable
|
3
|
+
searchable:
|
4
|
+
default: _t
|
5
|
+
date: _dt
|
6
|
+
string: _t
|
7
|
+
text: _t
|
8
|
+
symbol: _s
|
9
|
+
integer: _i
|
10
|
+
long: _l
|
11
|
+
boolean: _b
|
12
|
+
float: _f
|
13
|
+
double: _d
|
14
|
+
displayable: _display
|
15
|
+
facetable: _facet
|
16
|
+
sortable: _sort
|
17
|
+
unstemmed_searchable: _unstem_search
|
@@ -0,0 +1,18 @@
|
|
1
|
+
id: id
|
2
|
+
default: searchable
|
3
|
+
searchable:
|
4
|
+
date: _date
|
5
|
+
string: _field
|
6
|
+
text: _field
|
7
|
+
symbol: _field
|
8
|
+
integer: _field
|
9
|
+
long: _field
|
10
|
+
boolean: _field
|
11
|
+
float: _field
|
12
|
+
double: _field
|
13
|
+
displayable: _display
|
14
|
+
facetable: _facet
|
15
|
+
sortable: _sort
|
16
|
+
unstemmed_searchable: _unstem_search
|
17
|
+
|
18
|
+
|
data/lib/solrizer/extractor.rb
CHANGED
@@ -4,85 +4,44 @@ require "nokogiri"
|
|
4
4
|
require 'yaml'
|
5
5
|
|
6
6
|
module Solrizer
|
7
|
-
class Extractor
|
8
|
-
|
9
|
-
|
10
|
-
def extract_tags(text)
|
11
|
-
doc = REXML::Document.new( text )
|
12
|
-
extract_tag(doc, 'archivist_tags').merge(extract_tag(doc, 'donor_tags'))
|
13
|
-
end
|
14
7
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
8
|
+
# Provides utilities for extracting solr fields from a variety of objects and/or creating solr documents from a given object
|
9
|
+
# Note: These utilities are optional. You can implement .to_solr directly on your classes if you want to bypass using Extractors.
|
10
|
+
#
|
11
|
+
# Each of the Solrizer implementations provides its own Extractor module that extends the behaviors of Solrizer::Extractor
|
12
|
+
# with methods specific to that implementation (ie. extract_tag, extract_rels_ext, xml_to_solr, html_to_solr)
|
13
|
+
#
|
14
|
+
class Extractor
|
20
15
|
|
21
|
-
|
22
|
-
#
|
23
|
-
#
|
16
|
+
# Populates a solr doc with values from a hash.
|
17
|
+
# Accepts two forms of hashes:
|
18
|
+
# => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}
|
19
|
+
# or
|
20
|
+
# => {:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]} }
|
24
21
|
#
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
doc = Nokogiri::XML(text)
|
36
|
-
doc.xpath( '//foo:hasModel', 'foo' => 'info:fedora/fedora-system:def/model#' ).each do |element|
|
37
|
-
cmodel = element.attributes['resource'].to_s
|
38
|
-
solr_doc << Solr::Field.new( :cmodel_t => cmodel )
|
39
|
-
|
40
|
-
if map.has_key?(cmodel)
|
41
|
-
solr_doc << Solr::Field.new( :hydra_type_t => map[cmodel] )
|
22
|
+
# Note that values for individual fields can be a single string or an array of strings.
|
23
|
+
def extract_hash( input_hash, solr_doc=Solr::Document.new )
|
24
|
+
facets = input_hash.has_key?(:facets) ? input_hash[:facets] : input_hash
|
25
|
+
facets.each_pair do |facet_name, value|
|
26
|
+
case value.class.to_s
|
27
|
+
when "String"
|
28
|
+
solr_doc << Solr::Field.new( :"#{facet_name}_facet" => "#{value}" )
|
29
|
+
when "Array"
|
30
|
+
value.each { |v| solr_doc << Solr::Field.new( :"#{facet_name}_facet" => "#{v}" ) }
|
42
31
|
end
|
43
32
|
end
|
44
|
-
|
45
|
-
return solr_doc
|
46
|
-
end
|
47
|
-
|
48
|
-
#
|
49
|
-
# This method extracts solr fields from simple xml
|
50
|
-
#
|
51
|
-
def xml_to_solr( text, solr_doc=Solr::Document.new )
|
52
|
-
doc = REXML::Document.new( text )
|
53
|
-
doc.root.elements.each do |element|
|
54
|
-
solr_doc << Solr::Field.new( :"#{element.name}_t" => "#{element.text}" )
|
55
|
-
end
|
56
|
-
|
57
|
-
return solr_doc
|
58
|
-
end
|
59
|
-
|
60
|
-
#
|
61
|
-
# This method strips html tags out and returns content to be indexed in solr
|
62
|
-
#
|
63
|
-
def html_content_to_solr( ds, solr_doc=Solr::Document.new )
|
64
|
-
|
65
|
-
text = CGI.unescapeHTML(ds.content)
|
66
|
-
doc = Nokogiri::HTML(text)
|
67
33
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
34
|
+
if input_hash.has_key?(:symbols)
|
35
|
+
input_hash[:symbols].each do |symbol_name, value|
|
36
|
+
case value.class.to_s
|
37
|
+
when "String"
|
38
|
+
solr_doc << Solr::Field.new( :"#{symbol_name}_s" => "#{value}" )
|
39
|
+
when "Array"
|
40
|
+
value.each { |v| solr_doc << Solr::Field.new( :"#{symbol_name}_s" => "#{v}" ) }
|
41
|
+
end
|
42
|
+
end
|
73
43
|
end
|
74
|
-
|
75
|
-
#strip out text and put in story_t
|
76
|
-
text_nodes = doc.xpath("//text()")
|
77
|
-
text = String.new
|
78
|
-
|
79
|
-
text_nodes.each do |text_node|
|
80
|
-
text << text_node.content
|
81
|
-
end
|
82
|
-
|
83
|
-
solr_doc << Solr::Field.new(:story_t => text)
|
84
|
-
|
85
|
-
return solr_doc
|
44
|
+
return solr_doc
|
86
45
|
end
|
87
46
|
|
88
47
|
end
|
@@ -0,0 +1,351 @@
|
|
1
|
+
require "loggable"
|
2
|
+
module Solrizer
|
3
|
+
|
4
|
+
# Maps Term names and values to Solr fields, based on the Term's data type and any index_as options.
|
5
|
+
#
|
6
|
+
# The basic structure of a mapper is:
|
7
|
+
#
|
8
|
+
# == Mapping on Index Type
|
9
|
+
#
|
10
|
+
# To define a custom mapper:
|
11
|
+
#
|
12
|
+
# class CustomMapper < Solrizer::FieldMapper
|
13
|
+
# index_as :searchable, :suffix => '_search'
|
14
|
+
# index_as :edible, :suffix => '_food'
|
15
|
+
# end
|
16
|
+
#
|
17
|
+
# # t.dish_name :index_as => [:searchable] -maps to-> dish_name_search
|
18
|
+
# # t.ingredients :index_as => [:searchable, :edible] -maps to-> ingredients_search, ingredients_food
|
19
|
+
#
|
20
|
+
# (See Solrizer::XML::TerminologyBasedSolrizer for instructions on applying a custom mapping once you have defined it.)
|
21
|
+
#
|
22
|
+
# == Default Index Types
|
23
|
+
#
|
24
|
+
# You can mark a particular index type as a default. It will then always be included unless terms explicitly
|
25
|
+
# exclude it with the "not_" prefix:
|
26
|
+
#
|
27
|
+
# class CustomMapper < Solrizer::FieldMapper
|
28
|
+
# index_as :searchable, :suffix => '_search', :default => true
|
29
|
+
# index_as :edible, :suffix => '_food'
|
30
|
+
# end
|
31
|
+
#
|
32
|
+
# # t.dish_name -maps to-> dish_name_search
|
33
|
+
# # t.ingredients :index_as => [:edible] -maps to-> ingredients_search, ingredients_food
|
34
|
+
# # t.secret_ingredients :index_as => [:not_searchable, :edible] -maps to-> secret_ingredients_food
|
35
|
+
#
|
36
|
+
# == Mapping on Data Type
|
37
|
+
#
|
38
|
+
# A mapper can apply different suffixes based on a term's data type:
|
39
|
+
#
|
40
|
+
# class CustomMapper < Solrizer::FieldMapper
|
41
|
+
# index_as :searchable, :suffix => '_search' do |type|
|
42
|
+
# type.date :suffix => '_date'
|
43
|
+
# type.integer :suffix => '_numeric'
|
44
|
+
# type.float :suffix => '_numeric'
|
45
|
+
# end
|
46
|
+
# index_as :edible, :suffix => '_food'
|
47
|
+
# end
|
48
|
+
#
|
49
|
+
# # t.published :type => :date, :index_as => [:searchable] -maps to-> published_date
|
50
|
+
# # t.votes :type => :integer, :index_as => [:searchable] -maps to-> votes_numeric
|
51
|
+
#
|
52
|
+
# If a specific data type doesn't appear in the list, the mapper falls back to the index_as:
|
53
|
+
#
|
54
|
+
# # t.description :type => :text, :index_as => [:searchable] -maps to-> description_search
|
55
|
+
#
|
56
|
+
# == Custom Value Converters
|
57
|
+
#
|
58
|
+
# All of the above applies to the generation of Solr names. Mappers can also provide custom conversion logic for the
|
59
|
+
# generation of Solr values by attaching a custom value converter block to a data type:
|
60
|
+
#
|
61
|
+
# require 'time'
|
62
|
+
#
|
63
|
+
# class CustomMapper < Solrizer::FieldMapper
|
64
|
+
# index_as :searchable, :suffix => '_search' do |type|
|
65
|
+
# type.date do |value|
|
66
|
+
# Time.parse(value).utc.to_i
|
67
|
+
# end
|
68
|
+
# end
|
69
|
+
# end
|
70
|
+
#
|
71
|
+
# Note that the nesting order is always:
|
72
|
+
#
|
73
|
+
# FieldMapper definition
|
74
|
+
# index_as
|
75
|
+
# data type
|
76
|
+
# value converter
|
77
|
+
#
|
78
|
+
# You can use the special data type "default" to apply custom value conversion to any data type:
|
79
|
+
#
|
80
|
+
# require 'time'
|
81
|
+
#
|
82
|
+
# class CustomMapper < Solrizer::FieldMapper
|
83
|
+
# index_as :searchable do |type|
|
84
|
+
# type.date :suffix => '_date' do |value|
|
85
|
+
# Time.parse(value).utc.to_i
|
86
|
+
# end
|
87
|
+
# type.default :suffix => '_search' do |value|
|
88
|
+
# value.to_s.strip
|
89
|
+
# end
|
90
|
+
# end
|
91
|
+
# end
|
92
|
+
#
|
93
|
+
# This example converts searchable dates to seconds since the Unix epoch (Time#to_i), and strips extra whitespace from all other searchable data types.
|
94
|
+
#
|
95
|
+
# Note that the :suffix option may appear on the data types and the index_as. The search order for the suffix on a field
|
96
|
+
# of type foo is:
|
97
|
+
# 1. type.foo
|
98
|
+
# 2. type.default
|
99
|
+
# 3. index_as
|
100
|
+
# The suffix is optional in all three places.
|
101
|
+
#
|
102
|
+
# Note that a single Term with multiple index types can translate into multiple Solr fields, because we may want Solr to
|
103
|
+
# index a single field in multiple ways. However, if two different mappings generate both the same solr field name
|
104
|
+
# _and_ the same value, the mapper will only emit a single field.
|
105
|
+
#
|
106
|
+
# == ID Field
|
107
|
+
#
|
108
|
+
# In addition to the normal field mappings, Solrizer gives special treatment to an ID field. If you want that
|
109
|
+
# logic (and you probably do), specify a name for this field:
|
110
|
+
#
|
111
|
+
# class CustomMapper < Solrizer::FieldMapper
|
112
|
+
# id_field 'id'
|
113
|
+
# end
|
114
|
+
#
|
115
|
+
# == Extending the Default
|
116
|
+
#
|
117
|
+
# The default mapper is Solrizer::FieldMapper::Default. You can customize the default mapping by subclassing it.
|
118
|
+
# For example, to override the ID field name and the default suffix for sortable, and inherit everything else:
|
119
|
+
#
|
120
|
+
# class CustomMapperBasedOnDefault < Solrizer::FieldMapper::Default
|
121
|
+
# id_field 'guid'
|
122
|
+
# index_as :sortable, :suffix => '_xsort'
|
123
|
+
# end
|
124
|
+
|
125
|
+
class FieldMapper
|
126
|
+
|
127
|
+
include Loggable
|
128
|
+
|
129
|
+
# ------ Class methods ------
|
130
|
+
|
131
|
+
@@instance_init_actions = Hash.new { |h,k| h[k] = [] }
|
132
|
+
|
133
|
+
# Declares the name of the Solr ID field for this mapper class.
# The assignment is not performed immediately: it is queued through
# add_instance_init_action and evaluated in the context of a mapper instance,
# where it sets that instance's @id_field.
#
# @param field_name the ID field name to store on each instance
def self.id_field(field_name)
  add_instance_init_action { @id_field = field_name }
end
|
138
|
+
|
139
|
+
# Declares (or extends) the mapping for an index type on this mapper class.
# The work is queued through add_instance_init_action and evaluated in the
# context of a mapper instance, where it reads and updates @mappings.
#
# @param index_type key under which the mapping is stored in @mappings
# @param [Hash] opts options merged into the mapping's opts (e.g. :suffix, :default)
# @yield [DataTypeMappingBuilder] optional block used to declare per-data-type settings
def self.index_as(index_type, opts = {}, &block)
  add_instance_init_action do
    # Reuse an existing mapping so repeated declarations accumulate options
    @mappings[index_type] ||= IndexTypeMapping.new
    entry = @mappings[index_type]
    entry.opts.merge!(opts)
    block.call(DataTypeMappingBuilder.new(entry)) if block
  end
end
|
146
|
+
|
147
|
+
# Loads solr mappings from a yml file and re-declares this class's mappings from it.
# Assumes that string values are solr field name suffixes.
# This is meant as a simple entry point for working with solr mappings. For more powerful control over solr mappings, create your own subclasses of FieldMapper instead of using a yml file.
#
# Recognised top-level keys in the file:
#   "id"      - name of the ID field (falls back to "id" when absent)
#   "default" - name of the index type that should be flagged :default => true
#   any other key is an index type whose value is either a suffix or a hash of data type => suffix
#
# @param [String] config_path Path to the mappings yml file itself (not its directory).
#   When nil, RAILS_ROOT/config/solr_mappings.yml is used if RAILS_ROOT is defined and the file exists;
#   otherwise the config/solr_mappings.yml file that sits two directories above this source file is used.
def self.load_mappings( config_path=nil )

  if config_path.nil?
    if defined?(RAILS_ROOT)
      config_path = File.join(RAILS_ROOT, "config", "solr_mappings.yml")
    end
    # Default to using the config file within the gem
    if !File.exist?(config_path.to_s)
      config_path = File.join(File.dirname(__FILE__), "..", "..", "config", "solr_mappings.yml")
    end
  end

  logger.info("SOLRIZER: loading field name mappings from #{File.expand_path(config_path)}")
  # Block form of File.open closes the handle as soon as parsing is done
  mappings_from_file = File.open(config_path) { |file| YAML::load(file) }

  self.clear_mappings

  # Set id_field from file if it is available
  id_field_from_file = mappings_from_file.delete("id")
  if id_field_from_file.nil?
    id_field "id"
  else
    id_field id_field_from_file
  end

  default_index_type = mappings_from_file.delete("default")
  mappings_from_file.each_pair do |index_type, type_settings|
    if type_settings.kind_of?(Hash)
      index_as index_type.to_sym, :default => index_type == default_index_type do |t|
        type_settings.each_pair do |field_type, suffix|
          # NOTE(review): eval executes text taken straight from the yml file.
          # Only load mapping files from a trusted location.
          eval("t.#{field_type} :suffix=>\"#{suffix}\"")
        end
      end
    else
      index_as index_type.to_sym, :default => index_type == default_index_type, :suffix=>type_settings
    end
  end
end
|
189
|
+
|
190
|
+
private
|
191
|
+
|
192
|
+
# Queues a block to be run later against mapper instances of this class.
# The block is wrapped in a lambda that instance_evals it on the mapper it is
# given, and the lambda is appended to this class's entry in @@instance_init_actions.
def self.add_instance_init_action(&block)
  runner = lambda { |mapper| mapper.instance_eval(&block) }
  @@instance_init_actions[self] << runner
end
|
197
|
+
|
198
|
+
# Runs every queued init action against the given mapper instance.
# Actions belonging to the superclass chain are applied first (when the
# superclass responds to this method), then the actions registered for this class.
#
# @param instance the mapper object each action is called with
def self.apply_instance_init_actions(instance)
  parent = superclass
  parent.apply_instance_init_actions(instance) if parent.respond_to?(:apply_instance_init_actions)
  @@instance_init_actions[self].each { |action| action.call(instance) }
end
|
206
|
+
|
207
|
+
# Discards every init action queued for this class, so mappings can be
# declared again from scratch. Entries for other classes are left alone.
def self.clear_mappings
  logger.debug("resetting mappings for #{self.to_s}")
  @@instance_init_actions.store(self, [])
end
|
212
|
+
|
213
|
+
public
|
214
|
+
|
215
|
+
# ------ Instance methods ------
|
216
|
+
|
217
|
+
attr_reader :id_field, :default_index_types, :mappings
|
218
|
+
|
219
|
+
# Builds a mapper: starts with an empty @mappings hash, lets the class apply
# its queued init actions to this instance, then records which index types
# carry a truthy :default option.
def initialize
  @mappings = {}
  self.class.apply_instance_init_actions(self)
  @default_index_types = @mappings.keys.select { |ix_type| @mappings[ix_type].opts[:default] }
end
|
224
|
+
|
225
|
+
# Given a specific field name, data type, and index type, returns the corresponding solr name.
# Returns nil when solr_name_and_mappings yields nothing for the index type.
#
# @param field_name name of the field
# @param field_type data type of the field
# @param index_type index type to resolve; defaults to :searchable
def solr_name(field_name, field_type, index_type = :searchable)
  resolved = solr_name_and_mappings(field_name, field_type, index_type)
  resolved && resolved[0]
end
|
231
|
+
|
232
|
+
# Given a field name-value pair, a data type, and an array of index types, returns a hash of
# mapped names and values. The values in the hash are _arrays_, and may contain multiple values.
#
# @param field_name name of the field being mapped
# @param field_value raw value; passed through a converter when the data type mapping has one
# @param field_type data type used to look up the mapping
# @param [Array, nil] index_types requested index types; nil is treated as an empty list.
#   Default index types are always added, and a "not_xyz" entry removes both itself and "xyz".
# @return [Hash] solr field name => array of distinct values
def solr_names_and_values(field_name, field_value, field_type, index_types)
  # Determine the set of index types, adding defaults and removing not_xyz.
  # += builds a new array, so the caller's array is not modified by the deletes below.
  index_types ||= []
  index_types += default_index_types
  index_types.uniq!
  index_types.dup.each do |index_type|
    if index_type.to_s =~ /^not_(.*)/
      index_types.delete index_type # not_foo
      index_types.delete $1.to_sym # foo
    end
  end

  # Map names and values
  results = {}

  index_types.each do |index_type|
    # Get mapping for field; unknown index types yield no name and are skipped
    name, _mapping, data_type_mapping = solr_name_and_mappings(field_name, field_type, index_type)
    next unless name

    # Is there a custom converter?
    value = if data_type_mapping && data_type_mapping.converter
      converter = data_type_mapping.converter
      # One-argument converters get the value only; others also get the field name
      if converter.arity == 1
        converter.call(field_value)
      else
        converter.call(field_value, field_name)
      end
    else
      field_value
    end

    # Add mapped name & value, unless it's a duplicate
    values = (results[name] ||= [])
    values << value unless values.include?(value)
  end

  results
end
|
276
|
+
|
277
|
+
private
|
278
|
+
|
279
|
+
# Resolves the solr field name for a field, together with the mappings used to build it.
#
# The suffix is looked up on the data type mapping for field_type, falling back to the
# :default data type mapping, and finally to the index type's own :suffix option.
# When no suffix is configured anywhere, the bare field name is used.
#
# @param field_name name of the field (converted with to_s)
# @param field_type data type used to select a data type mapping
# @param index_type key into @mappings
# @return [Array, nil] [name, index type mapping, data type mapping (may be nil)],
#   or nil when index_type has no mapping
def solr_name_and_mappings(field_name, field_type, index_type)
  field_name = field_name.to_s
  mapping = @mappings[index_type]
  unless mapping
    logger.debug "Unknown index type '#{index_type}' for field #{field_name}"
    return nil
  end

  data_type_mapping = mapping.data_types[field_type] || mapping.data_types[:default]

  suffix = data_type_mapping.opts[:suffix] if data_type_mapping
  suffix ||= mapping.opts[:suffix]
  # Interpolation tolerates a missing suffix, where String#+ would raise TypeError on nil
  name = "#{field_name}#{suffix}"

  [name, mapping, data_type_mapping]
end
|
295
|
+
|
296
|
+
# Settings registered for a single index type: its own options hash plus a
# hash of per-data-type mappings. Both start out empty.
class IndexTypeMapping
  attr_accessor :opts
  attr_accessor :data_types

  def initialize
    @opts, @data_types = {}, {}
  end
end
|
304
|
+
|
305
|
+
# Settings registered for one data type within an index type: an options hash
# (initially empty) and an optional converter, which stays nil until assigned.
class DataTypeMapping
  attr_accessor :converter, :opts

  def initialize
    @opts = Hash.new
  end
end
|
312
|
+
|
313
|
+
# DSL receiver handed to index_as blocks. Any method called on it is treated
# as a data type name, and configures that data type on the wrapped index type mapping.
class DataTypeMappingBuilder
  def initialize(index_type_mapping)
    @index_type_mapping = index_type_mapping
  end

  # method:  the data type name (e.g. :date)
  # args[0]: optional hash merged into the data type's opts
  # block:   optional converter stored on the data type mapping
  def method_missing(method, *args, &block)
    registry = @index_type_mapping.data_types
    registry[method] ||= DataTypeMapping.new
    entry = registry[method]
    entry.opts.merge!(args.first) unless args.empty?
    entry.converter = block if block
  end
end
|
324
|
+
|
325
|
+
# ------ Default mapper ------
|
326
|
+
|
327
|
+
public
|
328
|
+
|
329
|
+
# Mapper with the stock suffix scheme. Subclass it to override individual
# settings while inheriting the rest.
class Default < FieldMapper
  # Name of the ID field
  id_field 'id'
  # :searchable is flagged as a default index type; its suffix depends on the data type
  index_as :searchable, :default => true do |t|
    # Suffix for any data type not listed below
    t.default :suffix => '_t'
    t.date    :suffix => '_dt'
    t.string  :suffix => '_t'
    t.text    :suffix => '_t'
    t.symbol  :suffix => '_s'
    t.integer :suffix => '_i'
    t.long    :suffix => '_l'
    t.boolean :suffix => '_b'
    t.float   :suffix => '_f'
    t.double  :suffix => '_d'
  end
  # The remaining index types use one suffix regardless of data type
  index_as :displayable, :suffix => '_display'
  index_as :facetable, :suffix => '_facet'
  index_as :sortable, :suffix => '_sort'
  index_as :unstemmed_searchable, :suffix => '_unstem_search'
end
|
348
|
+
|
349
|
+
end
|
350
|
+
|
351
|
+
end
|