solrizer 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +44 -0
- data/History.txt +8 -0
- data/Rakefile +10 -3
- data/VERSION +1 -1
- data/config/solr_mappings.yml +16 -13
- data/config/solr_mappings_af_0.1.yml +18 -0
- data/lib/solrizer/extractor.rb +31 -72
- data/lib/solrizer/field_mapper.rb +351 -0
- data/lib/solrizer/field_name_mapper.rb +37 -51
- data/lib/solrizer/html/extractor.rb +36 -0
- data/lib/solrizer/html.rb +7 -0
- data/lib/solrizer/xml/extractor.rb +31 -0
- data/lib/solrizer/xml/terminology_based_solrizer.rb +25 -29
- data/lib/solrizer/xml.rb +4 -1
- data/lib/solrizer.rb +2 -113
- data/lib/tasks/solrizer.rake +7 -27
- data/solrizer.gemspec +46 -26
- data/spec/{spec.opts → .rspec} +0 -0
- data/spec/fixtures/test_solr_mappings.yml +16 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/units/extractor_spec.rb +43 -34
- data/spec/units/field_mapper_spec.rb +227 -0
- data/spec/units/field_name_mapper_spec.rb +16 -29
- data/spec/units/xml_extractor_spec.rb +28 -0
- data/spec/units/xml_terminology_based_solrizer_spec.rb +18 -5
- metadata +128 -35
- data/lib/solrizer/configuration.rb +0 -8
- data/lib/solrizer/indexer.rb +0 -261
- data/lib/solrizer/main.rb +0 -17
- data/lib/solrizer/replicator.rb +0 -143
- data/lib/solrizer/repository.rb +0 -54
- data/spec/fixtures/rels_ext_cmodel.xml +0 -8
- data/spec/fixtures/solr_mappings_af_0.1.yml +0 -16
- data/spec/integration/indexer_spec.rb +0 -18
- data/spec/units/indexer_spec.rb +0 -127
- data/spec/units/shelver_spec.rb +0 -42
data/.gitignore
CHANGED
data/Gemfile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
source "http://rubygems.org"
|
2
|
+
|
3
|
+
gem "solr-ruby"
|
4
|
+
gem "nokogiri"
|
5
|
+
gem "om", ">= 1.0.0" # only required by xml/terminology_based_solrizer ...
|
6
|
+
gem "mediashelf-loggable"
|
7
|
+
|
8
|
+
group :development, :test do
|
9
|
+
gem "jeweler"
|
10
|
+
gem 'ruby-debug'
|
11
|
+
gem 'ruby-debug-base'
|
12
|
+
gem 'rspec', '<2.0.0'
|
13
|
+
gem 'mocha'
|
14
|
+
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
columnize (0.3.1)
|
5
|
+
facets (2.9.0)
|
6
|
+
gemcutter (0.6.1)
|
7
|
+
git (1.2.5)
|
8
|
+
jeweler (1.4.0)
|
9
|
+
gemcutter (>= 0.1.0)
|
10
|
+
git (>= 1.2.5)
|
11
|
+
rubyforge (>= 2.0.0)
|
12
|
+
json_pure (1.4.6)
|
13
|
+
linecache (0.43)
|
14
|
+
mediashelf-loggable (0.4.0)
|
15
|
+
mocha (0.9.9)
|
16
|
+
rake
|
17
|
+
nokogiri (1.4.3.1)
|
18
|
+
om (1.0.0)
|
19
|
+
facets
|
20
|
+
nokogiri (>= 1.4.2)
|
21
|
+
rake (0.8.7)
|
22
|
+
rspec (1.3.1)
|
23
|
+
ruby-debug (0.10.3)
|
24
|
+
columnize (>= 0.1)
|
25
|
+
ruby-debug-base (~> 0.10.3.0)
|
26
|
+
ruby-debug-base (0.10.3)
|
27
|
+
linecache (>= 0.3)
|
28
|
+
rubyforge (2.0.4)
|
29
|
+
json_pure (>= 1.1.7)
|
30
|
+
solr-ruby (0.0.8)
|
31
|
+
|
32
|
+
PLATFORMS
|
33
|
+
ruby
|
34
|
+
|
35
|
+
DEPENDENCIES
|
36
|
+
jeweler
|
37
|
+
mediashelf-loggable
|
38
|
+
mocha
|
39
|
+
nokogiri
|
40
|
+
om (>= 1.0.0)
|
41
|
+
rspec (< 2.0.0)
|
42
|
+
ruby-debug
|
43
|
+
ruby-debug-base
|
44
|
+
solr-ruby
|
data/History.txt
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
h2. 0.3.0
|
2
|
+
|
3
|
+
HYDRA-286 Re-structure Solrizer to separate solrizer base from fedora-solrizer
|
4
|
+
|
5
|
+
Added TerminologyBasedSolrizer
|
6
|
+
Added Extremely Configurable FieldMapper
|
7
|
+
Updated FieldNameMapper to use new FieldMapper
|
8
|
+
|
1
9
|
h2. 0.1.2
|
2
10
|
|
3
11
|
Minor: switched active-fedora gem requirement to >= 1.1.5 instead of = 1.1.5 (was breaking apps that use later versions of active-fedora)
|
data/Rakefile
CHANGED
@@ -10,9 +10,16 @@ begin
|
|
10
10
|
gem.email = "matt.zumwalt@yourmediashelf.com"
|
11
11
|
gem.homepage = "http://github.com/projecthydra/solrizer"
|
12
12
|
gem.authors = ["Matt Zumwalt"]
|
13
|
-
gem.add_dependency "
|
14
|
-
gem.add_dependency "
|
15
|
-
gem.
|
13
|
+
gem.add_dependency "solr-ruby"
|
14
|
+
gem.add_dependency "nokogiri"
|
15
|
+
gem.add_dependency "om"
|
16
|
+
gem.add_dependency "nokogiri"
|
17
|
+
gem.add_dependency "mediashelf-loggable"
|
18
|
+
gem.add_development_dependency "jeweler"
|
19
|
+
gem.add_development_dependency 'ruby-debug'
|
20
|
+
gem.add_development_dependency 'ruby-debug-base'
|
21
|
+
gem.add_development_dependency 'rspec', '<2.0.0'
|
22
|
+
gem.add_development_dependency 'mocha'
|
16
23
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
17
24
|
end
|
18
25
|
Jeweler::GemcutterTasks.new
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.3.0
|
data/config/solr_mappings.yml
CHANGED
@@ -1,14 +1,17 @@
|
|
1
1
|
id: id
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
2
|
+
default: searchable
|
3
|
+
searchable:
|
4
|
+
default: _t
|
5
|
+
date: _dt
|
6
|
+
string: _t
|
7
|
+
text: _t
|
8
|
+
symbol: _s
|
9
|
+
integer: _i
|
10
|
+
long: _l
|
11
|
+
boolean: _b
|
12
|
+
float: _f
|
13
|
+
double: _d
|
14
|
+
displayable: _display
|
15
|
+
facetable: _facet
|
16
|
+
sortable: _sort
|
17
|
+
unstemmed_searchable: _unstem_search
|
@@ -0,0 +1,18 @@
|
|
1
|
+
id: id
|
2
|
+
default: searchable
|
3
|
+
searchable:
|
4
|
+
date: _date
|
5
|
+
string: _field
|
6
|
+
text: _field
|
7
|
+
symbol: _field
|
8
|
+
integer: _field
|
9
|
+
long: _field
|
10
|
+
boolean: _field
|
11
|
+
float: _field
|
12
|
+
double: _field
|
13
|
+
displayable: _display
|
14
|
+
facetable: _facet
|
15
|
+
sortable: _sort
|
16
|
+
unstemmed_searchable: _unstem_search
|
17
|
+
|
18
|
+
|
data/lib/solrizer/extractor.rb
CHANGED
@@ -4,85 +4,44 @@ require "nokogiri"
|
|
4
4
|
require 'yaml'
|
5
5
|
|
6
6
|
module Solrizer
|
7
|
-
class Extractor
|
8
|
-
|
9
|
-
|
10
|
-
def extract_tags(text)
|
11
|
-
doc = REXML::Document.new( text )
|
12
|
-
extract_tag(doc, 'archivist_tags').merge(extract_tag(doc, 'donor_tags'))
|
13
|
-
end
|
14
7
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
8
|
+
# Provides utilities for extracting solr fields from a variety of objects and/or creating solr documents from a given object
|
9
|
+
# Note: These utilities are optional. You can implement .to_solr directly on your classes if you want to bypass using Extractors.
|
10
|
+
#
|
11
|
+
# Each of the Solrizer implementations provides its own Extractor module that extends the behaviors of Solrizer::Extractor
|
12
|
+
# with methods specific to that implementation (i.e. extract_tag, extract_rels_ext, xml_to_solr, html_to_solr)
|
13
|
+
#
|
14
|
+
class Extractor
|
20
15
|
|
21
|
-
|
22
|
-
#
|
23
|
-
#
|
16
|
+
# Populates a solr doc with values from a hash.
|
17
|
+
# Accepts two forms of hashes:
|
18
|
+
# => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}
|
19
|
+
# or
|
20
|
+
# => {:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]} }
|
24
21
|
#
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
doc = Nokogiri::XML(text)
|
36
|
-
doc.xpath( '//foo:hasModel', 'foo' => 'info:fedora/fedora-system:def/model#' ).each do |element|
|
37
|
-
cmodel = element.attributes['resource'].to_s
|
38
|
-
solr_doc << Solr::Field.new( :cmodel_t => cmodel )
|
39
|
-
|
40
|
-
if map.has_key?(cmodel)
|
41
|
-
solr_doc << Solr::Field.new( :hydra_type_t => map[cmodel] )
|
22
|
+
# Note that values for individual fields can be a single string or an array of strings.
|
23
|
+
def extract_hash( input_hash, solr_doc=Solr::Document.new )
|
24
|
+
facets = input_hash.has_key?(:facets) ? input_hash[:facets] : input_hash
|
25
|
+
facets.each_pair do |facet_name, value|
|
26
|
+
case value.class.to_s
|
27
|
+
when "String"
|
28
|
+
solr_doc << Solr::Field.new( :"#{facet_name}_facet" => "#{value}" )
|
29
|
+
when "Array"
|
30
|
+
value.each { |v| solr_doc << Solr::Field.new( :"#{facet_name}_facet" => "#{v}" ) }
|
42
31
|
end
|
43
32
|
end
|
44
|
-
|
45
|
-
return solr_doc
|
46
|
-
end
|
47
|
-
|
48
|
-
#
|
49
|
-
# This method extracts solr fields from simple xml
|
50
|
-
#
|
51
|
-
def xml_to_solr( text, solr_doc=Solr::Document.new )
|
52
|
-
doc = REXML::Document.new( text )
|
53
|
-
doc.root.elements.each do |element|
|
54
|
-
solr_doc << Solr::Field.new( :"#{element.name}_t" => "#{element.text}" )
|
55
|
-
end
|
56
|
-
|
57
|
-
return solr_doc
|
58
|
-
end
|
59
|
-
|
60
|
-
#
|
61
|
-
# This method strips html tags out and returns content to be indexed in solr
|
62
|
-
#
|
63
|
-
def html_content_to_solr( ds, solr_doc=Solr::Document.new )
|
64
|
-
|
65
|
-
text = CGI.unescapeHTML(ds.content)
|
66
|
-
doc = Nokogiri::HTML(text)
|
67
33
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
34
|
+
if input_hash.has_key?(:symbols)
|
35
|
+
input_hash[:symbols].each do |symbol_name, value|
|
36
|
+
case value.class.to_s
|
37
|
+
when "String"
|
38
|
+
solr_doc << Solr::Field.new( :"#{symbol_name}_s" => "#{value}" )
|
39
|
+
when "Array"
|
40
|
+
value.each { |v| solr_doc << Solr::Field.new( :"#{symbol_name}_s" => "#{v}" ) }
|
41
|
+
end
|
42
|
+
end
|
73
43
|
end
|
74
|
-
|
75
|
-
#strip out text and put in story_t
|
76
|
-
text_nodes = doc.xpath("//text()")
|
77
|
-
text = String.new
|
78
|
-
|
79
|
-
text_nodes.each do |text_node|
|
80
|
-
text << text_node.content
|
81
|
-
end
|
82
|
-
|
83
|
-
solr_doc << Solr::Field.new(:story_t => text)
|
84
|
-
|
85
|
-
return solr_doc
|
44
|
+
return solr_doc
|
86
45
|
end
|
87
46
|
|
88
47
|
end
|
@@ -0,0 +1,351 @@
|
|
1
|
+
require "loggable"
|
2
|
+
module Solrizer
|
3
|
+
|
4
|
+
# Maps Term names and values to Solr fields, based on the Term's data type and any index_as options.
|
5
|
+
#
|
6
|
+
# The basic structure of a mapper is:
|
7
|
+
#
|
8
|
+
# == Mapping on Index Type
|
9
|
+
#
|
10
|
+
# To define a custom mapper:
|
11
|
+
#
|
12
|
+
# class CustomMapper < Solrizer::FieldMapper
|
13
|
+
# index_as :searchable, :suffix => '_search'
|
14
|
+
# index_as :edible, :suffix => '_food'
|
15
|
+
# end
|
16
|
+
#
|
17
|
+
# # t.dish_name :index_as => [:searchable] -maps to-> dish_name_search
|
18
|
+
# # t.ingredients :index_as => [:searchable, :edible] -maps to-> ingredients_search, ingredients_food
|
19
|
+
#
|
20
|
+
# (See Solrizer::XML::TerminologyBasedSolrizer for instructions on applying a custom mapping once you have defined it.)
|
21
|
+
#
|
22
|
+
# == Default Index Types
|
23
|
+
#
|
24
|
+
# You can mark a particular index type as a default. It will then always be included unless terms explicitly
|
25
|
+
# exclude it with the "not_" prefix:
|
26
|
+
#
|
27
|
+
# class CustomMapper < Solrizer::FieldMapper
|
28
|
+
# index_as :searchable, :suffix => '_search', :default => true
|
29
|
+
# index_as :edible, :suffix => '_food'
|
30
|
+
# end
|
31
|
+
#
|
32
|
+
# # t.dish_name -maps to-> dish_name_search
|
33
|
+
# # t.ingredients :index_as => [:edible] -maps to-> ingredients_search, ingredients_food
|
34
|
+
# # t.secret_ingredients :index_as => [:not_searchable, :edible] -maps to-> secret_ingredients_food
|
35
|
+
#
|
36
|
+
# == Mapping on Data Type
|
37
|
+
#
|
38
|
+
# A mapper can apply different suffixes based on a term's data type:
|
39
|
+
#
|
40
|
+
# class CustomMapper < Solrizer::FieldMapper
|
41
|
+
# index_as :searchable, :suffix => '_search' do |type|
|
42
|
+
# type.date :suffix => '_date'
|
43
|
+
# type.integer :suffix => '_numeric'
|
44
|
+
# type.float :suffix => '_numeric'
|
45
|
+
# end
|
46
|
+
# index_as :edible, :suffix => '_food'
|
47
|
+
# end
|
48
|
+
#
|
49
|
+
# # t.published :type => :date, :index_as => [:searchable] -maps to-> published_date
|
50
|
+
# # t.votes :type => :integer, :index_as => [:searchable] -maps to-> votes_numeric
|
51
|
+
#
|
52
|
+
# If a specific data type doesn't appear in the list, the mapper falls back to the index_as:
|
53
|
+
#
|
54
|
+
# # t.description :type => :text, :index_as => [:searchable] -maps to-> description_search
|
55
|
+
#
|
56
|
+
# == Custom Value Converters
|
57
|
+
#
|
58
|
+
# All of the above applies to the generation of Solr names. Mappers can also provide custom conversion logic for the
|
59
|
+
# generation of Solr values by attaching a custom value converter block to a data type:
|
60
|
+
#
|
61
|
+
# require 'time'
|
62
|
+
#
|
63
|
+
# class CustomMapper < Solrizer::FieldMapper
|
64
|
+
# index_as :searchable, :suffix => '_search' do |type|
|
65
|
+
# type.date do |value|
|
66
|
+
# Time.parse(value).utc.to_i
|
67
|
+
# end
|
68
|
+
# end
|
69
|
+
# end
|
70
|
+
#
|
71
|
+
# Note that the nesting order is always:
|
72
|
+
#
|
73
|
+
# FieldMapper definition
|
74
|
+
# index_as
|
75
|
+
# data type
|
76
|
+
# value converter
|
77
|
+
#
|
78
|
+
# You can use the special data type "default" to apply custom value conversion to any data type:
|
79
|
+
#
|
80
|
+
# require 'time'
|
81
|
+
#
|
82
|
+
# class CustomMapper < Solrizer::FieldMapper
|
83
|
+
# index_as :searchable do |type|
|
84
|
+
# type.date :suffix => '_date' do |value|
|
85
|
+
# Time.parse(value).utc.to_i
|
86
|
+
# end
|
87
|
+
# type.default :suffix => '_search' do |value|
|
88
|
+
# value.to_s.strip
|
89
|
+
# end
|
90
|
+
# end
|
91
|
+
# end
|
92
|
+
#
|
93
|
+
# This example converts searchable dates to seconds since the epoch, and strips extra whitespace from all other searchable data types.
|
94
|
+
#
|
95
|
+
# Note that the :suffix option may appear on the data types and the index_as. The search order for the suffix on a field
|
96
|
+
# of type foo is:
|
97
|
+
# 1. type.foo
|
98
|
+
# 2. type.default
|
99
|
+
# 3. index_as
|
100
|
+
# The suffix is optional in all three places.
|
101
|
+
#
|
102
|
+
# Note that a single Term with multiple index types can translate into multiple Solr fields, because we may want Solr to
|
103
|
+
# index a single field in multiple ways. However, if two different mappings generate both the same solr field name
|
104
|
+
# _and_ the same value, the mapper will only emit a single field.
|
105
|
+
#
|
106
|
+
# == ID Field
|
107
|
+
#
|
108
|
+
# In addition to the normal field mappings, Solrizer gives special treatment to an ID field. If you want that
|
109
|
+
# logic (and you probably do), specify a name for this field:
|
110
|
+
#
|
111
|
+
# class CustomMapper < Solrizer::FieldMapper
|
112
|
+
# id_field 'id'
|
113
|
+
# end
|
114
|
+
#
|
115
|
+
# == Extending the Default
|
116
|
+
#
|
117
|
+
# The default mapper is Solrizer::FieldMapper::Default. You can customize the default mapping by subclassing it.
|
118
|
+
# For example, to override the ID field name and the default suffix for sortable, and inherit everything else:
|
119
|
+
#
|
120
|
+
# class CustomMapperBasedOnDefault < Solrizer::FieldMapper::Default
|
121
|
+
# id_field 'guid'
|
122
|
+
# index_as :sortable, :suffix => '_xsort'
|
123
|
+
# end
|
124
|
+
|
125
|
+
class FieldMapper

  include Loggable

  # ------ Class methods ------

  # Deferred configuration blocks, keyed by mapper class. They are replayed
  # against every new mapper instance (see #initialize).
  @@instance_init_actions = Hash.new { |h,k| h[k] = [] }

  # Declares the name of the ID field for mappers of this class.
  # @param field_name the value later exposed through the id_field reader
  def self.id_field(field_name)
    add_instance_init_action do
      @id_field = field_name
    end
  end

  # Declares (or amends) the mapping for an index type.
  # @param index_type [Symbol] key under which the mapping is stored
  # @param opts [Hash] merged into the mapping's options; this class reads :suffix and :default
  # @yield [DataTypeMappingBuilder] optional block for per-data-type suffixes/converters
  def self.index_as(index_type, opts = {}, &block)
    add_instance_init_action do
      mapping = (@mappings[index_type] ||= IndexTypeMapping.new)
      mapping.opts.merge! opts
      yield DataTypeMappingBuilder.new(mapping) if block_given?
    end
  end

  # Loads solr mappings from yml file.
  # Assumes that string values are solr field name suffixes.
  # This is meant as a simple entry point for working with solr mappings. For more powerful control over solr mappings, create your own subclasses of FieldMapper instead of using a yml file.
  # Any mappings previously declared on this class are cleared first.
  # @param [String] config_path Path to the mappings yml file. Defaults to "RAILS_ROOT/config/solr_mappings.yml",
  #   falling back to the config/solr_mappings.yml bundled with the gem when that file does not exist.
  def self.load_mappings( config_path=nil )

    if config_path.nil?
      if defined?(RAILS_ROOT)
        config_path = File.join(RAILS_ROOT, "config", "solr_mappings.yml")
      end
      # Default to using the config file within the gem
      if !File.exist?(config_path.to_s)
        config_path = File.join(File.dirname(__FILE__), "..", "..", "config", "solr_mappings.yml")
      end
    end

    logger.info("SOLRIZER: loading field name mappings from #{File.expand_path(config_path)}")
    # Block form closes the file handle as soon as the YAML has been parsed.
    mappings_from_file = File.open(config_path) { |file| YAML::load(file) }

    self.clear_mappings

    # Set id_field from file if it is available
    id_field_from_file = mappings_from_file.delete("id")
    if id_field_from_file.nil?
      id_field "id"
    else
      id_field id_field_from_file
    end

    default_index_type = mappings_from_file.delete("default")
    mappings_from_file.each_pair do |index_type, type_settings|
      if type_settings.kind_of?(Hash)
        index_as index_type.to_sym, :default => index_type == default_index_type do |t|
          type_settings.each_pair do |field_type, suffix|
            # NOTE(review): eval executes text taken from the yml file as Ruby code.
            # Only load mapping files from trusted sources.
            eval("t.#{field_type} :suffix=>\"#{suffix}\"")
          end
        end
      else
        index_as index_type.to_sym, :default => index_type == default_index_type, :suffix=>type_settings
      end
    end
  end

  # NOTE: a bare `private` does not change the visibility of `def self.` methods,
  # so the class methods below remain callable from outside (#initialize relies
  # on calling apply_instance_init_actions through self.class).
  private

  # Stores a block to be instance_eval'd against each new mapper instance of this class.
  def self.add_instance_init_action(&block)
    @@instance_init_actions[self] << lambda do |mapper|
      mapper.instance_eval(&block)
    end
  end

  # Replays the stored blocks on the given instance: superclass blocks first,
  # then this class's own, so subclasses can override inherited settings.
  def self.apply_instance_init_actions(instance)
    if self.superclass.respond_to? :apply_instance_init_actions
      self.superclass.apply_instance_init_actions(instance)
    end
    @@instance_init_actions[self].each do |action|
      action.call(instance)
    end
  end

  # Reset all of the mappings declared on this class (superclass mappings are untouched)
  def self.clear_mappings
    logger.debug "resetting mappings for #{self.to_s}"
    @@instance_init_actions[self] = []
  end

  public

  # ------ Instance methods ------

  attr_reader :id_field, :default_index_types, :mappings

  # Builds the mapper by replaying every id_field/index_as declaration, then
  # records which index types were declared with :default => true.
  def initialize
    @mappings = {}
    self.class.apply_instance_init_actions(self)
    @default_index_types = @mappings.select { |ix_type, mapping| mapping.opts[:default] }.map(&:first)
  end

  # Given a specific field name, data type, and index type, returns the corresponding solr name.
  # Returns nil when the index type is unknown to this mapper.

  def solr_name(field_name, field_type, index_type = :searchable)
    name, _mapping, _data_type_mapping = solr_name_and_mappings(field_name, field_type, index_type)
    name
  end

  # Given a field name-value pair, a data type, and an array of index types, returns a hash of
  # mapped names and values. The values in the hash are _arrays_, and may contain multiple values.
  # The caller's index_types array is not modified.

  def solr_names_and_values(field_name, field_value, field_type, index_types)
    # Determine the set of index types, adding defaults and removing not_xyz

    index_types ||= []
    index_types += default_index_types
    index_types.uniq!
    index_types.dup.each do |index_type|
      if index_type.to_s =~ /^not_(.*)/
        index_types.delete index_type # not_foo
        index_types.delete $1.to_sym  # foo
      end
    end

    # Map names and values

    results = {}

    index_types.each do |index_type|
      # Get mapping for field
      name, _mapping, data_type_mapping = solr_name_and_mappings(field_name, field_type, index_type)
      next unless name

      # Is there a custom converter?
      value = if data_type_mapping && data_type_mapping.converter
        converter = data_type_mapping.converter
        # One-argument converters get only the value; others also get the field name.
        if converter.arity == 1
          converter.call(field_value)
        else
          converter.call(field_value, field_name)
        end
      else
        field_value
      end

      # Add mapped name & value, unless it's a duplicate
      values = (results[name] ||= [])
      values << value unless values.include?(value)
    end

    results
  end

  private

  # Resolves the solr name for a field and returns [name, index type mapping, data type mapping].
  # Returns nil when the index type has no mapping. The suffix comes from the data type
  # mapping (exact type, else :default) and falls back to the index type's own :suffix.
  def solr_name_and_mappings(field_name, field_type, index_type)
    field_name = field_name.to_s
    mapping = @mappings[index_type]
    unless mapping
      logger.debug "Unknown index type '#{index_type}' for field #{field_name}"
      return nil
    end

    data_type_mapping = mapping.data_types[field_type] || mapping.data_types[:default]

    suffix = data_type_mapping.opts[:suffix] if data_type_mapping
    suffix ||= mapping.opts[:suffix]
    name = field_name + suffix

    [name, mapping, data_type_mapping]
  end

  # Settings for one index type: its options plus per-data-type mappings.
  class IndexTypeMapping
    attr_accessor :opts, :data_types

    def initialize
      @opts = {}
      @data_types = {}
    end
  end

  # Settings for one data type within an index type: options and an optional value converter.
  class DataTypeMapping
    attr_accessor :opts, :converter

    def initialize
      @opts = {}
    end
  end

  # Object yielded to index_as blocks. Any method called on it names a data type:
  # the first argument (if any) is merged into that type's options and a block
  # (if any) becomes its converter.
  class DataTypeMappingBuilder
    def initialize(index_type_mapping)
      @index_type_mapping = index_type_mapping
    end

    def method_missing(method, *args, &block)
      data_type_mapping = (@index_type_mapping.data_types[method] ||= DataTypeMapping.new)
      data_type_mapping.opts.merge! args[0] if args.length > 0
      data_type_mapping.converter = block if block_given?
    end
  end

  # ------ Default mapper ------

  public

  class Default < FieldMapper
    id_field 'id'
    index_as :searchable, :default => true do |t|
      t.default :suffix => '_t'
      t.date    :suffix => '_dt'
      t.string  :suffix => '_t'
      t.text    :suffix => '_t'
      t.symbol  :suffix => '_s'
      t.integer :suffix => '_i'
      t.long    :suffix => '_l'
      t.boolean :suffix => '_b'
      t.float   :suffix => '_f'
      t.double  :suffix => '_d'
    end
    index_as :displayable,          :suffix => '_display'
    index_as :facetable,            :suffix => '_facet'
    index_as :sortable,             :suffix => '_sort'
    index_as :unstemmed_searchable, :suffix => '_unstem_search'
  end

end
|
350
|
+
|
351
|
+
end
|