solrizer 2.2.0 → 3.0.0.pre1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -1
- data/History.txt +12 -0
- data/lib/solrizer/common.rb +3 -18
- data/lib/solrizer/default_descriptors.rb +93 -0
- data/lib/solrizer/descriptor.rb +68 -0
- data/lib/solrizer/field_mapper.rb +107 -288
- data/lib/solrizer/version.rb +1 -1
- data/lib/solrizer/xml/extractor.rb +5 -5
- data/lib/solrizer/xml/terminology_based_solrizer.rb +4 -20
- data/lib/solrizer.rb +1 -1
- data/solrizer.gemspec +1 -1
- data/spec/fixtures/mods_article.rb +0 -1
- data/spec/spec_helper.rb +0 -1
- data/spec/units/common_spec.rb +8 -12
- data/spec/units/extractor_spec.rb +3 -2
- data/spec/units/field_mapper_spec.rb +177 -141
- data/spec/units/xml_extractor_spec.rb +13 -14
- data/spec/units/xml_terminology_based_solrizer_spec.rb +13 -22
- metadata +56 -33
- checksums.yaml +0 -7
- data/lib/solrizer/field_name_mapper.rb +0 -51
- data/spec/units/field_name_mapper_spec.rb +0 -28
data/.gitignore
CHANGED
data/History.txt
CHANGED
@@ -1,3 +1,15 @@
|
|
1
|
+
h2. 3.0.0
|
2
|
+
suffix changes:
|
3
|
+
_s -> _si
|
4
|
+
_t -> _tesim
|
5
|
+
_dt -> _dtsi
|
6
|
+
_i -> _isim
|
7
|
+
_sort -> _ssi
|
8
|
+
_display -> _ssm
|
9
|
+
_facet -> _sim
|
10
|
+
_unstem_search -> _tim
|
11
|
+
|
12
|
+
|
1
13
|
h2. 2.1.0
|
2
14
|
#11 There should only be one instance of the field mapper. It's now at Solrizer.default_field_mapper
|
3
15
|
Extract create_and_insert_terms into Solrizer::Common. This can be used for RDF datastreams
|
data/lib/solrizer/common.rb
CHANGED
@@ -1,21 +1,6 @@
|
|
1
1
|
# The goal of this method is to have no dependency on OM, so that NOM or RDF datastreams could use this.
|
2
2
|
|
3
3
|
module Solrizer
|
4
|
-
# Instructions on how to solrize the field (types and uses)
|
5
|
-
class Directive
|
6
|
-
attr_accessor :type, :index_as
|
7
|
-
def initialize(*args)
|
8
|
-
case args
|
9
|
-
when Hash
|
10
|
-
self.type = args[:type]
|
11
|
-
self.index_as = args[:index_as]
|
12
|
-
when Array
|
13
|
-
self.type = args[0]
|
14
|
-
self.index_as = args[1]
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
4
|
module Common
|
20
5
|
def self.included(klass)
|
21
6
|
klass.send(:extend, ClassMethods)
|
@@ -24,10 +9,10 @@ module Solrizer
|
|
24
9
|
module ClassMethods
|
25
10
|
# @param [String] field_name_base the name of the solr field (without the type suffix)
|
26
11
|
# @param [Object] value the value to insert into the document
|
27
|
-
# @param [
|
12
|
+
# @param [Array] index_as list of indexers to use (e.g. [:searchable, :facetable])
|
28
13
|
# @param [Hash] solr_doc the solr_doc to insert into.
|
29
|
-
def create_and_insert_terms(field_name_base, value,
|
30
|
-
Solrizer.default_field_mapper.solr_names_and_values(field_name_base, value,
|
14
|
+
def create_and_insert_terms(field_name_base, value, index_as, solr_doc)
|
15
|
+
Solrizer.default_field_mapper.solr_names_and_values(field_name_base, value, index_as).each do |field_name, field_value|
|
31
16
|
unless field_value.join("").strip.empty?
|
32
17
|
::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value)
|
33
18
|
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
require 'date'
require 'time'

module Solrizer
  # Factory methods for the stock index descriptors. Each public factory
  # memoizes a single Descriptor in a module-level instance variable, so
  # repeated calls return the same object.
  module DefaultDescriptors

    # Produces a _sim suffix
    def self.facetable
      @facetable ||= Descriptor.new(:string, :indexed, :multivalued)
    end

    # Most interesting case because the suffix produced depends on the type parameter.
    # Produces suffixes:
    #  _tesim - for strings or text fields
    #  _dtsi  - for dates (date fields are not multivalued)
    #  _isim  - for integers
    def self.searchable
      @searchable ||= Descriptor.new(searchable_field_definition, converter: searchable_converter, requires_type: true)
    end

    # Takes fields which are stored as strings, but we want indexed as dates. (e.g. "November 6th, 2012")
    # Produces suffixes:
    #  _dtsi - for dates
    def self.dateable
      @dateable ||= Descriptor.new(:date, :stored, :indexed, converter: dateable_converter)
    end

    # Produces a _ssim suffix
    def self.symbol
      @symbol ||= Descriptor.new(:string, :stored, :indexed, :multivalued)
    end

    # Produces a _ssi suffix
    def self.sortable
      @sortable ||= Descriptor.new(:string, :indexed, :stored)
    end

    # Produces a _ssm suffix
    def self.displayable
      @displayable ||= Descriptor.new(:string, :stored, :multivalued)
    end

    # Produces a _tim suffix (used to be _unstem)
    def self.unstemmed_searchable
      @unstemmed_searchable ||= Descriptor.new(:text, :indexed, :multivalued)
    end

    # Indexed-only descriptor whose datatype is whatever field type is
    # supplied at lookup time (the definition is [field_type, :indexed]).
    def self.simple
      @simple ||= Descriptor.new(lambda { |field_type| [field_type, :indexed] })
    end

    # NOTE: the helpers below are singleton methods (`def self.`), which a bare
    # `protected` keyword does not affect, so they are (and always were)
    # publicly callable. The misleading keyword has been dropped.

    # Builds the index definition used by `searchable`.
    #
    # @return [Proc] a lambda taking a field type and returning the definition
    #   array: [type, :indexed, :stored], plus :multivalued for every type but :date
    def self.searchable_field_definition
      lambda do |type|
        type = :text_en if [:string, :text].include?(type) # for backwards compatibility with old solr schema
        vals = [type, :indexed, :stored]
        vals << :multivalued unless type == :date
        vals
      end
    end

    # Builds the converter factory used by `searchable`.
    #
    # @return [Proc] a lambda taking a field type; it returns a value converter
    #   for :date and nil (no conversion) for every other type
    def self.searchable_converter
      lambda do |type|
        case type
        when :date
          lambda { |val| iso8601_date(val) }
        end
      end
    end

    # Builds the converter factory used by `dateable`.
    #
    # @return [Proc] a lambda (the field type is ignored) returning a converter
    #   that parses its argument as a date and formats it with iso8601_date.
    #   Values that cannot be parsed, including nil and other non-String
    #   input, convert to nil instead of raising.
    def self.dateable_converter
      lambda do |_type|
        lambda do |val|
          begin
            # Date.parse only accepts Strings; pass real Date objects straight through.
            iso8601_date(val.is_a?(Date) ? val : Date.parse(val))
          rescue ArgumentError, TypeError
            nil
          end
        end
      end
    end

    # Formats a value as a UTC ISO 8601 timestamp (e.g. "2012-11-06T00:00:00Z").
    #
    # @param [Date, DateTime, Time, String, nil] value the value to format
    # @return [String, nil] the timestamp, or nil when value is nil or empty
    # @raise [ArgumentError] when value cannot be parsed as a date-time
    def self.iso8601_date(value)
      return if value.nil?
      if value.is_a?(Time)
        # getutc returns a copy, leaving the caller's Time untouched
        value.getutc.iso8601
      elsif value.is_a?(Date)
        DateTime.parse(value.to_s).to_time.utc.iso8601
      elsif !value.empty?
        DateTime.parse(value).to_time.utc.iso8601
      end
    rescue ArgumentError
      raise ArgumentError, "Unable to parse `#{value}' as a date-time object"
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module Solrizer
  # Describes how a field should be indexed and turns a base field name into
  # a suffixed solr field name (e.g. "title" => "title_sim").
  class Descriptor
    # The index definition: either a list such as
    # [:string, :indexed, :stored, :multivalued], or a single Proc that
    # returns such a list when called with the field type.
    attr_reader :index_type

    # @param [Array] args the index definition (see #index_type), optionally
    #   followed by an options Hash:
    #   :converter     - Proc called with the field type; returns a value converter or nil
    #   :requires_type - when truthy, #name_and_converter insists on a :type argument
    def initialize(*args)
      # A splat always yields an Array, so no Array check is needed here;
      # the contents of the definition are validated lazily in #suffix.
      opts = args.last.kind_of?(Hash) ? args.pop : {}
      @converter = opts[:converter]
      @type_required = opts[:requires_type]
      @index_type = args
    end

    # @param [String, Symbol] field_name the base name of the field
    # @param [Hash, nil] args options; :type is the field's datatype
    # @return [Array] a two-element array: the suffixed field name, and the
    #   value converter for the field type (nil when there is none)
    # @raise [RuntimeError] when a type is required but :type was not given
    def name_and_converter(field_name, args=nil)
      args ||= {}
      field_type = args[:type]
      if type_required?
        raise "Must provide a :type argument when index_type is `#{self}' for #{field_name}" unless field_type
      end
      [field_name.to_s + suffix(field_type), converter(field_type)]
    end

    # @return the :requires_type option given at construction (nil when absent)
    def type_required?
      @type_required
    end

    protected

    # Builds the field-name suffix: delimiter, datatype code, then one letter
    # for each of the stored / indexed / multivalued flags that is present.
    #
    # @raise [Solrizer::InvalidIndexDescriptor] when the definition has no
    #   datatype, or a datatype the config has no suffix for
    def suffix(field_type)
      definition = index_type.first.kind_of?(Proc) ? index_type.first.call(field_type) : index_type
      definition = Array(definition)

      # Work on copies only: the definition may be shared (or frozen) by the
      # Proc that produced it, so it must never be mutated here.
      remaining = definition - [:stored, :indexed, :multivalued]
      stored_suffix = config[:stored_suffix] if definition.include?(:stored)
      index_suffix = config[:index_suffix] if definition.include?(:indexed)
      multivalued_suffix = config[:multivalued_suffix] if definition.include?(:multivalued)

      index_datatype = remaining.first
      raise Solrizer::InvalidIndexDescriptor, "Missing datatype for #{remaining}" unless index_datatype
      type_suffix = config[:type_suffix].call(index_datatype)
      raise Solrizer::InvalidIndexDescriptor, "Invalid datatype `#{index_datatype.inspect}'. Must be one of: :date, :text, :text_en, :string, :integer" unless type_suffix

      # nil entries (flags that are absent) join as empty strings
      [config[:suffix_delimiter], type_suffix, stored_suffix, index_suffix, multivalued_suffix].join
    end

    # @return the converter produced for field_type, or nil when the
    #   descriptor was built without a :converter option
    def converter(field_type)
      @converter.call(field_type) if @converter
    end

    private

    # Suffix building blocks, memoized per instance. :type_suffix maps a
    # datatype symbol to its code and returns nil for unknown datatypes.
    def config
      @config ||=
        {suffix_delimiter: '_',
         type_suffix: lambda do |type|
           case type
           when :string, :symbol # TODO `:symbol' usage ought to be deprecated
             's'
           when :text
             't'
           when :text_en
             'te'
           when :date
             'dt'
           when :integer
             'i'
           end
         end,
         stored_suffix: 's',
         index_suffix: 'i',
         multivalued_suffix: 'm'}
    end
  end
end
|