solrizer 2.2.0 → 3.0.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -1
- data/History.txt +12 -0
- data/lib/solrizer/common.rb +3 -18
- data/lib/solrizer/default_descriptors.rb +93 -0
- data/lib/solrizer/descriptor.rb +68 -0
- data/lib/solrizer/field_mapper.rb +107 -288
- data/lib/solrizer/version.rb +1 -1
- data/lib/solrizer/xml/extractor.rb +5 -5
- data/lib/solrizer/xml/terminology_based_solrizer.rb +4 -20
- data/lib/solrizer.rb +1 -1
- data/solrizer.gemspec +1 -1
- data/spec/fixtures/mods_article.rb +0 -1
- data/spec/spec_helper.rb +0 -1
- data/spec/units/common_spec.rb +8 -12
- data/spec/units/extractor_spec.rb +3 -2
- data/spec/units/field_mapper_spec.rb +177 -141
- data/spec/units/xml_extractor_spec.rb +13 -14
- data/spec/units/xml_terminology_based_solrizer_spec.rb +13 -22
- metadata +56 -33
- checksums.yaml +0 -7
- data/lib/solrizer/field_name_mapper.rb +0 -51
- data/spec/units/field_name_mapper_spec.rb +0 -28
data/.gitignore
CHANGED
data/History.txt
CHANGED
@@ -1,3 +1,15 @@
|
|
1
|
+
h2. 3.0.0
|
2
|
+
suffix changes:
|
3
|
+
_s -> _si
|
4
|
+
_t -> _tesim
|
5
|
+
_dt -> _dtsi
|
6
|
+
_i -> _isim
|
7
|
+
_sort -> _ssi
|
8
|
+
_display -> _ssm
|
9
|
+
_facet -> _sim
|
10
|
+
_unstem_search -> _tim
|
11
|
+
|
12
|
+
|
1
13
|
h2. 2.1.0
|
2
14
|
#11 There should only be one instance of the field mapper. It's now at Solrizer.default_field_mapper
|
3
15
|
Extract create_and_insert_terms into Solrizer::Common. This can be used for RDF datastreams
|
data/lib/solrizer/common.rb
CHANGED
@@ -1,21 +1,6 @@
|
|
1
1
|
# The goal of this method is to have no dependency on OM, so that NOM or RDF datastreams could use this.
|
2
2
|
|
3
3
|
module Solrizer
|
4
|
-
# Instructions on how to solrize the field (types and uses)
|
5
|
-
class Directive
|
6
|
-
attr_accessor :type, :index_as
|
7
|
-
def initialize(*args)
|
8
|
-
case args
|
9
|
-
when Hash
|
10
|
-
self.type = args[:type]
|
11
|
-
self.index_as = args[:index_as]
|
12
|
-
when Array
|
13
|
-
self.type = args[0]
|
14
|
-
self.index_as = args[1]
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
4
|
module Common
|
20
5
|
def self.included(klass)
|
21
6
|
klass.send(:extend, ClassMethods)
|
@@ -24,10 +9,10 @@ module Solrizer
|
|
24
9
|
module ClassMethods
|
25
10
|
# @param [String] field_name_base the name of the solr field (without the type suffix)
|
26
11
|
# @param [Object] value the value to insert into the document
|
27
|
-
# @param [
|
12
|
+
# @param [Array] index_as list of indexers to use (e.g. [:searchable, :facetable])
|
28
13
|
# @param [Hash] solr_doc the solr_doc to insert into.
|
29
|
-
def create_and_insert_terms(field_name_base, value,
|
30
|
-
Solrizer.default_field_mapper.solr_names_and_values(field_name_base, value,
|
14
|
+
def create_and_insert_terms(field_name_base, value, index_as, solr_doc)
|
15
|
+
Solrizer.default_field_mapper.solr_names_and_values(field_name_base, value, index_as).each do |field_name, field_value|
|
31
16
|
unless field_value.join("").strip.empty?
|
32
17
|
::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value)
|
33
18
|
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
require 'date'
require 'time' # provides Time#iso8601 on Rubies where it is not built in

module Solrizer
  # Factory methods for the stock index descriptors. Each public factory
  # memoizes a Descriptor built from a datatype plus any of the :stored,
  # :indexed and :multivalued flags; the Descriptor turns those into a
  # field-name suffix (datatype letters, then 's', 'i', 'm' for the flags).
  module DefaultDescriptors

    # Produces a _sim suffix
    def self.facetable
      @facetable ||= Descriptor.new(:string, :indexed, :multivalued)
    end

    # Most interesting case because the suffix produced depends on the type parameter.
    # Produces suffixes:
    #  _tesim - for strings or text fields
    #  _dtsi  - for dates (dates are not multivalued, see searchable_field_definition)
    #  _isim  - for integers
    def self.searchable
      @searchable ||= Descriptor.new(searchable_field_definition, converter: searchable_converter, requires_type: true)
    end

    # Takes fields which are stored as strings, but we want indexed as dates. (e.g. "November 6th, 2012")
    # Produces suffixes:
    #  _dtsi - for dates
    def self.dateable
      @dateable ||= Descriptor.new(:date, :stored, :indexed, converter: dateable_converter)
    end

    # Produces a _ssim suffix
    def self.symbol
      @symbol ||= Descriptor.new(:string, :stored, :indexed, :multivalued)
    end

    # Produces a _ssi suffix
    def self.sortable
      @sortable ||= Descriptor.new(:string, :indexed, :stored)
    end

    # Produces a _ssm suffix
    def self.displayable
      @displayable ||= Descriptor.new(:string, :stored, :multivalued)
    end

    # Produces a _tim suffix (used to be _unstem)
    def self.unstemmed_searchable
      @unstemmed_searchable ||= Descriptor.new(:text, :indexed, :multivalued)
    end

    # Indexed-only descriptor whose datatype is whatever type the caller supplies.
    def self.simple
      @simple ||= Descriptor.new(lambda {|field_type| [field_type, :indexed]})
    end

    # The helpers below are implementation details of the factories above.
    # They stay publicly callable: a bare `protected` keyword does not apply
    # to `def self.` methods, so it never restricted them.

    # @return [Proc] maps a field type to the index definition array used by
    #   `searchable`
    def self.searchable_field_definition
      lambda do |type|
        type = :text_en if [:string, :text].include?(type) # for backwards compatibility with old solr schema
        vals = [type, :indexed, :stored]
        vals << :multivalued unless type == :date
        vals
      end
    end

    # @return [Proc] maps a field type to a value converter; only :date has
    #   one (ISO 8601 formatting), every other type yields nil
    def self.searchable_converter
      lambda do |type|
        case type
        when :date
          lambda { |val| iso8601_date(val)}
        end
      end
    end

    # @return [Proc] maps any field type to a converter that parses a
    #   free-form date and formats it as ISO 8601 UTC. The converter returns
    #   nil when the value cannot be read as a date (including nil input).
    def self.dateable_converter
      lambda do |_type|
        lambda do |val|
          begin
            # Date.parse only accepts strings; pass real dates straight through.
            date = val.is_a?(Date) ? val : Date.parse(val)
            iso8601_date(date)
          rescue ArgumentError, TypeError
            nil
          end
        end
      end
    end

    # Formats a value as an ISO 8601 UTC timestamp.
    #
    # @param [Date, DateTime, Time, String, nil] value the value to format
    # @return [String, nil] e.g. "2012-11-06T00:00:00Z"; nil for nil or an empty string
    # @raise [ArgumentError] when the value cannot be parsed as a date-time
    def self.iso8601_date(value)
      return nil if value.nil?
      # getutc returns a copy, so the caller's Time is not switched to UTC in place
      return value.getutc.iso8601 if value.is_a?(Time)

      text = value.is_a?(Date) ? value.to_s : value
      return nil if text.empty?

      begin
        DateTime.parse(text).to_time.utc.iso8601
      rescue ArgumentError
        raise ArgumentError, "Unable to parse `#{value}' as a date-time object"
      end
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module Solrizer
  # Describes how a field should be indexed and derives the suffixed field
  # name (plus an optional value converter) from that description.
  #
  # The index type is a list such as [:string, :indexed, :stored, :multivalued],
  # or a single Proc that receives the field type and returns such a list.
  class Descriptor
    attr_reader :index_type

    # @param [Array] args the index type entries, optionally followed by an
    #   options Hash
    # @option args.last [Proc] :converter called with the field type; returns
    #   the value converter (or nil)
    # @option args.last [Boolean] :requires_type when truthy,
    #   name_and_converter insists on a :type argument
    def initialize(*args)
      if args.last.kind_of? Hash
        opts = args.pop
        @converter = opts[:converter]
        @type_required = opts[:requires_type]
      end
      # A splat is always an Array, so no type check is needed here; a missing
      # or unknown datatype is reported by #suffix when a name is built.
      @index_type = args
    end

    # @param [#to_s] field_name the base field name (without suffix)
    # @param [Hash, nil] args options; :type is the field's datatype
    # @return [Array] the suffixed field name and the converter (may be nil)
    # @raise [RuntimeError] when this descriptor requires a type and none was given
    # @raise [Solrizer::InvalidIndexDescriptor] when no valid datatype can be determined
    def name_and_converter(field_name, args=nil)
      args ||= {}
      field_type = args[:type]
      if type_required?
        raise "Must provide a :type argument when index_type is `#{self}' for #{field_name}" unless field_type
      end
      [field_name.to_s + suffix(field_type), converter(field_type)]
    end

    # @return [Object, nil] the :requires_type option as given (nil when unset)
    def type_required?
      @type_required
    end

    protected

    # Builds the suffix: delimiter, datatype letters, then one letter for each
    # of the :stored, :indexed and :multivalued flags that are present.
    def suffix(field_type)
      definition = index_type.first.kind_of?(Proc) ? index_type.first.call(field_type) : index_type
      # Work on a copy: the flags are removed with delete, and neither
      # index_type nor an array handed back by a Proc may be modified.
      evaluated_type = definition.dup
      stored_suffix = config[:stored_suffix] if evaluated_type.delete(:stored)
      index_suffix = config[:index_suffix] if evaluated_type.delete(:indexed)
      multivalued_suffix = config[:multivalued_suffix] if evaluated_type.delete(:multivalued)
      # Whatever is left after removing the flags is the datatype.
      index_datatype = evaluated_type.first
      raise Solrizer::InvalidIndexDescriptor, "Missing datatype for #{evaluated_type}" unless index_datatype
      type_suffix = config[:type_suffix].call(index_datatype)
      raise Solrizer::InvalidIndexDescriptor, "Invalid datatype `#{index_datatype.inspect}'. Must be one of: :date, :text, :text_en, :string, :integer" unless type_suffix

      [config[:suffix_delimiter], type_suffix, stored_suffix, index_suffix, multivalued_suffix].join
    end

    # @return [Object, nil] the result of the :converter option for this
    #   field type, or nil when no :converter was configured
    def converter(field_type)
      @converter.call(field_type) if @converter
    end

    private

    # Suffix building blocks; :type_suffix returns nil for unknown datatypes.
    def config
      @config ||=
      {suffix_delimiter: '_',
        type_suffix: lambda do |type|
          case type
          when :string, :symbol # TODO `:symbol' usage ought to be deprecated
            's'
          when :text
            't'
          when :text_en
            'te'
          when :date
            'dt'
          when :integer
            'i'
          end
        end,
        stored_suffix: 's',
        index_suffix: 'i',
        multivalued_suffix: 'm'}
    end
  end
end
|