solrizer 2.2.0 → 3.0.0.pre1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -5,7 +5,7 @@ Gemfile.lock
5
5
  *.sqlite3
6
6
  *.log
7
7
  *~
8
- *.swp
8
+ *.sw[pon]
9
9
 
10
10
  pkg/
11
11
  coverage/*
data/History.txt CHANGED
@@ -1,3 +1,15 @@
1
+ h2. 3.0.0
2
+ suffix changes:
3
+ _s -> _si
4
+ _t -> _tesim
5
+ _dt -> _dtsi
6
+ _i -> _isim
7
+ _sort -> _ssi
8
+ _display -> _sim
9
+ _facet -> _sim
10
+ _unstem_search -> _tim
11
+
12
+
1
13
  h2. 2.1.0
2
14
  #11 There should only be one instance of the field mapper. It's now at Solrizer.default_field_mapper
3
15
  Extract create_and_insert_terms into Solrizer::Common. This can be used for RDF datastreams
@@ -1,21 +1,6 @@
1
1
  # The goal of this method is to have no dependency on OM, so that NOM or RDF datastreams could use this.
2
2
 
3
3
  module Solrizer
4
- # Instructions on how to solrize the field (types and uses)
5
- class Directive
6
- attr_accessor :type, :index_as
7
- def initialize(*args)
8
- case args
9
- when Hash
10
- self.type = args[:type]
11
- self.index_as = args[:index_as]
12
- when Array
13
- self.type = args[0]
14
- self.index_as = args[1]
15
- end
16
- end
17
- end
18
-
19
4
  module Common
20
5
  def self.included(klass)
21
6
  klass.send(:extend, ClassMethods)
@@ -24,10 +9,10 @@ module Solrizer
24
9
  module ClassMethods
25
10
  # @param [String] field_name_base the name of the solr field (without the type suffix)
26
11
  # @param [Object] value the value to insert into the document
27
- # @param [Directive] directive instructions on which fields to create
12
+ # @param [Array] index_as list of indexers to use (e.g. [:searchable, :facetable])
28
13
  # @param [Hash] solr_doc the solr_doc to insert into.
29
- def create_and_insert_terms(field_name_base, value, directive, solr_doc)
30
- Solrizer.default_field_mapper.solr_names_and_values(field_name_base, value, directive.type, directive.index_as).each do |field_name, field_value|
14
+ def create_and_insert_terms(field_name_base, value, index_as, solr_doc)
15
+ Solrizer.default_field_mapper.solr_names_and_values(field_name_base, value, index_as).each do |field_name, field_value|
31
16
  unless field_value.join("").strip.empty?
32
17
  ::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value)
33
18
  end
@@ -0,0 +1,93 @@
1
module Solrizer
  # Factory methods for the stock index descriptors. Each public method lazily
  # builds one Descriptor and memoizes it in a module-level instance variable,
  # so repeated calls return the same object.
  module DefaultDescriptors

    # Produces a _sim suffix
    def self.facetable
      @facetable ||= Descriptor.new(:string, :indexed, :multivalued)
    end

    # Most interesting case because the suffix produced depends on the type parameter
    # produces suffixes:
    #  _tesim - for strings or text fields
    #  _dtsi  - for dates (dates are not multivalued, see searchable_field_definition)
    #  _isim  - for integers
    def self.searchable
      @searchable ||= Descriptor.new(searchable_field_definition, converter: searchable_converter, requires_type: true)
    end

    # Takes fields which are stored as strings, but we want indexed as dates. (e.g. "November 6th, 2012")
    # produces suffixes:
    #  _dtsi - for dates
    def self.dateable
      @dateable ||= Descriptor.new(:date, :stored, :indexed, converter: dateable_converter)
    end

    # Produces a _ssim suffix
    def self.symbol
      @symbol ||= Descriptor.new(:string, :stored, :indexed, :multivalued)
    end

    # Produces a _ssi suffix
    def self.sortable
      @sortable ||= Descriptor.new(:string, :indexed, :stored)
    end

    # Produces a _ssm suffix
    def self.displayable
      @displayable ||= Descriptor.new(:string, :stored, :multivalued)
    end

    # Produces a _tim suffix (used to be _unstem)
    def self.unstemmed_searchable
      @unstemmed_searchable ||= Descriptor.new(:text, :indexed, :multivalued)
    end

    # Indexed-only descriptor whose datatype is whatever field type the caller supplies.
    def self.simple
      @simple ||= Descriptor.new(lambda { |field_type| [field_type, :indexed] })
    end

    # NOTE: the helpers below were preceded by a bare `protected`, which has no
    # effect on methods defined with `def self.`; they have always been publicly
    # callable and remain so here.

    # @return [Proc] maps a field type to the index-type list used by `searchable`
    def self.searchable_field_definition
      lambda do |type|
        type = :text_en if [:string, :text].include?(type) # for backwards compatibility with old solr schema
        vals = [type, :indexed, :stored]
        vals << :multivalued unless type == :date
        vals
      end
    end

    # @return [Proc] maps a field type to a value converter; only :date gets one
    #   (every other type yields nil, i.e. no conversion)
    def self.searchable_converter
      lambda do |type|
        case type
        when :date
          lambda { |val| iso8601_date(val) }
        end
      end
    end

    # @return [Proc] ignores the field type and yields a converter that parses a
    #   free-form date string; strings Date.parse rejects convert to nil
    def self.dateable_converter
      lambda do |_type|
        lambda do |val|
          begin
            iso8601_date(Date.parse(val))
          rescue ArgumentError
            nil
          end
        end
      end
    end

    # Normalizes a date-like value to a UTC ISO 8601 timestamp string.
    #
    # @param [Date, DateTime, Time, String, nil] value the value to convert
    # @return [String, nil] the formatted timestamp, or nil for nil / empty input
    # @raise [ArgumentError] if the value cannot be parsed as a date-time
    def self.iso8601_date(value)
      return if value.nil?
      # Time does not respond to #empty?, so it used to blow up with NoMethodError.
      # getutc (rather than utc) leaves the caller's object untouched.
      return value.getutc.iso8601 if value.is_a?(Time)

      if value.is_a?(Date)
        # Round-trip through the string form so a plain Date is read as midnight UTC.
        DateTime.parse(value.to_s).to_time.utc.iso8601
      elsif !value.empty?
        DateTime.parse(value).to_time.utc.iso8601
      end
    rescue ArgumentError
      raise ArgumentError, "Unable to parse `#{value}' as a date-time object"
    end
  end
end
@@ -0,0 +1,68 @@
1
module Solrizer
  # Describes how a field should be indexed and turns that description into a
  # solr dynamic-field suffix (e.g. `_ssim`) plus an optional value converter.
  class Descriptor
    # @return [Array] the positional arguments given to the constructor: either
    #   a datatype symbol followed by flags, or a single Proc that computes them
    attr_reader :index_type

    # @param [Array] args a datatype (:string, :text, :text_en, :date, :integer)
    #   followed by any of :stored, :indexed, :multivalued -- or a single Proc that
    #   receives the field type and returns such a list. A trailing Hash may carry
    #   :converter (Proc taking the field type, returning a value converter or nil)
    #   and :requires_type (truthy when callers must pass a :type).
    def initialize(*args)
      opts = args.last.kind_of?(Hash) ? args.pop : {}
      @converter = opts[:converter]
      @type_required = opts[:requires_type]
      @index_type = args
      # The splat always yields an Array, so this is a purely defensive guard.
      raise Solrizer::InvalidIndexDescriptor, "Invalid index type passed to Sorizer.solr_name.  It should be an array like [:string, :indexed, :stored, :multivalued]. You provided: `#{@index_type}'" unless index_type.kind_of? Array
    end

    # @param [String, Symbol] field_name base name of the solr field
    # @param [Hash, nil] args options; only :type is read
    # @return [Array] two elements: the suffixed field name and the converter
    #   (nil when no converter applies)
    # @raise [RuntimeError] if this descriptor requires a type and none was given
    # @raise [Solrizer::InvalidIndexDescriptor] if no valid datatype can be determined
    def name_and_converter(field_name, args=nil)
      args ||= {}
      field_type = args[:type]
      if type_required?
        raise "Must provide a :type argument when index_type is `#{self}' for #{field_name}" unless field_type
      end
      [field_name.to_s + suffix(field_type), converter(field_type)]
    end

    # @return the :requires_type option as given (nil when it was not supplied)
    def type_required?
      @type_required
    end

    protected

    # Builds the field-name suffix: delimiter, datatype code, then one letter
    # for each of the stored / indexed / multivalued flags that are present.
    def suffix(field_type)
      raw_type = index_type.first.kind_of?(Proc) ? index_type.first.call(field_type) : index_type
      # Always work on a copy: the flags are removed with delete below, which
      # would otherwise corrupt (or fail on) an array the Proc shares or freezes.
      evaluated_type = raw_type.dup
      stored_suffix = config[:stored_suffix] if evaluated_type.delete(:stored)
      index_suffix = config[:index_suffix] if evaluated_type.delete(:indexed)
      multivalued_suffix = config[:multivalued_suffix] if evaluated_type.delete(:multivalued)
      # Whatever is left after stripping the flags is the datatype.
      index_datatype = evaluated_type.first
      raise Solrizer::InvalidIndexDescriptor, "Missing datatype for #{evaluated_type}" unless index_datatype
      type_suffix = config[:type_suffix].call(index_datatype)
      raise Solrizer::InvalidIndexDescriptor, "Invalid datatype `#{index_datatype.inspect}'. Must be one of: :date, :text, :text_en, :string, :integer" unless type_suffix

      [config[:suffix_delimiter], type_suffix, stored_suffix, index_suffix, multivalued_suffix].join
    end

    # Resolves the value converter for a field type, or nil when none was configured.
    def converter(field_type)
      @converter.call(field_type) if @converter
    end

    private

    # Suffix building blocks, memoized per instance.
    def config
      @config ||=
        {suffix_delimiter: '_',
         type_suffix: lambda do |type|
           case type
           when :string, :symbol # TODO `:symbol' usage ought to be deprecated
             's'
           when :text
             't'
           when :text_en
             'te'
           when :date
             'dt'
           when :integer
             'i'
           end
         end,
         stored_suffix: 's',
         index_suffix: 'i',
         multivalued_suffix: 'm'}
    end
  end
end