solrizer 2.2.0 → 3.0.0.pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -5,7 +5,7 @@ Gemfile.lock
5
5
  *.sqlite3
6
6
  *.log
7
7
  *~
8
- *.swp
8
+ *.sw[pon]
9
9
 
10
10
  pkg/
11
11
  coverage/*
data/History.txt CHANGED
@@ -1,3 +1,15 @@
1
+ h2. 3.0.0
2
+ suffix changes:
3
+ _s -> _si
4
+ _t -> _tesim
5
+ _dt -> _dtsi
6
+ _i -> _isim
7
+ _sort -> _ssi
8
+ _display -> _sim
9
+ _facet -> _sim
10
+ _unstem_search -> _tim
11
+
12
+
1
13
  h2. 2.1.0
2
14
  #11 There should only be one instance of the field mapper. It's now at Solrizer.default_field_mapper
3
15
  Extract create_and_insert_terms into Solrizer::Common. This can be used for RDF datastreams
@@ -1,21 +1,6 @@
1
1
  # The goal of this method is to have no dependency on OM, so that NOM or RDF datastreams could use this.
2
2
 
3
3
  module Solrizer
4
- # Instructions on how to solrize the field (types and uses)
5
- class Directive
6
- attr_accessor :type, :index_as
7
- def initialize(*args)
8
- case args
9
- when Hash
10
- self.type = args[:type]
11
- self.index_as = args[:index_as]
12
- when Array
13
- self.type = args[0]
14
- self.index_as = args[1]
15
- end
16
- end
17
- end
18
-
19
4
  module Common
20
5
  def self.included(klass)
21
6
  klass.send(:extend, ClassMethods)
@@ -24,10 +9,10 @@ module Solrizer
24
9
  module ClassMethods
25
10
  # @param [String] field_name_base the name of the solr field (without the type suffix)
26
11
  # @param [Object] value the value to insert into the document
27
- # @param [Directive] directive instructions on which fields to create
12
+ # @param [Array] index_as list of indexers to use (e.g. [:searchable, :facetable])
28
13
  # @param [Hash] solr_doc the solr_doc to insert into.
29
- def create_and_insert_terms(field_name_base, value, directive, solr_doc)
30
- Solrizer.default_field_mapper.solr_names_and_values(field_name_base, value, directive.type, directive.index_as).each do |field_name, field_value|
14
+ def create_and_insert_terms(field_name_base, value, index_as, solr_doc)
15
+ Solrizer.default_field_mapper.solr_names_and_values(field_name_base, value, index_as).each do |field_name, field_value|
31
16
  unless field_value.join("").strip.empty?
32
17
  ::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value)
33
18
  end
@@ -0,0 +1,93 @@
1
module Solrizer
  # Factory methods for the stock index descriptors. Each public factory
  # memoizes a Descriptor whose flags determine the solr field-name suffix
  # (type letter(s), then 's' for stored, 'i' for indexed, 'm' for multivalued).
  module DefaultDescriptors

    # Produces a _sim suffix
    def self.facetable
      @facetable ||= Descriptor.new(:string, :indexed, :multivalued)
    end

    # Most interesting case because the suffix produced depends on the type parameter
    # produces suffixes:
    #  _tesim - for strings or text fields
    #  _dtsi - for dates (date fields are not multivalued)
    #  _isim - for integers
    def self.searchable
      @searchable ||= Descriptor.new(searchable_field_definition, converter: searchable_converter, requires_type: true)
    end

    # Takes fields which are stored as strings, but we want indexed as dates. (e.g. "November 6th, 2012")
    # produces suffixes:
    #  _dtsi - for dates
    def self.dateable
      @dateable ||= Descriptor.new(:date, :stored, :indexed, converter: dateable_converter)
    end

    # Produces a _ssim suffix
    def self.symbol
      @symbol ||= Descriptor.new(:string, :stored, :indexed, :multivalued)
    end

    # Produces a _ssi suffix
    def self.sortable
      @sortable ||= Descriptor.new(:string, :indexed, :stored)
    end

    # Produces a _ssm suffix
    def self.displayable
      @displayable ||= Descriptor.new(:string, :stored, :multivalued)
    end

    # Produces a _tim suffix (used to be _unstem)
    def self.unstemmed_searchable
      @unstemmed_searchable ||= Descriptor.new(:text, :indexed, :multivalued)
    end

    # Indexed-only descriptor whose datatype is whatever type is supplied
    # when the field name is looked up.
    def self.simple
      @simple ||= Descriptor.new(lambda { |field_type| [field_type, :indexed] })
    end

    # NOTE: the helpers below are building blocks for the factories above.
    # A bare `protected` keyword does not change the visibility of
    # `def self.` methods, so they have always been publicly callable and
    # are deliberately left that way for backwards compatibility.

    # @return [Proc] maps a field type to the index flags used by `searchable`
    def self.searchable_field_definition
      lambda do |type|
        type = :text_en if [:string, :text].include?(type) # for backwards compatibility with old solr schema
        vals = [type, :indexed, :stored]
        vals << :multivalued unless type == :date
        vals
      end
    end

    # @return [Proc] maps a field type to a value converter; only :date has
    #   one (ISO 8601 normalisation), every other type yields nil.
    def self.searchable_converter
      lambda do |type|
        case type
        when :date
          lambda { |val| iso8601_date(val) }
        end
      end
    end

    # @return [Proc] maps a field type (ignored) to a converter that parses a
    #   free-form date and returns it as an ISO 8601 UTC string, or nil when
    #   the value cannot be interpreted as a date.
    def self.dateable_converter
      lambda do |_type|
        lambda do |val|
          begin
            # Date objects are passed straight through; anything else is parsed.
            iso8601_date(val.is_a?(Date) ? val : Date.parse(val))
          rescue ArgumentError, TypeError
            # ArgumentError: unparseable text. TypeError: nil / non-string input.
            nil
          end
        end
      end
    end


    # Normalises a date-ish value to an ISO 8601 UTC timestamp string.
    #
    # @param [Date, DateTime, Time, String, nil] value the value to convert
    # @return [String, nil] e.g. "2012-11-06T00:00:00Z"; nil for nil or an empty string
    # @raise [ArgumentError] when the value cannot be parsed as a date-time
    def self.iso8601_date(value)
      return nil if value.nil?
      # getutc returns a new Time, so the caller's object is left untouched.
      return value.getutc.iso8601 if value.is_a?(Time)

      text = value.is_a?(Date) ? value.to_s : value
      return nil if text.empty?

      DateTime.parse(text).to_time.utc.iso8601
    rescue ArgumentError
      raise ArgumentError, "Unable to parse `#{value}' as a date-time object"
    end
  end
end
@@ -0,0 +1,68 @@
1
module Solrizer
  # Describes how a field should be indexed and derives the solr field-name
  # suffix (and optional value converter) from that description.
  #
  # The positional arguments are either a datatype followed by flags, e.g.
  # (:string, :stored, :indexed, :multivalued), or a single Proc that takes
  # the field type and returns such a list. A trailing Hash may carry:
  #   :converter     - a Proc called with the field type; its result is handed
  #                    back to the caller as the value converter
  #   :requires_type - when truthy, name_and_converter insists on a :type
  class Descriptor
    attr_reader :index_type

    # @param [Array] args datatype and flags (or a Proc), optionally followed by an options Hash
    # @raise [Solrizer::InvalidIndexDescriptor] if the index type is not an Array
    def initialize(*args)
      opts = args.last.kind_of?(Hash) ? args.pop : {}
      @converter = opts[:converter]
      @type_required = opts[:requires_type]
      @index_type = args
      # With a splat signature args is always an Array; the check is kept as a safeguard.
      raise Solrizer::InvalidIndexDescriptor, "Invalid index type passed to Solrizer.solr_name.  It should be an array like [:string, :indexed, :stored, :multivalued]. You provided: `#{@index_type}'" unless index_type.kind_of? Array
    end

    # @param [#to_s] field_name base name of the solr field (without suffix)
    # @param [Hash, nil] args options; :type is the datatype of the field
    # @return [Array] two elements: the suffixed field name and the converter (or nil)
    # @raise [RuntimeError] if this descriptor requires a type and none was given
    # @raise [Solrizer::InvalidIndexDescriptor] if no valid datatype can be determined
    def name_and_converter(field_name, args=nil)
      args ||= {}
      field_type = args[:type]
      if type_required?
        raise "Must provide a :type argument when index_type is `#{self}' for #{field_name}" unless field_type
      end
      [field_name.to_s + suffix(field_type), converter(field_type)]
    end

    # @return the :requires_type option given at construction (nil when absent)
    def type_required?
      @type_required
    end

    protected

    # Builds the suffix: delimiter, type letters, then the stored / indexed /
    # multivalued letters for whichever flags are present.
    def suffix(field_type)
      definition = index_type.first
      # Work on a copy in both branches: the deletes below must not strip
      # flags from @index_type or from an array a Proc hands out repeatedly.
      evaluated_type = (definition.kind_of?(Proc) ? definition.call(field_type) : index_type).dup
      stored_suffix = config[:stored_suffix] if evaluated_type.delete(:stored)
      index_suffix = config[:index_suffix] if evaluated_type.delete(:indexed)
      multivalued_suffix = config[:multivalued_suffix] if evaluated_type.delete(:multivalued)
      # Whatever is left after removing the flags is the datatype.
      index_datatype = evaluated_type.first
      raise Solrizer::InvalidIndexDescriptor, "Missing datatype for #{evaluated_type}" unless index_datatype
      type_suffix = config[:type_suffix].call(index_datatype)
      raise Solrizer::InvalidIndexDescriptor, "Invalid datatype `#{index_datatype.inspect}'. Must be one of: :date, :text, :text_en, :string, :integer" unless type_suffix

      # Absent flags are nil and vanish in the join.
      [config[:suffix_delimiter], type_suffix, stored_suffix, index_suffix, multivalued_suffix].join
    end

    # Asks the configured converter factory (if any) for a converter for field_type.
    def converter(field_type)
      @converter.call(field_type) if @converter
    end

    private

    # Suffix building blocks, memoized per instance.
    def config
      @config ||=
      {suffix_delimiter: '_',
        type_suffix: lambda do |type|
          case type
          when :string, :symbol # TODO `:symbol' usage ought to be deprecated
            's'
          when :text
            't'
          when :text_en
            'te'
          when :date
            'dt'
          when :integer
            'i'
          end
        end,
        stored_suffix: 's',
        index_suffix: 'i',
        multivalued_suffix: 'm'}
    end
  end
end