solrizer 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 34eabc78510ac3e701056114b5d5086dcc65257e
4
- data.tar.gz: 88162b2315c00c3cdbba86761f3255c74a7ae0a4
3
+ metadata.gz: 1bfcd13e75eee3622c67f1cfac1222e73eb545b5
4
+ data.tar.gz: 844f374fc4c03f895c99a415439449344b722291
5
5
  SHA512:
6
- metadata.gz: 9c7d84ee9270e01bcec4a5c6112fa01c907897414a98e78d4de0eb7fba1ce78e41900319230d4ddcd3fb6877f1a99cd9f9ecfaf02ad81deca8286496788aa3e7
7
- data.tar.gz: a04661c08372898a9a1890b95c3911dbad938e4f7d33c3b1b353f86d299437c3f1949da6e874465d4fce87fd554f7cf86d016ec8f2039e81a97195fbcfb2e453
6
+ metadata.gz: 909cc4d4eb25b94b595c1eebbbb87921e892bc15f0fe6cb5c2b2b9ffe8865f99f85a478b5cfeb210011076ae5f31a8c75442cd2f35e5f41bd39dfb87a174ad18
7
+ data.tar.gz: f74ed09971903812dc3ae0e1ba8aaf258596226f3f258e3a4d739a6005fdcad90b3bc906bfa1202537c4808cabfcc0995f130921e8b9e7d0253ac906f10092a4
@@ -1,3 +1,8 @@
1
+ h2. 3.1.0
2
+ #16 Inserting non-multivalued fields should not create a solr error
3
+ #20 Time fields should be formatted correctly when using active_support/core_ext/date_time/conversions
4
+ #17 Solrizer should accept DateTime objects
5
+
1
6
  h2. 3.0.0 (2013-03-28)
2
7
  suffix changes:
3
8
  _s -> _sim
@@ -7,6 +7,7 @@ module Solrizer
7
7
  autoload :Descriptor
8
8
  autoload :FieldMapper
9
9
  autoload :DefaultDescriptors
10
+ autoload :Suffix
10
11
  autoload :HTML, 'solrizer/html'
11
12
  autoload :VERSION, 'solrizer/version'
12
13
  autoload :XML, 'solrizer/xml'
@@ -38,7 +39,11 @@ module Solrizer
38
39
  indexer_args = [:stored_searchable] if indexer_args.empty?
39
40
  default_field_mapper.solr_names_and_values(name, value, indexer_args).each do |k, v|
40
41
  doc[k] ||= []
41
- doc[k] += v
42
+ if v.is_a? Array
43
+ doc[k] += v
44
+ else
45
+ doc[k] = v
46
+ end
42
47
  end
43
48
  doc
44
49
  end
@@ -12,10 +12,8 @@ module Solrizer
12
12
  # @param [Array] index_as list of indexers to use (e.g. [:searchable, :facetable])
13
13
  # @param [Hash] solr_doc the solr_doc to insert into.
14
14
  def create_and_insert_terms(field_name_base, value, index_as, solr_doc)
15
- Solrizer.default_field_mapper.solr_names_and_values(field_name_base, value, index_as).each do |field_name, field_value|
16
- unless field_value.join("").strip.empty?
17
- ::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value)
18
- end
15
+ index_as.each do |indexer|
16
+ Solrizer.insert_field(solr_doc, field_name_base, value, indexer)
19
17
  end
20
18
  end
21
19
  end
@@ -29,7 +29,8 @@ module Solrizer
29
29
  @facetable ||= Descriptor.new(:string, :indexed, :multivalued)
30
30
  end
31
31
 
32
- # Produces _ssim suffix; should probably be deprecated
32
+ # Produces _ssim suffix
33
+ # This is useful for when you only want to match whole words, such as user/group names from the rightsMetadata datastream
33
34
  def self.symbol
34
35
  @symbol ||= Descriptor.new(:string, :stored, :indexed, :multivalued)
35
36
  end
@@ -113,9 +114,9 @@ module Solrizer
113
114
  def self.iso8601_date(value)
114
115
  begin
115
116
  if value.is_a?(Date) || value.is_a?(Time)
116
- DateTime.parse(value.to_s).to_time.utc.iso8601
117
+ DateTime.parse(value.to_s).to_time.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
117
118
  elsif !value.empty?
118
- DateTime.parse(value).to_time.utc.iso8601
119
+ DateTime.parse(value).to_time.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
119
120
  end
120
121
  rescue ArgumentError => e
121
122
  raise ArgumentError, "Unable to parse `#{value}' as a date-time object"
@@ -15,7 +15,7 @@ module Solrizer
15
15
  args ||= {}
16
16
  field_type = args[:type]
17
17
  if type_required?
18
- raise "Must provide a :type argument when index_type is `#{self}' for #{field_name}" unless field_type
18
+ raise ArgumentError, "Must provide a :type argument when index_type is `#{self}' for #{field_name}" unless field_type
19
19
  end
20
20
  [field_name.to_s + suffix(field_type), converter(field_type)]
21
21
  end
@@ -24,45 +24,20 @@ module Solrizer
24
24
  @type_required
25
25
  end
26
26
 
27
+ def evaluate_suffix(field_type)
28
+ Suffix.new(index_type.first.kind_of?(Proc) ? index_type.first.call(field_type) : index_type.dup)
29
+ end
30
+
27
31
  protected
28
- def suffix(field_type)
29
- evaluated_type = index_type.first.kind_of?(Proc) ? index_type.first.call(field_type) : index_type.dup
30
- stored_suffix = config[:stored_suffix] if evaluated_type.delete(:stored)
31
- index_suffix = config[:index_suffix] if evaluated_type.delete(:indexed)
32
- multivalued_suffix = config[:multivalued_suffix] if evaluated_type.delete(:multivalued)
33
- index_datatype = evaluated_type.first
34
- raise Solrizer::InvalidIndexDescriptor, "Missing datatype for #{evaluated_type}" unless index_datatype
35
- type_suffix = config[:type_suffix].call(index_datatype)
36
- raise Solrizer::InvalidIndexDescriptor, "Invalid datatype `#{index_datatype.inspect}'. Must be one of: :date, :time, :text, :text_en, :string, :integer" unless type_suffix
37
32
 
38
- suffix = [config[:suffix_delimiter], type_suffix, stored_suffix, index_suffix, multivalued_suffix].join
33
+
34
+ # Suffix can be overridden if you want a different method of grabbing the suffix
35
+ def suffix(field_type)
36
+ evaluate_suffix(field_type).to_s
39
37
  end
40
38
 
41
39
  def converter(field_type)
42
40
  @converter.call(field_type) if @converter
43
41
  end
44
-
45
- private
46
- def config
47
- @config ||=
48
- {suffix_delimiter: '_',
49
- type_suffix: lambda do |type|
50
- case type
51
- when :string, :symbol # TODO `:symbol' usage ought to be deprecated
52
- 's'
53
- when :text
54
- 't'
55
- when :text_en
56
- 'te'
57
- when :date, :time
58
- 'dt'
59
- when :integer
60
- 'i'
61
- end
62
- end,
63
- stored_suffix: 's',
64
- index_suffix: 'i',
65
- multivalued_suffix: 'm'}
66
- end
67
42
  end
68
43
  end
@@ -18,8 +18,11 @@ class Extractor
18
18
  # @param [String] field_value
19
19
  def self.insert_solr_field_value(solr_doc, field_name, field_value)
20
20
  formatted_value = self.format_node_value(field_value)
21
- solr_doc[field_name] ||= []
22
- solr_doc[field_name] << formatted_value
21
+ if solr_doc[field_name]
22
+ solr_doc[field_name] = Array(solr_doc[field_name]) << formatted_value
23
+ else
24
+ solr_doc[field_name] = formatted_value
25
+ end
23
26
  return solr_doc
24
27
  end
25
28
 
@@ -168,6 +168,8 @@ module Solrizer
168
168
  when NilClass
169
169
  when Fixnum
170
170
  :integer
171
+ when DateTime
172
+ :time
171
173
  else
172
174
  value.class.to_s.underscore.to_sym
173
175
  end
@@ -199,8 +201,9 @@ module Solrizer
199
201
  index_types.each do |index_type|
200
202
  Array(field_value).each do |single_value|
201
203
  # Get mapping for field
202
- name, converter = indexer(index_type).name_and_converter(field_name, type: extract_type(single_value))
203
- #name, converter = solr_name_and_converter(field_name, index_type, field_type)
204
+ descriptor = indexer(index_type)
205
+ data_type = extract_type(single_value)
206
+ name, converter = descriptor.name_and_converter(field_name, type: data_type)
204
207
  next unless name
205
208
 
206
209
  # Is there a custom converter?
@@ -217,8 +220,13 @@ module Solrizer
217
220
  end
218
221
 
219
222
  # Add mapped name & value, unless it's a duplicate
220
- values = (results[name] ||= [])
221
- values << value unless value.nil? || values.include?(value)
223
+ if descriptor.evaluate_suffix(data_type).multivalued?
224
+ values = (results[name] ||= [])
225
+ values << value unless value.nil? || values.include?(value)
226
+ else
227
+ logger.warn "Setting #{name} to `#{value}', but it already had `#{results[name]}'" if results[name]
228
+ results[name] = value
229
+ end
222
230
  end
223
231
  end
224
232
 
@@ -0,0 +1,59 @@
1
+ module Solrizer
2
+ class Suffix
3
+
4
+ def initialize(fields)
5
+ @fields = fields
6
+ end
7
+
8
+ def multivalued?
9
+ @fields.include? :multivalued
10
+ end
11
+
12
+ def stored?
13
+ @fields.include? :stored
14
+ end
15
+
16
+ def indexed?
17
+ @fields.include? :indexed
18
+ end
19
+
20
+ def data_type
21
+ @fields.first
22
+ end
23
+
24
+ def to_s
25
+ stored_suffix = config[:stored_suffix] if stored?
26
+ index_suffix = config[:index_suffix] if indexed?
27
+ multivalued_suffix = config[:multivalued_suffix] if multivalued?
28
+ raise Solrizer::InvalidIndexDescriptor, "Missing datatype for #{@fields}" unless data_type
29
+ type_suffix = config[:type_suffix].call(data_type)
30
+ raise Solrizer::InvalidIndexDescriptor, "Invalid datatype `#{data_type.inspect}'. Must be one of: :date, :time, :text, :text_en, :string, :integer" unless type_suffix
31
+
32
+ [config[:suffix_delimiter], type_suffix, stored_suffix, index_suffix, multivalued_suffix].join
33
+ end
34
+
35
+
36
+ private
37
+ def config
38
+ @config ||=
39
+ {suffix_delimiter: '_',
40
+ type_suffix: lambda do |type|
41
+ case type
42
+ when :string, :symbol # TODO `:symbol' usage ought to be deprecated
43
+ 's'
44
+ when :text
45
+ 't'
46
+ when :text_en
47
+ 'te'
48
+ when :date, :time
49
+ 'dt'
50
+ when :integer
51
+ 'i'
52
+ end
53
+ end,
54
+ stored_suffix: 's',
55
+ index_suffix: 'i',
56
+ multivalued_suffix: 'm'}
57
+ end
58
+ end
59
+ end
@@ -1,3 +1,3 @@
1
1
  module Solrizer
2
- VERSION = "3.0.0"
2
+ VERSION = "3.1.0"
3
3
  end
@@ -13,7 +13,7 @@ describe Solrizer::Common do
13
13
  it "should handle many field types" do
14
14
  solr_doc = {}
15
15
  Foo.create_and_insert_terms('my_name', 'value', [:displayable, :searchable, :sortable], solr_doc)
16
- solr_doc.should == {'my_name_ssm' => ['value'], 'my_name_si' => ['value'], 'my_name_teim' => ['value']}
16
+ solr_doc.should == {'my_name_ssm' => ['value'], 'my_name_si' => 'value', 'my_name_teim' => ['value']}
17
17
  end
18
18
 
19
19
  it "should handle dates that are searchable" do
@@ -25,7 +25,7 @@ describe Solrizer::Common do
25
25
  it "should handle dates that are stored_sortable" do
26
26
  solr_doc = {}
27
27
  Foo.create_and_insert_terms('my_name', Date.parse('2013-01-10'), [:stored_sortable], solr_doc)
28
- solr_doc.should == {'my_name_dtsi' => ['2013-01-10T00:00:00Z']}
28
+ solr_doc.should == {'my_name_dtsi' => '2013-01-10T00:00:00Z'}
29
29
  end
30
30
 
31
31
  it "should handle dates that are displayable" do
@@ -37,6 +37,6 @@ describe Solrizer::Common do
37
37
  it "should handle dates that are sortable" do
38
38
  solr_doc = {}
39
39
  Foo.create_and_insert_terms('my_name', Date.parse('2013-01-10'), [:sortable], solr_doc)
40
- solr_doc.should == {'my_name_dti' => ['2013-01-10T00:00:00Z']}
40
+ solr_doc.should == {'my_name_dti' => '2013-01-10T00:00:00Z'}
41
41
  end
42
42
  end
@@ -24,8 +24,21 @@ describe Solrizer::Extractor do
24
24
  it "should initialize a solr doc list if it is nil" do
25
25
  solr_doc = {'title_tesim' => nil }
26
26
  Solrizer::Extractor.insert_solr_field_value(solr_doc, 'title_tesim', 'Frank')
27
- solr_doc.should == {"title_tesim"=>["Frank"]}
27
+ solr_doc.should == {"title_tesim"=>"Frank"}
28
28
  end
29
+ it "should insert multiple" do
30
+ solr_doc = {'title_tesim' => nil }
31
+ Solrizer::Extractor.insert_solr_field_value(solr_doc, 'title_tesim', 'Frank')
32
+ Solrizer::Extractor.insert_solr_field_value(solr_doc, 'title_tesim', 'Margret')
33
+ Solrizer::Extractor.insert_solr_field_value(solr_doc, 'title_tesim', 'Joyce')
34
+ solr_doc.should == {"title_tesim"=>["Frank", 'Margret', 'Joyce']}
35
+ end
36
+ it "should not make a list if a single valued field is passed in" do
37
+ solr_doc = {}
38
+ Solrizer::Extractor.insert_solr_field_value(solr_doc, 'title_dtsi', '2013-03-22T12:33:00Z')
39
+ solr_doc.should == {"title_dtsi"=>"2013-03-22T12:33:00Z"}
40
+ end
41
+
29
42
  end
30
43
 
31
44
  end
@@ -37,18 +37,27 @@ describe Solrizer::FieldMapper do
37
37
  def name_and_converter(field_name, args)
38
38
  [field_name + '_s', lambda { |value| "#{value} o'clock" }]
39
39
  end
40
+ def index_type
41
+ [:multivalued]
42
+ end
40
43
  end
41
44
 
42
45
  class StoredSearchableDescriptor < Solrizer::Descriptor
43
46
  def name_and_converter(field_name, args)
44
47
  [field_name.to_s + '_s']
45
48
  end
49
+ def index_type
50
+ [:multivalued]
51
+ end
46
52
  end
47
53
 
48
54
  class EdibleDescriptor < Solrizer::Descriptor
49
55
  def name_and_converter(field_name, args)
50
56
  [field_name + '_food']
51
57
  end
58
+ def index_type
59
+ [:multivalued]
60
+ end
52
61
  end
53
62
 
54
63
  class FungibleDescriptor < Solrizer::Descriptor
@@ -94,6 +103,9 @@ describe Solrizer::FieldMapper do
94
103
  end
95
104
  end
96
105
  end
106
+ def index_type
107
+ [:multivalued]
108
+ end
97
109
  end
98
110
  end
99
111
 
@@ -146,6 +158,7 @@ describe Solrizer::FieldMapper do
146
158
  @mapper.extract_type(nil).should == nil
147
159
  @mapper.extract_type(Date.today).should == :date
148
160
  @mapper.extract_type(Time.now).should == :time
161
+ @mapper.extract_type(DateTime.now).should == :time
149
162
  @mapper.extract_type("Hi").should == :string
150
163
  end
151
164
  end
@@ -269,16 +282,16 @@ describe Solrizer::FieldMapper do
269
282
  "foo_tesim" => ["bar"], #stored_searchable
270
283
  "foo_ssm" => ["bar"], #displayable
271
284
  "foo_sim" => ["bar"], #facetable
272
- "foo_si" => ["bar"], #sortable
273
- "foo_ssi" => ["bar"], #stored_sortable
285
+ "foo_si" => "bar", #sortable
286
+ "foo_ssi" => "bar", #stored_sortable
274
287
  "foo_tim" => ["bar"] #unstemmed_searchable
275
288
  }
276
289
  end
277
290
 
278
291
  it "should support stored_sortable" do
279
292
  time = Time.iso8601("2012-11-06T15:16:17Z")
280
- @mapper.solr_names_and_values('foo', time, :stored_sortable).should == {"foo_dtsi" => ["2012-11-06T15:16:17Z"]}
281
- @mapper.solr_names_and_values('foo', 'bar', :stored_sortable).should == {"foo_ssi" => ["bar"]}
293
+ @mapper.solr_names_and_values('foo', time, :stored_sortable).should == {"foo_dtsi" => "2012-11-06T15:16:17Z"}
294
+ @mapper.solr_names_and_values('foo', 'bar', :stored_sortable).should == {"foo_ssi" => "bar"}
282
295
  end
283
296
  end
284
297
  end
@@ -9,9 +9,13 @@ describe Solrizer do
9
9
  Solrizer.insert_field(doc, 'foo', 'A name')
10
10
  doc.should == {'foo_tesim' => ['A name']}
11
11
  end
12
+ it "should not create an array of fields that are not multivalued" do
13
+ Solrizer.insert_field(doc, 'foo', 'A name', :sortable)
14
+ doc.should == {'foo_si' => 'A name'}
15
+ end
12
16
  it "should insert a field with multiple indexers" do
13
17
  Solrizer.insert_field(doc, 'foo', 'A name', :sortable, :facetable)
14
- doc.should == {'foo_si' => ['A name'], 'foo_sim' => ['A name']}
18
+ doc.should == {'foo_si' => 'A name', 'foo_sim' => ['A name']}
15
19
  end
16
20
  it "should insert Dates" do
17
21
  Solrizer.insert_field(doc, 'foo', Date.parse('2013-01-13'))
@@ -24,17 +28,16 @@ describe Solrizer do
24
28
 
25
29
  it "should insert multiple values" do
26
30
  Solrizer.insert_field(doc, 'foo', ['A name', 'B name'], :sortable, :facetable)
27
- # NOTE: is this desired behavior for non-multivalued fields, like :sortable ?
28
- doc.should == {'foo_si' => ['A name', 'B name'], 'foo_sim' => ['A name', 'B name']}
31
+ doc.should == {'foo_si' => 'B name', 'foo_sim' => ['A name', 'B name']}
29
32
  end
30
33
  end
31
34
 
32
35
  describe "on a document with values" do
33
- before{ @doc = {'foo_si' => ['A name'], 'foo_sim' => ['A name']}}
36
+ before{ @doc = {'foo_si' => 'A name', 'foo_sim' => ['A name']}}
34
37
 
35
- it "should not overwrite values that exist before" do
38
+ it "should not overwrite muli-values that exist before" do
36
39
  Solrizer.insert_field(@doc, 'foo', 'B name', :sortable, :facetable)
37
- @doc.should == {'foo_si' => ['A name', 'B name'], 'foo_sim' => ['A name', 'B name']}
40
+ @doc.should == {'foo_si' => 'B name', 'foo_sim' => ['A name', 'B name']}
38
41
  end
39
42
  end
40
43
  end
@@ -44,7 +47,7 @@ describe Solrizer do
44
47
 
45
48
  it "should overwrite values that exist before" do
46
49
  Solrizer.set_field(@doc, 'foo', 'B name', :sortable, :facetable)
47
- @doc.should == {'foo_si' => ['B name'], 'foo_sim' => ['B name']}
50
+ @doc.should == {'foo_si' => 'B name', 'foo_sim' => ['B name']}
48
51
  end
49
52
  end
50
53
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: solrizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Zumwalt
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-03-28 00:00:00.000000000 Z
11
+ date: 2013-05-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -177,6 +177,7 @@ files:
177
177
  - lib/solrizer/field_mapper.rb
178
178
  - lib/solrizer/html.rb
179
179
  - lib/solrizer/html/extractor.rb
180
+ - lib/solrizer/suffix.rb
180
181
  - lib/solrizer/version.rb
181
182
  - lib/solrizer/xml.rb
182
183
  - lib/solrizer/xml/extractor.rb
@@ -209,7 +210,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
209
210
  version: '0'
210
211
  requirements: []
211
212
  rubyforge_project:
212
- rubygems_version: 2.0.0
213
+ rubygems_version: 2.0.3
213
214
  signing_key:
214
215
  specification_version: 4
215
216
  summary: A utility for building solr indexes, usually from Fedora repository content