solrizer 3.0.0 → 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 34eabc78510ac3e701056114b5d5086dcc65257e
4
- data.tar.gz: 88162b2315c00c3cdbba86761f3255c74a7ae0a4
3
+ metadata.gz: 1bfcd13e75eee3622c67f1cfac1222e73eb545b5
4
+ data.tar.gz: 844f374fc4c03f895c99a415439449344b722291
5
5
  SHA512:
6
- metadata.gz: 9c7d84ee9270e01bcec4a5c6112fa01c907897414a98e78d4de0eb7fba1ce78e41900319230d4ddcd3fb6877f1a99cd9f9ecfaf02ad81deca8286496788aa3e7
7
- data.tar.gz: a04661c08372898a9a1890b95c3911dbad938e4f7d33c3b1b353f86d299437c3f1949da6e874465d4fce87fd554f7cf86d016ec8f2039e81a97195fbcfb2e453
6
+ metadata.gz: 909cc4d4eb25b94b595c1eebbbb87921e892bc15f0fe6cb5c2b2b9ffe8865f99f85a478b5cfeb210011076ae5f31a8c75442cd2f35e5f41bd39dfb87a174ad18
7
+ data.tar.gz: f74ed09971903812dc3ae0e1ba8aaf258596226f3f258e3a4d739a6005fdcad90b3bc906bfa1202537c4808cabfcc0995f130921e8b9e7d0253ac906f10092a4
@@ -1,3 +1,8 @@
1
+ h2. 3.1.0
2
+ #16 Inserting non-multivalued fields should not create a solr error
3
+ #20 Time fields should be formatted correctly when using active_support/core_ext/date_time/conversions
4
+ #17 Solrizer should accept DateTime objects
5
+
1
6
  h2. 3.0.0 (2013-03-28)
2
7
  suffix changes:
3
8
  _s -> _sim
@@ -7,6 +7,7 @@ module Solrizer
7
7
  autoload :Descriptor
8
8
  autoload :FieldMapper
9
9
  autoload :DefaultDescriptors
10
+ autoload :Suffix
10
11
  autoload :HTML, 'solrizer/html'
11
12
  autoload :VERSION, 'solrizer/version'
12
13
  autoload :XML, 'solrizer/xml'
@@ -38,7 +39,11 @@ module Solrizer
38
39
  indexer_args = [:stored_searchable] if indexer_args.empty?
39
40
  default_field_mapper.solr_names_and_values(name, value, indexer_args).each do |k, v|
40
41
  doc[k] ||= []
41
- doc[k] += v
42
+ if v.is_a? Array
43
+ doc[k] += v
44
+ else
45
+ doc[k] = v
46
+ end
42
47
  end
43
48
  doc
44
49
  end
@@ -12,10 +12,8 @@ module Solrizer
12
12
  # @param [Array] index_as list of indexers to use (e.g. [:searchable, :facetable])
13
13
  # @param [Hash] solr_doc the solr_doc to insert into.
14
14
  def create_and_insert_terms(field_name_base, value, index_as, solr_doc)
15
- Solrizer.default_field_mapper.solr_names_and_values(field_name_base, value, index_as).each do |field_name, field_value|
16
- unless field_value.join("").strip.empty?
17
- ::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value)
18
- end
15
+ index_as.each do |indexer|
16
+ Solrizer.insert_field(solr_doc, field_name_base, value, indexer)
19
17
  end
20
18
  end
21
19
  end
@@ -29,7 +29,8 @@ module Solrizer
29
29
  @facetable ||= Descriptor.new(:string, :indexed, :multivalued)
30
30
  end
31
31
 
32
- # Produces _ssim suffix; should probably be deprecated
32
+ # Produces _ssim suffix
33
+ # This is useful for when you only want to match whole words, such as user/group names from the rightsMetadata datastream
33
34
  def self.symbol
34
35
  @symbol ||= Descriptor.new(:string, :stored, :indexed, :multivalued)
35
36
  end
@@ -113,9 +114,9 @@ module Solrizer
113
114
  def self.iso8601_date(value)
114
115
  begin
115
116
  if value.is_a?(Date) || value.is_a?(Time)
116
- DateTime.parse(value.to_s).to_time.utc.iso8601
117
+ DateTime.parse(value.to_s).to_time.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
117
118
  elsif !value.empty?
118
- DateTime.parse(value).to_time.utc.iso8601
119
+ DateTime.parse(value).to_time.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
119
120
  end
120
121
  rescue ArgumentError => e
121
122
  raise ArgumentError, "Unable to parse `#{value}' as a date-time object"
@@ -15,7 +15,7 @@ module Solrizer
15
15
  args ||= {}
16
16
  field_type = args[:type]
17
17
  if type_required?
18
- raise "Must provide a :type argument when index_type is `#{self}' for #{field_name}" unless field_type
18
+ raise ArgumentError, "Must provide a :type argument when index_type is `#{self}' for #{field_name}" unless field_type
19
19
  end
20
20
  [field_name.to_s + suffix(field_type), converter(field_type)]
21
21
  end
@@ -24,45 +24,20 @@ module Solrizer
24
24
  @type_required
25
25
  end
26
26
 
27
+ def evaluate_suffix(field_type)
28
+ Suffix.new(index_type.first.kind_of?(Proc) ? index_type.first.call(field_type) : index_type.dup)
29
+ end
30
+
27
31
  protected
28
- def suffix(field_type)
29
- evaluated_type = index_type.first.kind_of?(Proc) ? index_type.first.call(field_type) : index_type.dup
30
- stored_suffix = config[:stored_suffix] if evaluated_type.delete(:stored)
31
- index_suffix = config[:index_suffix] if evaluated_type.delete(:indexed)
32
- multivalued_suffix = config[:multivalued_suffix] if evaluated_type.delete(:multivalued)
33
- index_datatype = evaluated_type.first
34
- raise Solrizer::InvalidIndexDescriptor, "Missing datatype for #{evaluated_type}" unless index_datatype
35
- type_suffix = config[:type_suffix].call(index_datatype)
36
- raise Solrizer::InvalidIndexDescriptor, "Invalid datatype `#{index_datatype.inspect}'. Must be one of: :date, :time, :text, :text_en, :string, :integer" unless type_suffix
37
32
 
38
- suffix = [config[:suffix_delimiter], type_suffix, stored_suffix, index_suffix, multivalued_suffix].join
33
+
34
+ # Suffix can be overridden if you want a different method of grabbing the suffix
35
+ def suffix(field_type)
36
+ evaluate_suffix(field_type).to_s
39
37
  end
40
38
 
41
39
  def converter(field_type)
42
40
  @converter.call(field_type) if @converter
43
41
  end
44
-
45
- private
46
- def config
47
- @config ||=
48
- {suffix_delimiter: '_',
49
- type_suffix: lambda do |type|
50
- case type
51
- when :string, :symbol # TODO `:symbol' usage ought to be deprecated
52
- 's'
53
- when :text
54
- 't'
55
- when :text_en
56
- 'te'
57
- when :date, :time
58
- 'dt'
59
- when :integer
60
- 'i'
61
- end
62
- end,
63
- stored_suffix: 's',
64
- index_suffix: 'i',
65
- multivalued_suffix: 'm'}
66
- end
67
42
  end
68
43
  end
@@ -18,8 +18,11 @@ class Extractor
18
18
  # @param [String] field_value
19
19
  def self.insert_solr_field_value(solr_doc, field_name, field_value)
20
20
  formatted_value = self.format_node_value(field_value)
21
- solr_doc[field_name] ||= []
22
- solr_doc[field_name] << formatted_value
21
+ if solr_doc[field_name]
22
+ solr_doc[field_name] = Array(solr_doc[field_name]) << formatted_value
23
+ else
24
+ solr_doc[field_name] = formatted_value
25
+ end
23
26
  return solr_doc
24
27
  end
25
28
 
@@ -168,6 +168,8 @@ module Solrizer
168
168
  when NilClass
169
169
  when Fixnum
170
170
  :integer
171
+ when DateTime
172
+ :time
171
173
  else
172
174
  value.class.to_s.underscore.to_sym
173
175
  end
@@ -199,8 +201,9 @@ module Solrizer
199
201
  index_types.each do |index_type|
200
202
  Array(field_value).each do |single_value|
201
203
  # Get mapping for field
202
- name, converter = indexer(index_type).name_and_converter(field_name, type: extract_type(single_value))
203
- #name, converter = solr_name_and_converter(field_name, index_type, field_type)
204
+ descriptor = indexer(index_type)
205
+ data_type = extract_type(single_value)
206
+ name, converter = descriptor.name_and_converter(field_name, type: data_type)
204
207
  next unless name
205
208
 
206
209
  # Is there a custom converter?
@@ -217,8 +220,13 @@ module Solrizer
217
220
  end
218
221
 
219
222
  # Add mapped name & value, unless it's a duplicate
220
- values = (results[name] ||= [])
221
- values << value unless value.nil? || values.include?(value)
223
+ if descriptor.evaluate_suffix(data_type).multivalued?
224
+ values = (results[name] ||= [])
225
+ values << value unless value.nil? || values.include?(value)
226
+ else
227
+ logger.warn "Setting #{name} to `#{value}', but it already had `#{results[name]}'" if results[name]
228
+ results[name] = value
229
+ end
222
230
  end
223
231
  end
224
232
 
@@ -0,0 +1,59 @@
1
+ module Solrizer
2
+ class Suffix
3
+
4
+ def initialize(fields)
5
+ @fields = fields
6
+ end
7
+
8
+ def multivalued?
9
+ @fields.include? :multivalued
10
+ end
11
+
12
+ def stored?
13
+ @fields.include? :stored
14
+ end
15
+
16
+ def indexed?
17
+ @fields.include? :indexed
18
+ end
19
+
20
+ def data_type
21
+ @fields.first
22
+ end
23
+
24
+ def to_s
25
+ stored_suffix = config[:stored_suffix] if stored?
26
+ index_suffix = config[:index_suffix] if indexed?
27
+ multivalued_suffix = config[:multivalued_suffix] if multivalued?
28
+ raise Solrizer::InvalidIndexDescriptor, "Missing datatype for #{@fields}" unless data_type
29
+ type_suffix = config[:type_suffix].call(data_type)
30
+ raise Solrizer::InvalidIndexDescriptor, "Invalid datatype `#{data_type.inspect}'. Must be one of: :date, :time, :text, :text_en, :string, :integer" unless type_suffix
31
+
32
+ [config[:suffix_delimiter], type_suffix, stored_suffix, index_suffix, multivalued_suffix].join
33
+ end
34
+
35
+
36
+ private
37
+ def config
38
+ @config ||=
39
+ {suffix_delimiter: '_',
40
+ type_suffix: lambda do |type|
41
+ case type
42
+ when :string, :symbol # TODO `:symbol' usage ought to be deprecated
43
+ 's'
44
+ when :text
45
+ 't'
46
+ when :text_en
47
+ 'te'
48
+ when :date, :time
49
+ 'dt'
50
+ when :integer
51
+ 'i'
52
+ end
53
+ end,
54
+ stored_suffix: 's',
55
+ index_suffix: 'i',
56
+ multivalued_suffix: 'm'}
57
+ end
58
+ end
59
+ end
@@ -1,3 +1,3 @@
1
1
  module Solrizer
2
- VERSION = "3.0.0"
2
+ VERSION = "3.1.0"
3
3
  end
@@ -13,7 +13,7 @@ describe Solrizer::Common do
13
13
  it "should handle many field types" do
14
14
  solr_doc = {}
15
15
  Foo.create_and_insert_terms('my_name', 'value', [:displayable, :searchable, :sortable], solr_doc)
16
- solr_doc.should == {'my_name_ssm' => ['value'], 'my_name_si' => ['value'], 'my_name_teim' => ['value']}
16
+ solr_doc.should == {'my_name_ssm' => ['value'], 'my_name_si' => 'value', 'my_name_teim' => ['value']}
17
17
  end
18
18
 
19
19
  it "should handle dates that are searchable" do
@@ -25,7 +25,7 @@ describe Solrizer::Common do
25
25
  it "should handle dates that are stored_sortable" do
26
26
  solr_doc = {}
27
27
  Foo.create_and_insert_terms('my_name', Date.parse('2013-01-10'), [:stored_sortable], solr_doc)
28
- solr_doc.should == {'my_name_dtsi' => ['2013-01-10T00:00:00Z']}
28
+ solr_doc.should == {'my_name_dtsi' => '2013-01-10T00:00:00Z'}
29
29
  end
30
30
 
31
31
  it "should handle dates that are displayable" do
@@ -37,6 +37,6 @@ describe Solrizer::Common do
37
37
  it "should handle dates that are sortable" do
38
38
  solr_doc = {}
39
39
  Foo.create_and_insert_terms('my_name', Date.parse('2013-01-10'), [:sortable], solr_doc)
40
- solr_doc.should == {'my_name_dti' => ['2013-01-10T00:00:00Z']}
40
+ solr_doc.should == {'my_name_dti' => '2013-01-10T00:00:00Z'}
41
41
  end
42
42
  end
@@ -24,8 +24,21 @@ describe Solrizer::Extractor do
24
24
  it "should initialize a solr doc list if it is nil" do
25
25
  solr_doc = {'title_tesim' => nil }
26
26
  Solrizer::Extractor.insert_solr_field_value(solr_doc, 'title_tesim', 'Frank')
27
- solr_doc.should == {"title_tesim"=>["Frank"]}
27
+ solr_doc.should == {"title_tesim"=>"Frank"}
28
28
  end
29
+ it "should insert multiple" do
30
+ solr_doc = {'title_tesim' => nil }
31
+ Solrizer::Extractor.insert_solr_field_value(solr_doc, 'title_tesim', 'Frank')
32
+ Solrizer::Extractor.insert_solr_field_value(solr_doc, 'title_tesim', 'Margret')
33
+ Solrizer::Extractor.insert_solr_field_value(solr_doc, 'title_tesim', 'Joyce')
34
+ solr_doc.should == {"title_tesim"=>["Frank", 'Margret', 'Joyce']}
35
+ end
36
+ it "should not make a list if a single valued field is passed in" do
37
+ solr_doc = {}
38
+ Solrizer::Extractor.insert_solr_field_value(solr_doc, 'title_dtsi', '2013-03-22T12:33:00Z')
39
+ solr_doc.should == {"title_dtsi"=>"2013-03-22T12:33:00Z"}
40
+ end
41
+
29
42
  end
30
43
 
31
44
  end
@@ -37,18 +37,27 @@ describe Solrizer::FieldMapper do
37
37
  def name_and_converter(field_name, args)
38
38
  [field_name + '_s', lambda { |value| "#{value} o'clock" }]
39
39
  end
40
+ def index_type
41
+ [:multivalued]
42
+ end
40
43
  end
41
44
 
42
45
  class StoredSearchableDescriptor < Solrizer::Descriptor
43
46
  def name_and_converter(field_name, args)
44
47
  [field_name.to_s + '_s']
45
48
  end
49
+ def index_type
50
+ [:multivalued]
51
+ end
46
52
  end
47
53
 
48
54
  class EdibleDescriptor < Solrizer::Descriptor
49
55
  def name_and_converter(field_name, args)
50
56
  [field_name + '_food']
51
57
  end
58
+ def index_type
59
+ [:multivalued]
60
+ end
52
61
  end
53
62
 
54
63
  class FungibleDescriptor < Solrizer::Descriptor
@@ -94,6 +103,9 @@ describe Solrizer::FieldMapper do
94
103
  end
95
104
  end
96
105
  end
106
+ def index_type
107
+ [:multivalued]
108
+ end
97
109
  end
98
110
  end
99
111
 
@@ -146,6 +158,7 @@ describe Solrizer::FieldMapper do
146
158
  @mapper.extract_type(nil).should == nil
147
159
  @mapper.extract_type(Date.today).should == :date
148
160
  @mapper.extract_type(Time.now).should == :time
161
+ @mapper.extract_type(DateTime.now).should == :time
149
162
  @mapper.extract_type("Hi").should == :string
150
163
  end
151
164
  end
@@ -269,16 +282,16 @@ describe Solrizer::FieldMapper do
269
282
  "foo_tesim" => ["bar"], #stored_searchable
270
283
  "foo_ssm" => ["bar"], #displayable
271
284
  "foo_sim" => ["bar"], #facetable
272
- "foo_si" => ["bar"], #sortable
273
- "foo_ssi" => ["bar"], #stored_sortable
285
+ "foo_si" => "bar", #sortable
286
+ "foo_ssi" => "bar", #stored_sortable
274
287
  "foo_tim" => ["bar"] #unstemmed_searchable
275
288
  }
276
289
  end
277
290
 
278
291
  it "should support stored_sortable" do
279
292
  time = Time.iso8601("2012-11-06T15:16:17Z")
280
- @mapper.solr_names_and_values('foo', time, :stored_sortable).should == {"foo_dtsi" => ["2012-11-06T15:16:17Z"]}
281
- @mapper.solr_names_and_values('foo', 'bar', :stored_sortable).should == {"foo_ssi" => ["bar"]}
293
+ @mapper.solr_names_and_values('foo', time, :stored_sortable).should == {"foo_dtsi" => "2012-11-06T15:16:17Z"}
294
+ @mapper.solr_names_and_values('foo', 'bar', :stored_sortable).should == {"foo_ssi" => "bar"}
282
295
  end
283
296
  end
284
297
  end
@@ -9,9 +9,13 @@ describe Solrizer do
9
9
  Solrizer.insert_field(doc, 'foo', 'A name')
10
10
  doc.should == {'foo_tesim' => ['A name']}
11
11
  end
12
+ it "should not create an array of fields that are not multivalued" do
13
+ Solrizer.insert_field(doc, 'foo', 'A name', :sortable)
14
+ doc.should == {'foo_si' => 'A name'}
15
+ end
12
16
  it "should insert a field with multiple indexers" do
13
17
  Solrizer.insert_field(doc, 'foo', 'A name', :sortable, :facetable)
14
- doc.should == {'foo_si' => ['A name'], 'foo_sim' => ['A name']}
18
+ doc.should == {'foo_si' => 'A name', 'foo_sim' => ['A name']}
15
19
  end
16
20
  it "should insert Dates" do
17
21
  Solrizer.insert_field(doc, 'foo', Date.parse('2013-01-13'))
@@ -24,17 +28,16 @@ describe Solrizer do
24
28
 
25
29
  it "should insert multiple values" do
26
30
  Solrizer.insert_field(doc, 'foo', ['A name', 'B name'], :sortable, :facetable)
27
- # NOTE: is this desired behavior for non-multivalued fields, like :sortable ?
28
- doc.should == {'foo_si' => ['A name', 'B name'], 'foo_sim' => ['A name', 'B name']}
31
+ doc.should == {'foo_si' => 'B name', 'foo_sim' => ['A name', 'B name']}
29
32
  end
30
33
  end
31
34
 
32
35
  describe "on a document with values" do
33
- before{ @doc = {'foo_si' => ['A name'], 'foo_sim' => ['A name']}}
36
+ before{ @doc = {'foo_si' => 'A name', 'foo_sim' => ['A name']}}
34
37
 
35
- it "should not overwrite values that exist before" do
38
+ it "should not overwrite muli-values that exist before" do
36
39
  Solrizer.insert_field(@doc, 'foo', 'B name', :sortable, :facetable)
37
- @doc.should == {'foo_si' => ['A name', 'B name'], 'foo_sim' => ['A name', 'B name']}
40
+ @doc.should == {'foo_si' => 'B name', 'foo_sim' => ['A name', 'B name']}
38
41
  end
39
42
  end
40
43
  end
@@ -44,7 +47,7 @@ describe Solrizer do
44
47
 
45
48
  it "should overwrite values that exist before" do
46
49
  Solrizer.set_field(@doc, 'foo', 'B name', :sortable, :facetable)
47
- @doc.should == {'foo_si' => ['B name'], 'foo_sim' => ['B name']}
50
+ @doc.should == {'foo_si' => 'B name', 'foo_sim' => ['B name']}
48
51
  end
49
52
  end
50
53
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: solrizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Zumwalt
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-03-28 00:00:00.000000000 Z
11
+ date: 2013-05-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -177,6 +177,7 @@ files:
177
177
  - lib/solrizer/field_mapper.rb
178
178
  - lib/solrizer/html.rb
179
179
  - lib/solrizer/html/extractor.rb
180
+ - lib/solrizer/suffix.rb
180
181
  - lib/solrizer/version.rb
181
182
  - lib/solrizer/xml.rb
182
183
  - lib/solrizer/xml/extractor.rb
@@ -209,7 +210,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
209
210
  version: '0'
210
211
  requirements: []
211
212
  rubyforge_project:
212
- rubygems_version: 2.0.0
213
+ rubygems_version: 2.0.3
213
214
  signing_key:
214
215
  specification_version: 4
215
216
  summary: A utility for building solr indexes, usually from Fedora repository content