solrizer 3.0.0.pre6 → 3.0.0.pre7

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,16 +5,21 @@ module Solrizer
5
5
  # _tesim - for strings or text fields
6
6
  # _dtsim - for dates
7
7
  # _isim - for integers
8
- # Note that searchable fields are also stored for backwards compatibility
9
- # TODO: make a searchable and stored field type ("searchable_and_displayable" like "search and destroy"? "displearchable" ?)
10
- # and then make searchable not stored.
8
+ def self.stored_searchable
9
+ @stored_searchable ||= Descriptor.new(stored_searchable_field_definition, converter: searchable_converter, requires_type: true)
10
+ end
11
+
12
+ # The suffix produced depends on the type parameter -- produces suffixes:
13
+ # _teim - for strings or text fields
14
+ # _dtim - for dates
15
+ # _iim - for integers
11
16
  def self.searchable
12
17
  @searchable ||= Descriptor.new(searchable_field_definition, converter: searchable_converter, requires_type: true)
13
18
  end
14
19
 
15
20
  # Takes fields which are stored as strings, but we want indexed as dates. (e.g. "November 6th, 2012")
16
21
  # produces suffixes:
17
- # _dtsi - for dates
22
+ # _dtsim - for dates
18
23
  def self.dateable
19
24
  @dateable ||= Descriptor.new(:date, :stored, :indexed, :multivalued, converter: dateable_converter)
20
25
  end
@@ -54,10 +59,18 @@ module Solrizer
54
59
  def self.simple
55
60
  @simple ||= Descriptor.new(lambda {|field_type| [field_type, :indexed]})
56
61
  end
62
+
57
63
  protected
58
64
 
59
- # note that searchable fields are also stored. Probably should change that at some point
60
65
  def self.searchable_field_definition
66
+ lambda do |type|
67
+ type = :text_en if [:string, :text].include?(type) # for backwards compatibility with old solr schema
68
+ vals = [type, :indexed, :multivalued]
69
+ vals
70
+ end
71
+ end
72
+
73
+ def self.stored_searchable_field_definition
61
74
  lambda do |type|
62
75
  type = :text_en if [:string, :text].include?(type) # for backwards compatibility with old solr schema
63
76
  vals = [type, :indexed, :stored, :multivalued]
@@ -113,9 +113,9 @@ module Solrizer
113
113
  # @return [String] name of the solr field, based on the params
114
114
  def solr_name(field_name, *opts)
115
115
  index_type, args = if opts.first.kind_of? Hash
116
- [:searchable, opts.first]
116
+ [:stored_searchable, opts.first]
117
117
  elsif opts.empty?
118
- [:searchable, {type: :text}]
118
+ [:stored_searchable, {type: :text}]
119
119
  else
120
120
  [opts[0], opts[1] || {}]
121
121
  end
@@ -1,3 +1,3 @@
1
1
  module Solrizer
2
- VERSION = "3.0.0.pre6"
2
+ VERSION = "3.0.0.pre7"
3
3
  end
@@ -15,14 +15,14 @@ module Solrizer::XML::Extractor
15
15
  if value.kind_of?(Array)
16
16
  if value.first.kind_of?(Hash)
17
17
  # This deals with the way xml-simple handles nodes with attributes
18
- solr_doc.merge!({mapper.solr_name(name, :searchable, :type=>:text).to_sym => "#{value.first["content"]}"})
18
+ solr_doc.merge!({mapper.solr_name(name, :stored_searchable, :type=>:text).to_sym => "#{value.first["content"]}"})
19
19
  elsif value.length > 1
20
- solr_doc.merge!({mapper.solr_name(name, :searchable, :type=>:text).to_sym => value})
20
+ solr_doc.merge!({mapper.solr_name(name, :stored_searchable, :type=>:text).to_sym => value})
21
21
  else
22
- solr_doc.merge!({mapper.solr_name(name, :searchable, :type=>:text).to_sym => "#{value.first}"})
22
+ solr_doc.merge!({mapper.solr_name(name, :stored_searchable, :type=>:text).to_sym => "#{value.first}"})
23
23
  end
24
24
  else
25
- solr_doc.merge!({mapper.solr_name(name, :searchable, :type=>:text).to_sym => "#{value}"})
25
+ solr_doc.merge!({mapper.solr_name(name, :stored_searchable, :type=>:text).to_sym => "#{value}"})
26
26
  end
27
27
  end
28
28
 
@@ -36,6 +36,7 @@ module Solrizer::XML::TerminologyBasedSolrizer
36
36
 
37
37
  nodeset.each do |n|
38
38
  doc.solrize_node(n, term_pointer, term, solr_doc, field_mapper)
39
+ # FIXME: there should be no dependencies on OM in Solrizer
39
40
  unless term.kind_of? OM::XML::NamedTermProxy
40
41
  term.children.each_pair do |child_term_name, child_term|
41
42
  doc.solrize_term(child_term, solr_doc, field_mapper, opts={:parents=>parents+[{term.name=>nodeset.index(n)}]})
@@ -57,10 +58,12 @@ module Solrizer::XML::TerminologyBasedSolrizer
57
58
  def solrize_node(node_value, doc, term_pointer, term, solr_doc = Hash.new, field_mapper = nil, opts = {})
58
59
  return solr_doc unless term.index_as && !term.index_as.empty?
59
60
 
61
+ # FIXME: there should be no dependencies on OM in Solrizer
60
62
  generic_field_name_base = OM::XML::Terminology.term_generic_name(*term_pointer)
61
63
  create_and_insert_terms(generic_field_name_base, node_value, term.index_as, solr_doc)
62
64
 
63
65
  if term_pointer.length > 1
66
+ # FIXME: there should be no dependencies on OM in Solrizer
64
67
  hierarchical_field_name_base = OM::XML::Terminology.term_hierarchical_name(*term_pointer)
65
68
  create_and_insert_terms(hierarchical_field_name_base, node_value, term.index_as, solr_doc)
66
69
  end
data/lib/solrizer.rb CHANGED
@@ -35,7 +35,7 @@ module Solrizer
35
35
  # @return [Hash] doc the document that was provided with the new field inserted
36
36
  def self.insert_field(doc, name, value, *indexer_args)
37
37
  # adding defaults indexer
38
- indexer_args = [:searchable] if indexer_args.empty?
38
+ indexer_args = [:stored_searchable] if indexer_args.empty?
39
39
  default_field_mapper.solr_names_and_values(name, value, indexer_args).each do |k, v|
40
40
  doc[k] ||= []
41
41
  doc[k] += v
@@ -50,7 +50,7 @@ module Solrizer
50
50
  # @return [Hash] doc the document that was provided with the new field (replacing any field with the same name)
51
51
  def self.set_field(doc, name, value, *indexer_args)
52
52
  # adding defaults indexer
53
- indexer_args = [:searchable] if indexer_args.empty?
53
+ indexer_args = [:stored_searchable] if indexer_args.empty?
54
54
  doc.merge! default_field_mapper.solr_names_and_values(name, value, indexer_args)
55
55
  doc
56
56
  end
@@ -13,16 +13,16 @@ module Samples
13
13
  }
14
14
  t.french_title(:ref=>[:title_info,:main_title], :attributes=>{"xml:lang"=>"fre"})
15
15
 
16
- t.language(:index_as=>[:facetable, :searchable],:path=>{:attribute=>"lang"})
16
+ t.language(:index_as=>[:facetable, :stored_searchable],:path=>{:attribute=>"lang"})
17
17
  }
18
18
  t.language{
19
19
  t.lang_code(:index_as=>[:facetable], :path=>"languageTerm", :attributes=>{:type=>"code"})
20
20
  }
21
- t.abstract(:index_as=>[:searchable])
21
+ t.abstract(:index_as=>[:stored_searchable])
22
22
  t.subject {
23
23
  t.topic(:index_as=>[:facetable])
24
24
  }
25
- t.topic_tag(:proxy=>[:subject, :topic], :index_as=>[:searchable])
25
+ t.topic_tag(:proxy=>[:subject, :topic], :index_as=>[:stored_searchable])
26
26
  # t.topic_tag(:index_as=>[:facetable],:path=>"subject", :default_content_path=>"topic")
27
27
  # This is a mods:name. The underscore is purely to avoid namespace conflicts.
28
28
  t.name_ {
@@ -35,7 +35,7 @@ module Samples
35
35
  t.role(:ref=>[:role])
36
36
  t.description(:index_as=>[:facetable])
37
37
  t.date(:path=>"namePart", :attributes=>{:type=>"date"})
38
- t.last_name(:path=>"namePart", :attributes=>{:type=>"family"}, :index_as=>[:searchable])
38
+ t.last_name(:path=>"namePart", :attributes=>{:type=>"family"}, :index_as=>[:stored_searchable])
39
39
  t.first_name(:path=>"namePart", :attributes=>{:type=>"given"}, :label=>"first name")
40
40
  t.terms_of_address(:path=>"namePart", :attributes=>{:type=>"termsOfAddress"})
41
41
  t.computing_id
@@ -47,14 +47,14 @@ module Samples
47
47
  t.organization(:ref=>:name, :attributes=>{:type=>"corporate"}, :index_as=>[:facetable])
48
48
  t.conference(:ref=>:name, :attributes=>{:type=>"conference"}, :index_as=>[:facetable])
49
49
  t.role {
50
- t.text(:path=>"roleTerm",:attributes=>{:type=>"text"}, :index_as=>[:searchable])
50
+ t.text(:path=>"roleTerm",:attributes=>{:type=>"text"}, :index_as=>[:stored_searchable])
51
51
  t.code(:path=>"roleTerm",:attributes=>{:type=>"code"})
52
52
  }
53
53
  t.journal(:path=>'relatedItem', :attributes=>{:type=>"host"}) {
54
54
  t.title_info(:index_as=>[:facetable],:ref=>[:title_info])
55
55
  t.origin_info(:path=>"originInfo") {
56
56
  t.publisher
57
- t.date_issued(:path=>"dateIssued", :type => :date, :index_as => [:searchable])
57
+ t.date_issued(:path=>"dateIssued", :type => :date, :index_as => [:stored_searchable])
58
58
  t.issuance(:index_as=>[:facetable])
59
59
  }
60
60
  t.issn(:path=>"identifier", :attributes=>{:type=>"issn"})
@@ -68,7 +68,7 @@ module Samples
68
68
  }
69
69
  t.start_page(:proxy=>[:pages, :start])
70
70
  t.end_page(:proxy=>[:pages, :end])
71
- t.publication_date(:path=>"date", :type => :date, :index_as => [:searchable])
71
+ t.publication_date(:path=>"date", :type => :date, :index_as => [:stored_searchable])
72
72
  }
73
73
  }
74
74
  t.note
@@ -13,11 +13,12 @@ describe Solrizer::Common do
13
13
  it "should handle many field types" do
14
14
  solr_doc = {}
15
15
  Foo.create_and_insert_terms('my_name', 'value', [:displayable, :searchable, :sortable], solr_doc)
16
- solr_doc.should == {'my_name_ssm' => ['value'], 'my_name_si' => ['value'], 'my_name_tesim' => ['value']}
16
+ solr_doc.should == {'my_name_ssm' => ['value'], 'my_name_si' => ['value'], 'my_name_teim' => ['value']}
17
17
  end
18
+
18
19
  it "should handle dates that are searchable" do
19
20
  solr_doc = {}
20
- Foo.create_and_insert_terms('my_name', Date.parse('2013-01-10'), [:searchable], solr_doc)
21
+ Foo.create_and_insert_terms('my_name', Date.parse('2013-01-10'), [:stored_searchable], solr_doc)
21
22
  solr_doc.should == {'my_name_dtsim' => ['2013-01-10T00:00:00Z']}
22
23
  end
23
24
 
@@ -11,14 +11,14 @@ describe Solrizer::FieldMapper do
11
11
  @unstemmed_searchable ||= UnstemmedDescriptor.new()
12
12
  end
13
13
 
14
- # Produces a _s suffix (overrides _tim)
15
- def self.searchable
16
- @searchable ||= SearchableDescriptor.new()
14
+ # Produces a _s suffix (overrides _tesim)
15
+ def self.stored_searchable
16
+ @searchable ||= StoredSearchableDescriptor.new()
17
17
  end
18
18
 
19
- # Produces a _s suffix (overrides _tim)
20
- def self.another_searchable
21
- @another_searchable ||= SearchableDescriptor.new()
19
+ # Produces a _s suffix (overrides _tesim)
20
+ def self.another_stored_searchable
21
+ @another_searchable ||= StoredSearchableDescriptor.new()
22
22
  end
23
23
 
24
24
  def self.edible
@@ -39,7 +39,7 @@ describe Solrizer::FieldMapper do
39
39
  end
40
40
  end
41
41
 
42
- class SearchableDescriptor < Solrizer::Descriptor
42
+ class StoredSearchableDescriptor < Solrizer::Descriptor
43
43
  def name_and_converter(field_name, args)
44
44
  [field_name.to_s + '_s']
45
45
  end
@@ -156,7 +156,7 @@ describe Solrizer::FieldMapper do
156
156
  @mapper.solr_name('bar', :laughable, type: :string).should == 'bar_haha'
157
157
  end
158
158
 
159
- it "should default the index_type to :searchable" do
159
+ it "should default the index_type to :stored_searchable" do
160
160
  @mapper.solr_name('foo').should == 'foo_s'
161
161
  end
162
162
 
@@ -191,7 +191,7 @@ describe Solrizer::FieldMapper do
191
191
 
192
192
  describe '.solr_names_and_values' do
193
193
  it "should map values based on passed descriptors" do
194
- @mapper.solr_names_and_values('foo', 'bar', [:searchable, :laughable, :edible]).should == {
194
+ @mapper.solr_names_and_values('foo', 'bar', [:stored_searchable, :laughable, :edible]).should == {
195
195
  'foo_s' => ['bar'],
196
196
  'foo_food' => ['bar'],
197
197
  'foo_haha' => ["Knock knock. Who's there? Bar. Bar who?"]
@@ -199,7 +199,7 @@ describe Solrizer::FieldMapper do
199
199
  end
200
200
 
201
201
  it "should apply mappings based on data type" do
202
- @mapper.solr_names_and_values('foo', 7, [:searchable, :laughable]).should == {
202
+ @mapper.solr_names_and_values('foo', 7, [:stored_searchable, :laughable]).should == {
203
203
  'foo_s' => ['7'],
204
204
  'foo_ihaha' => ["How many foos does it take to screw in a light bulb? 7."]
205
205
  }
@@ -212,19 +212,19 @@ describe Solrizer::FieldMapper do
212
212
  end
213
213
 
214
214
  it "should generate multiple mappings when two return the _same_ solr name but _different_ values" do
215
- @mapper.solr_names_and_values('roll', 'rock', [:unstemmed_searchable, :searchable]).should == {
215
+ @mapper.solr_names_and_values('roll', 'rock', [:unstemmed_searchable, :stored_searchable]).should == {
216
216
  'roll_s' => ["rock o'clock", 'rock']
217
217
  }
218
218
  end
219
219
 
220
220
  it "should not generate multiple mappings when two return the _same_ solr name and the _same_ value" do
221
- @mapper.solr_names_and_values('roll', 'rock', [:another_searchable, :searchable]).should == {
221
+ @mapper.solr_names_and_values('roll', 'rock', [:another_stored_searchable, :stored_searchable]).should == {
222
222
  'roll_s' => ['rock'],
223
223
  }
224
224
  end
225
225
 
226
226
  it "should return an empty hash when value is nil" do
227
- @mapper.solr_names_and_values('roll', nil, [:another_searchable, :searchable]).should == { }
227
+ @mapper.solr_names_and_values('roll', nil, [:another_stored_searchable, :stored_searchable]).should == { }
228
228
  end
229
229
  end
230
230
 
@@ -237,7 +237,7 @@ describe Solrizer::FieldMapper do
237
237
  @mapper.id_field.should == 'id'
238
238
  end
239
239
 
240
- it "should default the index_type to :searchable" do
240
+ it "should default the index_type to :stored_searchable" do
241
241
  @mapper.solr_name('foo', :type=>:string).should == 'foo_tesim'
242
242
  end
243
243
 
@@ -258,12 +258,15 @@ describe Solrizer::FieldMapper do
258
258
  @mapper.solr_names_and_values('foo', '', [:dateable]).should == { 'foo_dtsim' => [] }
259
259
  end
260
260
 
261
- it "should support searchable, displayable, facetable, sortable, unstemmed" do
262
- @mapper.solr_names_and_values('foo', 'bar', [:searchable, :displayable, :facetable, :sortable, :unstemmed_searchable]).should == {
263
- "foo_tesim" => ["bar"], #searchable
261
+ it "should support searchable, stored_searchable, displayable, facetable, sortable, stored_sortable, unstemmed" do
262
+ descriptors = [:searchable, :stored_searchable, :displayable, :facetable, :sortable, :stored_sortable, :unstemmed_searchable]
263
+ @mapper.solr_names_and_values('foo', 'bar', descriptors).should == {
264
+ "foo_teim" => ["bar"], #searchable
265
+ "foo_tesim" => ["bar"], #stored_searchable
264
266
  "foo_ssm" => ["bar"], #displayable
265
267
  "foo_sim" => ["bar"], #facetable
266
268
  "foo_si" => ["bar"], #sortable
269
+ "foo_ssi" => ["bar"], #stored_sortable
267
270
  "foo_tim" => ["bar"] #unstemmed_searchable
268
271
  }
269
272
  end
@@ -272,7 +275,6 @@ describe Solrizer::FieldMapper do
272
275
  time = Time.iso8601("2012-11-06T15:16:17Z")
273
276
  @mapper.solr_names_and_values('foo', time, :stored_sortable).should == {"foo_dtsi" => ["2012-11-06T15:16:17Z"]}
274
277
  @mapper.solr_names_and_values('foo', 'bar', :stored_sortable).should == {"foo_ssi" => ["bar"]}
275
-
276
278
  end
277
279
  end
278
280
  end
@@ -5,7 +5,7 @@ describe Solrizer do
5
5
  describe ".insert_field" do
6
6
  describe "on an empty document" do
7
7
  let(:doc) { Hash.new }
8
- it "should insert a field with the default (searchable) indexer" do
8
+ it "should insert a field with the default (stored_searchable) indexer" do
9
9
  Solrizer.insert_field(doc, 'foo', 'A name')
10
10
  doc.should == {'foo_tesim' => ['A name']}
11
11
  end
@@ -20,7 +20,6 @@ describe Solrizer::XML::Extractor do
20
20
  result[:format_tesim].should include("application/jp2000")
21
21
  result[:title_tesim].should == "This is a Sample Title"
22
22
  result[:publisher_tesim].should == "Sample Unversity"
23
-
24
23
  end
25
24
  end
26
25
 
@@ -1,6 +1,7 @@
1
1
  require 'spec_helper'
2
2
  require 'fixtures/mods_article'
3
3
 
4
+ # TODO: there should be no dependencies on OM in Solrizer
4
5
  describe Solrizer::XML::TerminologyBasedSolrizer do
5
6
 
6
7
  before(:all) do
@@ -34,9 +35,9 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
34
35
  end
35
36
 
36
37
  it "should use Solr mappings to generate field names" do
37
-
38
38
  solr_doc = @mods_article.to_solr
39
39
  solr_doc["abstract"].should be_nil
40
+ # NOTE: OM's old default expected stored and indexed; this is a change.
40
41
  solr_doc["abstract_tesim"].should == ["ABSTRACT"]
41
42
  solr_doc["title_info_1_language_tesim"].should == ["finnish"]
42
43
  solr_doc["person_1_role_0_text_tesim"].should == ["teacher"]
@@ -44,13 +45,10 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
44
45
  solr_doc["person_1_role_0_code_tesim"].should be_nil
45
46
  solr_doc["person_last_name_tesim"].sort.should == ["FAMILY NAME", "Gautama"]
46
47
  solr_doc["topic_tag_tesim"].sort.should == ["CONTROLLED TERM", "TOPIC 1", "TOPIC 2"]
47
-
48
48
  # These are a holdover from an old version of OM
49
49
  solr_doc['journal_0_issue_0_publication_date_dtsim'].should == ["2007-02-01T00:00:00Z"]
50
-
51
-
52
50
  end
53
-
51
+
54
52
  end
55
53
 
56
54
  describe ".solrize_term" do
@@ -69,7 +67,7 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
69
67
  @mods_article.solrize_term(term, fake_solr_doc)
70
68
 
71
69
  expected_names = ["DR.", "FAMILY NAME", "GIVEN NAMES"]
72
- %w(_tesim _sim).each do |suffix|
70
+ %w(_teim _sim).each do |suffix|
73
71
  actual_names = fake_solr_doc["name_0_namePart#{suffix}"].sort
74
72
  actual_names.should == expected_names
75
73
  end
@@ -95,7 +93,7 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
95
93
  term.children[:namePart].index_as = []
96
94
 
97
95
  @mods_article.solrize_term(term, fake_solr_doc)
98
- fake_solr_doc["name_0_namePart_tesim"].should be_nil
96
+ fake_solr_doc["name_0_namePart_teim"].should be_nil
99
97
  end
100
98
 
101
99
  it "should index terms where index_as is searchable" do
@@ -105,7 +103,7 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
105
103
 
106
104
  @mods_article.solrize_term(term, fake_solr_doc)
107
105
 
108
- fake_solr_doc["name_0_namePart_tesim"].sort.should == ["DR.", "FAMILY NAME", "GIVEN NAMES"]
106
+ fake_solr_doc["name_0_namePart_teim"].sort.should == ["DR.", "FAMILY NAME", "GIVEN NAMES"]
109
107
  end
110
108
  end
111
109
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: solrizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0.pre6
4
+ version: 3.0.0.pre7
5
5
  prerelease: 6
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-30 00:00:00.000000000 Z
12
+ date: 2013-02-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
@@ -207,8 +207,6 @@ files:
207
207
  - SOLRIZING_OM_DOCUMENTS.textile
208
208
  - bin/solrizer
209
209
  - bin/solrizerd
210
- - config/solr_mappings.yml
211
- - config/solr_mappings_af_0.1.yml
212
210
  - lib/solrizer.rb
213
211
  - lib/solrizer/common.rb
214
212
  - lib/solrizer/default_descriptors.rb
@@ -251,9 +249,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
251
249
  - - ">="
252
250
  - !ruby/object:Gem::Version
253
251
  version: '0'
254
- segments:
255
- - 0
256
- hash: 4544422725210300531
257
252
  required_rubygems_version: !ruby/object:Gem::Requirement
258
253
  none: false
259
254
  requirements:
@@ -1,16 +0,0 @@
1
- id: id
2
- searchable:
3
- default: _t
4
- date: _dt
5
- string: _t
6
- text: _t
7
- symbol: _s
8
- integer: _i
9
- long: _l
10
- boolean: _b
11
- float: _f
12
- double: _d
13
- displayable: _display
14
- facetable: _facet
15
- sortable: _sort
16
- unstemmed_searchable: _unstem_search
@@ -1,18 +0,0 @@
1
- id: id
2
- default: searchable
3
- searchable:
4
- date: _date
5
- string: _field
6
- text: _field
7
- symbol: _field
8
- integer: _field
9
- long: _field
10
- boolean: _field
11
- float: _field
12
- double: _field
13
- displayable: _display
14
- facetable: _facet
15
- sortable: _sort
16
- unstemmed_searchable: _unstem_search
17
-
18
-