solrizer 3.0.0.pre7 → 3.0.0.pre8

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,3 @@
1
- require "nokogiri"
2
- require 'yaml'
3
-
4
1
  module Solrizer
5
2
 
6
3
  # Provides utilities for extracting solr fields from a variety of objects and/or creating solr documents from a given object
@@ -34,8 +34,6 @@ module Solrizer
34
34
  # # t.dish_name :index_as => [:some_field_type] -maps to-> dish_name_ssim
35
35
  # # t.ingredients :index_as => [:some_field_type, :edible] -maps to-> ingredients_ssim, ingredients_food
36
36
  #
37
- # (See Solrizer::XML::TerminologyBasedSolrizer for instructions on applying a custom mapping once you have defined it.)
38
- #
39
37
  #
40
38
  # == Custom Value Converters
41
39
  #
@@ -1,3 +1,3 @@
1
1
  module Solrizer
2
- VERSION = "3.0.0.pre7"
2
+ VERSION = "3.0.0.pre8"
3
3
  end
@@ -4,7 +4,7 @@ module Solrizer::XML::Extractor
4
4
 
5
5
  #
6
6
  # This method extracts solr fields from simple xml
7
- # If you want to do anything more nuanced with the xml, use TerminologyBasedSolrizer instead.
7
+ # If you want to do anything more nuanced with the xml, use OM instead.
8
8
  #
9
9
  # @param [xml] text xml content to index
10
10
  # @param [Hash] solr_doc
data/lib/solrizer/xml.rb CHANGED
@@ -1,7 +1,5 @@
1
- require "solrizer"
2
- require "om"
3
1
  module Solrizer::XML
4
2
  end
5
3
  Dir[File.join(File.dirname(__FILE__),"xml","*.rb")].each {|file| require file }
6
4
 
7
- Solrizer::Extractor.send(:include, Solrizer::XML::Extractor)
5
+ Solrizer::Extractor.send(:include, Solrizer::XML::Extractor)
data/solrizer.gemspec CHANGED
@@ -13,7 +13,6 @@ Gem::Specification.new do |s|
13
13
  s.description = %q{Use solrizer to populate solr indexes. You can run solrizer from within your app, using the provided rake tasks, or as a JMS listener}
14
14
 
15
15
  s.add_dependency "nokogiri"
16
- s.add_dependency "om", ">=1.5.0"
17
16
  s.add_dependency "xml-simple"
18
17
  s.add_dependency "mediashelf-loggable", "~>0.4.7"
19
18
  s.add_dependency "stomp"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: solrizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0.pre7
4
+ version: 3.0.0.pre8
5
5
  prerelease: 6
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-02 00:00:00.000000000 Z
12
+ date: 2013-02-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
@@ -27,22 +27,6 @@ dependencies:
27
27
  - - ">="
28
28
  - !ruby/object:Gem::Version
29
29
  version: '0'
30
- - !ruby/object:Gem::Dependency
31
- name: om
32
- requirement: !ruby/object:Gem::Requirement
33
- none: false
34
- requirements:
35
- - - ">="
36
- - !ruby/object:Gem::Version
37
- version: 1.5.0
38
- type: :runtime
39
- prerelease: false
40
- version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
- requirements:
43
- - - ">="
44
- - !ruby/object:Gem::Version
45
- version: 1.5.0
46
30
  - !ruby/object:Gem::Dependency
47
31
  name: xml-simple
48
32
  requirement: !ruby/object:Gem::Requirement
@@ -218,25 +202,16 @@ files:
218
202
  - lib/solrizer/version.rb
219
203
  - lib/solrizer/xml.rb
220
204
  - lib/solrizer/xml/extractor.rb
221
- - lib/solrizer/xml/terminology_based_solrizer.rb
222
205
  - lib/tasks/solrizer.rake
223
206
  - solrizer.gemspec
224
207
  - spec/.rspec
225
208
  - spec/fixtures/druid-bv448hq0314-descMetadata.xml
226
- - spec/fixtures/druid-bv448hq0314-extProperties.xml
227
- - spec/fixtures/druid-cm234kq4672-extProperties.xml
228
- - spec/fixtures/druid-cm234kq4672-stories.xml
229
- - spec/fixtures/druid-hc513kw4806-descMetadata.xml
230
- - spec/fixtures/mods_article.rb
231
- - spec/fixtures/mods_articles/hydrangea_article1.xml
232
- - spec/fixtures/test_solr_mappings.yml
233
209
  - spec/spec_helper.rb
234
210
  - spec/units/common_spec.rb
235
211
  - spec/units/extractor_spec.rb
236
212
  - spec/units/field_mapper_spec.rb
237
213
  - spec/units/solrizer_spec.rb
238
214
  - spec/units/xml_extractor_spec.rb
239
- - spec/units/xml_terminology_based_solrizer_spec.rb
240
215
  homepage: http://github.com/projecthydra/solrizer
241
216
  licenses: []
242
217
  post_install_message:
@@ -249,6 +224,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
249
224
  - - ">="
250
225
  - !ruby/object:Gem::Version
251
226
  version: '0'
227
+ segments:
228
+ - 0
229
+ hash: 1646376342601153569
252
230
  required_rubygems_version: !ruby/object:Gem::Requirement
253
231
  none: false
254
232
  requirements:
@@ -264,18 +242,10 @@ summary: A utility for building solr indexes, usually from Fedora repository con
264
242
  with solrizer-fedora extension gem.
265
243
  test_files:
266
244
  - spec/fixtures/druid-bv448hq0314-descMetadata.xml
267
- - spec/fixtures/druid-bv448hq0314-extProperties.xml
268
- - spec/fixtures/druid-cm234kq4672-extProperties.xml
269
- - spec/fixtures/druid-cm234kq4672-stories.xml
270
- - spec/fixtures/druid-hc513kw4806-descMetadata.xml
271
- - spec/fixtures/mods_article.rb
272
- - spec/fixtures/mods_articles/hydrangea_article1.xml
273
- - spec/fixtures/test_solr_mappings.yml
274
245
  - spec/spec_helper.rb
275
246
  - spec/units/common_spec.rb
276
247
  - spec/units/extractor_spec.rb
277
248
  - spec/units/field_mapper_spec.rb
278
249
  - spec/units/solrizer_spec.rb
279
250
  - spec/units/xml_extractor_spec.rb
280
- - spec/units/xml_terminology_based_solrizer_spec.rb
281
251
  has_rdoc:
@@ -1,92 +0,0 @@
1
- # This module is only suitable to mix into Classes that use the OM::XML::Document Module
2
- module Solrizer::XML::TerminologyBasedSolrizer
3
- def self.included(klass)
4
- klass.send(:include, Solrizer::Common)
5
- klass.send(:extend, ClassMethods)
6
- end
7
-
8
- # Module Methods
9
- module ClassMethods
10
-
11
- # Build a solr document from +doc+ based on its terminology
12
- # @param [OM::XML::Document] doc
13
- # @param [Hash] (optional) solr_doc (values hash) to populate
14
- def solrize(doc, solr_doc=Hash.new, field_mapper = nil)
15
- unless doc.class.terminology.nil?
16
- doc.class.terminology.terms.each_pair do |term_name,term|
17
- doc.solrize_term(term, solr_doc, field_mapper)
18
- end
19
- end
20
-
21
- return solr_doc
22
- end
23
-
24
- # Populate a solr document with fields based on nodes in +xml+
25
- # Values for a term are gathered by to +term_pointer+ using OM::XML::TermValueOperators.term_values
26
- # and are deserialized by OM according to :type, as determined in its terminology.
27
- # The content of the actual field in solr is each +node+ of the +nodeset+ returned by OM,
28
- # rendered to a string.
29
- # @param [OM::XML::Document] doc xml document to extract values from
30
- # @param [OM::XML::Term] term corresponding to desired xml values
31
- # @param [Hash] (optional) solr_doc (values hash) to populate
32
- def solrize_term(doc, term, solr_doc = Hash.new, field_mapper = nil, opts={})
33
- parents = opts.fetch(:parents, [])
34
- term_pointer = parents+[term.name]
35
- nodeset = doc.term_values(*term_pointer)
36
-
37
- nodeset.each do |n|
38
- doc.solrize_node(n, term_pointer, term, solr_doc, field_mapper)
39
- # FIXME: there should be no dependencies on OM in Solrizer
40
- unless term.kind_of? OM::XML::NamedTermProxy
41
- term.children.each_pair do |child_term_name, child_term|
42
- doc.solrize_term(child_term, solr_doc, field_mapper, opts={:parents=>parents+[{term.name=>nodeset.index(n)}]})
43
- end
44
- end
45
- end
46
- solr_doc
47
- end
48
-
49
- # Populate a solr document with solr fields corresponding to the given xml node
50
- # Field names are generated using settings from the term in the +doc+'s terminology corresponding to +term_pointer+
51
- # If the supplied term does not have an index_as attribute, no indexing will be performed.
52
- # @param [Nokogiri::XML::Node] node to solrize
53
- # @param [OM::XML::Document] doc document the node came from
54
- # @param [Array] term_pointer Array pointing to the term that should be used for solrization settings
55
- # @param [Term] term the term to be solrized
56
- # @param [Hash] (optional) solr_doc (values hash) to populate
57
- # @return [Hash] the solr doc
58
- def solrize_node(node_value, doc, term_pointer, term, solr_doc = Hash.new, field_mapper = nil, opts = {})
59
- return solr_doc unless term.index_as && !term.index_as.empty?
60
-
61
- # FIXME: there should be no dependencies on OM in Solrizer
62
- generic_field_name_base = OM::XML::Terminology.term_generic_name(*term_pointer)
63
- create_and_insert_terms(generic_field_name_base, node_value, term.index_as, solr_doc)
64
-
65
- if term_pointer.length > 1
66
- # FIXME: there should be no dependencies on OM in Solrizer
67
- hierarchical_field_name_base = OM::XML::Terminology.term_hierarchical_name(*term_pointer)
68
- create_and_insert_terms(hierarchical_field_name_base, node_value, term.index_as, solr_doc)
69
- end
70
- solr_doc
71
- end
72
-
73
- end
74
-
75
-
76
- # Instance Methods
77
-
78
- attr_accessor :field_mapper
79
-
80
- def to_solr(solr_doc = Hash.new, field_mapper = self.field_mapper) # :nodoc:
81
- self.class.solrize(self, solr_doc, field_mapper)
82
- end
83
-
84
- def solrize_term(term, solr_doc = Hash.new, field_mapper = self.field_mapper, opts={})
85
- self.class.solrize_term(self, term, solr_doc, field_mapper, opts)
86
- end
87
-
88
- def solrize_node(node, term_pointer, term, solr_doc = Hash.new, field_mapper = self.field_mapper, opts={})
89
- self.class.solrize_node(node, self, term_pointer, term, solr_doc, field_mapper, opts)
90
- end
91
-
92
- end
@@ -1,52 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <document>
3
- <attributes>
4
- <attribute type="item">5958</attribute>
5
- <attribute type="objectid">FEI0010-00013142</attribute>
6
- <attribute type="title">Letter from Ellie Engelmore to Professor K. C. Reddy</attribute>
7
- <attribute type="copyright">Presumed under copyright. Do not publish.</attribute>
8
- <attribute type="description"/>
9
- <attribute type="date">1985-12-30</attribute>
10
- <attribute type="datestr">30/12/1985</attribute>
11
- <attribute type="docurl">https://www.stanford.edu/group/salt_project/SLA/Feigenbaum/eaf7000/pdf/00013142.pdf</attribute>
12
- <attribute type="doctn">http://www.stanford.edu/group/salt_project/cgi-bin/SLA/Feigenbaum/eaf7000/png/small_00013142.png</attribute>
13
- <attribute type="url"/>
14
- <attribute type="industryterm"/>
15
- <attribute type="technology">artificial intelligence</attribute>
16
- <attribute type="company"/>
17
- <attribute type="person">ELLIE ENGELMORE</attribute>
18
- <attribute type="year">1985</attribute>
19
- <attribute type="organization">Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder</attribute>
20
- <attribute type="sourcelocation">Folder 15</attribute>
21
- </attributes>
22
- <facets>
23
- <facet type="year" id="49">1980s</facet>
24
- <facet type="year" id="49">1985</facet>
25
- <facet type="year" id="42">1980s</facet>
26
- <facet type="sourcelocation" id="592">Feigenbaum</facet>
27
- <facet type="sourcelocation" id="592">eaf7000</facet>
28
- <facet type="sourcelocation" id="592">Box 51A</facet>
29
- <facet type="sourcelocation" id="594">Feigenbaum</facet>
30
- <facet type="sourcelocation" id="594">eaf7000</facet>
31
- <facet type="sourcelocation" id="594">Box 51A</facet>
32
- <facet type="sourcelocation" id="594">Folder 15</facet>
33
- <facet type="sourcelocation" id="691">Feigenbaum</facet>
34
- <facet type="sourcelocation" id="692">Feigenbaum</facet>
35
- <facet type="sourcelocation" id="692">eaf7000</facet>
36
- <facet type="doctype" id="32">Correspondence</facet>
37
- <facet type="city" id="82">Ann Arbor</facet>
38
- <facet type="city" id="910">Hyderabad</facet>
39
- <facet type="city" id="1519">Palo Alto</facet>
40
- <facet type="country" id="68">India</facet>
41
- <facet type="emailaddress" id="288">EENGELMORE@SUMEX-AIM.ARPA</facet>
42
- <facet type="organization" id="5065">Heuristic Programming Project</facet>
43
- <facet type="organization" id="7012">Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder</facet>
44
- <facet type="organization" id="8878">Professor K. C. Reddy School of Mathematics and Computer/Information Sciences</facet>
45
- <facet type="person" id="5810">ELLIE ENGELMORE</facet>
46
- <facet type="person" id="17934">Reddy</facet>
47
- <facet type="person" id="5787">EDWARD FEIGENBAUM</facet>
48
- <facet type="provinceorstate" id="96">Michigan</facet>
49
- <facet type="provinceorstate" id="27">California</facet>
50
- <facet type="technology" id="1713">artificial intelligence</facet>
51
- </facets>
52
- </document>
@@ -1,5 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <document>
3
- <attributes><attribute type="item">4290</attribute><attribute type="objectid">FEI0010-00011325</attribute><attribute type="title">Letter from Ellie Engelmore to Wemara Lichty</attribute><attribute type="copyright">Presumed under copyright. Do not publish.</attribute><attribute type="description"/><attribute type="date">1984-6-4</attribute><attribute type="datestr">4/6/1984</attribute><attribute type="docurl">https://www.stanford.edu/group/salt_project/SLA/Feigenbaum/eaf7000/pdf/00011325.pdf</attribute><attribute type="doctn">http://www.stanford.edu/group/salt_project/cgi-bin/SLA/Feigenbaum/eaf7000/png/small_00011325.png</attribute><attribute type="url"/><attribute type="industryterm"/><attribute type="technology">artificial intelligence</attribute><attribute type="company"/><attribute type="person">A. FEIGENBAUM</attribute><attribute type="year">1984</attribute><attribute type="organization">McAlcster Hall University</attribute><attribute type="sourcelocation">Folder 5</attribute></attributes>
4
- <facets><facet type="year" id="48">1980s</facet><facet type="year" id="48">1984</facet><facet type="year" id="42">1980s</facet><facet type="sourcelocation" id="578">Feigenbaum</facet><facet type="sourcelocation" id="578">eaf7000</facet><facet type="sourcelocation" id="578">Box 51</facet><facet type="sourcelocation" id="587">Feigenbaum</facet><facet type="sourcelocation" id="587">eaf7000</facet><facet type="sourcelocation" id="587">Box 51</facet><facet type="sourcelocation" id="587">Folder 5</facet><facet type="sourcelocation" id="692">Feigenbaum</facet><facet type="sourcelocation" id="692">eaf7000</facet><facet type="sourcelocation" id="691">Feigenbaum</facet><facet type="doctype" id="32">Correspondence</facet><facet type="city" id="1948">Stanford</facet><facet type="country" id="33">Columbia</facet><facet type="facility" id="2551">U. Missouri library</facet><facet type="organization" id="5065">Heuristic Programming Project</facet><facet type="organization" id="7026">McAlcster Hall University</facet><facet type="organization" id="9645">STANFORD UNIVERSITY</facet><facet type="organization" id="11964">University of Missouri</facet><facet type="organization" id="12407">Wemara Lichty Psychology Department</facet><facet type="person" id="15650">Morton Hunt</facet><facet type="person" id="37">A. FEIGENBAUM</facet><facet type="person" id="5810">ELLIE ENGELMORE</facet><facet type="provinceorstate" id="27">California</facet><facet type="provinceorstate" id="100">Missouri</facet><facet type="publishedmedium" id="44">Artificial Intelligence</facet><facet type="technology" id="1713">artificial intelligence</facet></facets>
5
- </document>
@@ -1,17 +0,0 @@
1
-
2
- <html>
3
- <body>
4
- <pre>
5
- This is
6
- preformatted text.
7
- It preserves both spaces
8
- and line breaks.
9
- </pre>
10
- <p>The pre tag is good for displaying computer code:</p>
11
- <pre>
12
- for i = 1 to 10
13
- print i
14
- next i
15
- </pre>
16
- </body>
17
- </html>
@@ -1,11 +0,0 @@
1
- <dc xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
2
- <dcterms:type xsi:type="DCMITYPE">text</dcterms:type>
3
- <dcterms:medium>Paper Document</dcterms:medium>
4
- <dcterms:rights>Copyright © 2006 All rights reserved. Distribution for commercial purposes is prohibited.</dcterms:rights>
5
- <dcterms:date/>
6
- <dcterms:format>application/tiff</dcterms:format>
7
- <dcterms:format>application/jp2000</dcterms:format>
8
- <dcterms:format>application/pdf</dcterms:format>
9
- <dcterms:title>The Rise and Fall of the YouTube Empire</dcterms:title>
10
- <dcterms:publisher>Sample Unversity</dcterms:publisher>
11
- </dc>
@@ -1,90 +0,0 @@
1
- module Samples
2
- class ModsArticle
3
-
4
- include OM::XML::Document
5
-
6
- set_terminology do |t|
7
- t.root(:path=>"mods", :xmlns=>"http://www.loc.gov/mods/v3", :schema=>"http://www.loc.gov/standards/mods/v3/mods-3-2.xsd", "xmlns:foo"=>"http://my.custom.namespace")
8
-
9
-
10
- t.title_info(:path=>"titleInfo") {
11
- t.main_title(:index_as=>[:facetable],:path=>"title", :label=>"title") {
12
- t.main_title_lang(:path=>{:attribute=> "xml:lang"})
13
- }
14
- t.french_title(:ref=>[:title_info,:main_title], :attributes=>{"xml:lang"=>"fre"})
15
-
16
- t.language(:index_as=>[:facetable, :stored_searchable],:path=>{:attribute=>"lang"})
17
- }
18
- t.language{
19
- t.lang_code(:index_as=>[:facetable], :path=>"languageTerm", :attributes=>{:type=>"code"})
20
- }
21
- t.abstract(:index_as=>[:stored_searchable])
22
- t.subject {
23
- t.topic(:index_as=>[:facetable])
24
- }
25
- t.topic_tag(:proxy=>[:subject, :topic], :index_as=>[:stored_searchable])
26
- # t.topic_tag(:index_as=>[:facetable],:path=>"subject", :default_content_path=>"topic")
27
- # This is a mods:name. The underscore is purely to avoid namespace conflicts.
28
- t.name_ {
29
- # this is a namepart
30
- t.namePart(:type=>:string, :label=>"generic name")
31
- # affiliations are great
32
- t.affiliation
33
- t.institution(:path=>"affiliation", :index_as=>[:facetable], :label=>"organization")
34
- t.displayForm
35
- t.role(:ref=>[:role])
36
- t.description(:index_as=>[:facetable])
37
- t.date(:path=>"namePart", :attributes=>{:type=>"date"})
38
- t.last_name(:path=>"namePart", :attributes=>{:type=>"family"}, :index_as=>[:stored_searchable])
39
- t.first_name(:path=>"namePart", :attributes=>{:type=>"given"}, :label=>"first name")
40
- t.terms_of_address(:path=>"namePart", :attributes=>{:type=>"termsOfAddress"})
41
- t.computing_id
42
- t.name_content(:path=>"text()")
43
- }
44
- # lookup :person, :first_name
45
- t.person(:ref=>:name, :attributes=>{:type=>"personal"}, :index_as=>[:facetable])
46
- t.department(:proxy=>[:person,:description],:index_as=>[:facetable])
47
- t.organization(:ref=>:name, :attributes=>{:type=>"corporate"}, :index_as=>[:facetable])
48
- t.conference(:ref=>:name, :attributes=>{:type=>"conference"}, :index_as=>[:facetable])
49
- t.role {
50
- t.text(:path=>"roleTerm",:attributes=>{:type=>"text"}, :index_as=>[:stored_searchable])
51
- t.code(:path=>"roleTerm",:attributes=>{:type=>"code"})
52
- }
53
- t.journal(:path=>'relatedItem', :attributes=>{:type=>"host"}) {
54
- t.title_info(:index_as=>[:facetable],:ref=>[:title_info])
55
- t.origin_info(:path=>"originInfo") {
56
- t.publisher
57
- t.date_issued(:path=>"dateIssued", :type => :date, :index_as => [:stored_searchable])
58
- t.issuance(:index_as=>[:facetable])
59
- }
60
- t.issn(:path=>"identifier", :attributes=>{:type=>"issn"})
61
- t.issue(:path=>"part") {
62
- t.volume(:path=>"detail", :attributes=>{:type=>"volume"}, :default_content_path=>"number")
63
- t.level(:path=>"detail", :attributes=>{:type=>"number"}, :default_content_path=>"number")
64
- t.extent
65
- t.pages(:path=>"extent", :attributes=>{:unit=>"pages"}) {
66
- t.start
67
- t.end
68
- }
69
- t.start_page(:proxy=>[:pages, :start])
70
- t.end_page(:proxy=>[:pages, :end])
71
- t.publication_date(:path=>"date", :type => :date, :index_as => [:stored_searchable])
72
- }
73
- }
74
- t.note
75
- t.location(:path=>"location") {
76
- t.url(:path=>"url")
77
- }
78
- t.publication_url(:proxy=>[:location,:url])
79
- t.title(:proxy=>[:title_info, :main_title])
80
- t.journal_title(:proxy=>[:journal, :title_info, :main_title])
81
- t.pub_date(:proxy=>[:journal, :issue, :publication_date])
82
- t.issue_date(:ref=>[:journal, :origin_info, :date_issued], :type=> :date)
83
- end
84
-
85
- # Changes from OM::Properties implementation
86
- # renamed family_name => last_name
87
- # start_page & end_page now accessible as [:journal, :issue, :pages, :start] (etc.)
88
-
89
- end
90
- end
@@ -1,90 +0,0 @@
1
- <mods version="3.0" xsi:schemaLocation="http://www.loc.gov/mods/v3
2
- http://www.loc.gov/standards/mods/v3/mods-3-0.xsd" xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
-
4
- <titleInfo>
5
- <nonSort>THE</nonSort>
6
- <title xml:lang="eng">ARTICLE TITLE HYDRANGEA ARTICLE 1</title>
7
- <subTitle>SUBTITLE</subTitle>
8
- </titleInfo>
9
- <titleInfo lang="finnish">
10
- <title>Artikkelin otsikko Hydrangea artiklan 1</title>
11
- </titleInfo>
12
-
13
- <name type="personal">
14
- <namePart type="family">FAMILY NAME</namePart>
15
- <namePart type="given">GIVEN NAMES</namePart>
16
- <namePart type="termsOfAddress">DR.</namePart>
17
- <displayForm>NAME AS IT APPEARS</displayForm>
18
- <affiliation>FACULTY, UNIVERSITY</affiliation>
19
- <role>
20
- <roleTerm authority="marcrelator" type="text">creator</roleTerm>
21
- </role>
22
- <role>
23
- <roleTerm type="text">submitter</roleTerm>
24
- </role>
25
- </name>
26
-
27
- <name type="personal">
28
- <namePart type="family">Gautama</namePart>
29
- <namePart type="given">Siddartha</namePart>
30
- <namePart type="termsOfAddress">Prince</namePart>
31
- <affiliation>Nirvana</affiliation>
32
- <role>
33
- <roleTerm authority="marcrelator" type="text">teacher</roleTerm>
34
- </role>
35
- </name>
36
-
37
- <typeOfResource>text</typeOfResource>
38
- <genre authority="local">journal article</genre>
39
-
40
- <abstract>ABSTRACT</abstract>
41
- <subject>
42
- <topic>TOPIC 1</topic>
43
- <topic>TOPIC 2</topic>
44
- </subject>
45
- <subject authority="AUTHORITY SOURCE (RFCD, LCSH)">
46
- <topic>CONTROLLED TERM</topic>
47
- </subject>
48
-
49
- <language>
50
- <languageTerm authority="iso639-2b" type="code">en-aus </languageTerm>
51
- </language>
52
-
53
- <physicalDescription>
54
- <internetMediaType>application/pdf</internetMediaType>
55
- <extent>36 p.</extent>
56
- </physicalDescription>
57
-
58
- <relatedItem type="host">
59
- <titleInfo>
60
- <title>TITLE OF HOST JOURNAL</title>
61
- </titleInfo>
62
- <originInfo>
63
- <publisher>PUBLISHER</publisher>
64
- <dateIssued>2007-02-15</dateIssued>
65
- </originInfo>
66
- <identifier type="issn">0013-8908</identifier>
67
- <part>
68
- <detail type="volume">
69
- <number>2</number>
70
- </detail>
71
- <detail type="level">
72
- <number>2</number>
73
- </detail>
74
- <extent unit="pages">
75
- <start>195</start>
76
- <end>230</end>
77
- </extent>
78
- <date>2007-02-01</date>
79
- </part>
80
- </relatedItem>
81
-
82
- <identifier type="uri">http://URL.edu.au/</identifier>
83
- <identifier type="doi">doi:10.1006/jmbi.1995.0238</identifier>
84
- <location>
85
- <url>http://URL.edu.au/</url>
86
- </location>
87
- <accessCondition type="restrictionOnAccess">EMBARGO NOTE</accessCondition>
88
- <accessCondition type="use and reproduction">OPEN ACCESS</accessCondition>
89
-
90
- </mods>
@@ -1,16 +0,0 @@
1
- id: pid
2
- default: edible
3
- edible:
4
- date: _edible_date
5
- string: _edible_string
6
- text: _edible_text
7
- symbol: _edible_sym
8
- integer: _edible_int
9
- long: _edible_long
10
- boolean: _edible_bool
11
- float: _edible_float
12
- double: _edible_double
13
- displayable: _display
14
- facetable: _facet
15
- sortable: _sort
16
- unstemmed_searchable: _unstem_search
@@ -1,109 +0,0 @@
1
- require 'spec_helper'
2
- require 'fixtures/mods_article'
3
-
4
- # TODO: there should be no dependencies on OM in Solrizer
5
- describe Solrizer::XML::TerminologyBasedSolrizer do
6
-
7
- before(:all) do
8
- Samples::ModsArticle.send(:include, Solrizer::XML::TerminologyBasedSolrizer)
9
- end
10
-
11
- before(:each) do
12
- article_xml = fixture( File.join("mods_articles", "hydrangea_article1.xml") )
13
- @mods_article = Samples::ModsArticle.from_xml(article_xml)
14
- end
15
-
16
- describe ".to_solr" do
17
-
18
- it "should provide .to_solr and return a SolrDocument" do
19
- @mods_article.should respond_to(:to_solr)
20
- @mods_article.to_solr.should be_kind_of(Hash)
21
- end
22
-
23
- it "should optionally allow you to provide the Hash to add fields to and return that document when done" do
24
- doc = Hash.new
25
- @mods_article.to_solr(doc).should equal(doc)
26
- end
27
-
28
- it "should iterate through the terminology terms, calling .solrize_term on each and passing in the solr doc" do
29
- solr_doc = Hash.new
30
- @mods_article.field_mapper = Solrizer::FieldMapper.new
31
- Samples::ModsArticle.terminology.terms.each_pair do |k,v|
32
- @mods_article.should_receive(:solrize_term).with(v, solr_doc, @mods_article.field_mapper)
33
- end
34
- @mods_article.to_solr(solr_doc)
35
- end
36
-
37
- it "should use Solr mappings to generate field names" do
38
- solr_doc = @mods_article.to_solr
39
- solr_doc["abstract"].should be_nil
40
- # NOTE: OM's old default expected stored and indexed; this is a change.
41
- solr_doc["abstract_tesim"].should == ["ABSTRACT"]
42
- solr_doc["title_info_1_language_tesim"].should == ["finnish"]
43
- solr_doc["person_1_role_0_text_tesim"].should == ["teacher"]
44
- # No index_as on the code field.
45
- solr_doc["person_1_role_0_code_tesim"].should be_nil
46
- solr_doc["person_last_name_tesim"].sort.should == ["FAMILY NAME", "Gautama"]
47
- solr_doc["topic_tag_tesim"].sort.should == ["CONTROLLED TERM", "TOPIC 1", "TOPIC 2"]
48
- # These are a holdover from an old verison of OM
49
- solr_doc['journal_0_issue_0_publication_date_dtsim'].should == ["2007-02-01T00:00:00Z"]
50
- end
51
-
52
- end
53
-
54
- describe ".solrize_term" do
55
-
56
- it "should add fields to a solr document for all nodes corresponding to the given term and its children" do
57
- solr_doc = Hash.new
58
- result = @mods_article.solrize_term(Samples::ModsArticle.terminology.retrieve_term(:title_info), solr_doc)
59
- result.should == solr_doc
60
- end
61
-
62
- it "should add multiple fields based on index_as" do
63
- fake_solr_doc = {}
64
- term = Samples::ModsArticle.terminology.retrieve_term(:name)
65
- term.children[:namePart].index_as = [:searchable, :displayable, :facetable]
66
-
67
- @mods_article.solrize_term(term, fake_solr_doc)
68
-
69
- expected_names = ["DR.", "FAMILY NAME", "GIVEN NAMES"]
70
- %w(_teim _sim).each do |suffix|
71
- actual_names = fake_solr_doc["name_0_namePart#{suffix}"].sort
72
- actual_names.should == expected_names
73
- end
74
- end
75
-
76
- it "should add fields based on type using proxy" do
77
- unless RUBY_VERSION.match("1.8.7")
78
- solr_doc = Hash.new
79
- result = @mods_article.solrize_term(Samples::ModsArticle.terminology.retrieve_term(:pub_date), solr_doc)
80
- solr_doc["pub_date_dtsim"].should == ["2007-02-01T00:00:00Z"]
81
- end
82
- end
83
-
84
- it "should add fields based on type using ref" do
85
- solr_doc = Hash.new
86
- result = @mods_article.solrize_term(Samples::ModsArticle.terminology.retrieve_term(:issue_date), solr_doc)
87
- solr_doc["issue_date_dtsim"].should == ["2007-02-15T00:00:00Z"]
88
- end
89
-
90
- it "shouldn't index terms where index_as is an empty array" do
91
- fake_solr_doc = {}
92
- term = Samples::ModsArticle.terminology.retrieve_term(:name)
93
- term.children[:namePart].index_as = []
94
-
95
- @mods_article.solrize_term(term, fake_solr_doc)
96
- fake_solr_doc["name_0_namePart_teim"].should be_nil
97
- end
98
-
99
- it "should index terms where index_as is searchable" do
100
- fake_solr_doc = {}
101
- term = Samples::ModsArticle.terminology.retrieve_term(:name)
102
- term.children[:namePart].index_as = [:searchable]
103
-
104
- @mods_article.solrize_term(term, fake_solr_doc)
105
-
106
- fake_solr_doc["name_0_namePart_teim"].sort.should == ["DR.", "FAMILY NAME", "GIVEN NAMES"]
107
- end
108
- end
109
- end