solrizer 3.0.0.pre7 → 3.0.0.pre8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,3 @@
1
- require "nokogiri"
2
- require 'yaml'
3
-
4
1
  module Solrizer
5
2
 
6
3
  # Provides utilities for extracting solr fields from a variety of objects and/or creating solr documents from a given object
@@ -34,8 +34,6 @@ module Solrizer
34
34
  # # t.dish_name :index_as => [:some_field_type] -maps to-> dish_name_ssim
35
35
  # # t.ingredients :index_as => [:some_field_type, :edible] -maps to-> ingredients_ssim, ingredients_food
36
36
  #
37
- # (See Solrizer::XML::TerminologyBasedSolrizer for instructions on applying a custom mapping once you have defined it.)
38
- #
39
37
  #
40
38
  # == Custom Value Converters
41
39
  #
@@ -1,3 +1,3 @@
1
1
  module Solrizer
2
- VERSION = "3.0.0.pre7"
2
+ VERSION = "3.0.0.pre8"
3
3
  end
@@ -4,7 +4,7 @@ module Solrizer::XML::Extractor
4
4
 
5
5
  #
6
6
  # This method extracts solr fields from simple xml
7
- # If you want to do anything more nuanced with the xml, use TerminologyBasedSolrizer instead.
7
+ # If you want to do anything more nuanced with the xml, use OM instead.
8
8
  #
9
9
  # @param [xml] text xml content to index
10
10
  # @param [Hash] solr_doc
data/lib/solrizer/xml.rb CHANGED
@@ -1,7 +1,5 @@
1
- require "solrizer"
2
- require "om"
3
1
  module Solrizer::XML
4
2
  end
5
3
  Dir[File.join(File.dirname(__FILE__),"xml","*.rb")].each {|file| require file }
6
4
 
7
- Solrizer::Extractor.send(:include, Solrizer::XML::Extractor)
5
+ Solrizer::Extractor.send(:include, Solrizer::XML::Extractor)
data/solrizer.gemspec CHANGED
@@ -13,7 +13,6 @@ Gem::Specification.new do |s|
13
13
  s.description = %q{Use solrizer to populate solr indexes. You can run solrizer from within your app, using the provided rake tasks, or as a JMS listener}
14
14
 
15
15
  s.add_dependency "nokogiri"
16
- s.add_dependency "om", ">=1.5.0"
17
16
  s.add_dependency "xml-simple"
18
17
  s.add_dependency "mediashelf-loggable", "~>0.4.7"
19
18
  s.add_dependency "stomp"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: solrizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0.pre7
4
+ version: 3.0.0.pre8
5
5
  prerelease: 6
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-02 00:00:00.000000000 Z
12
+ date: 2013-02-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
@@ -27,22 +27,6 @@ dependencies:
27
27
  - - ">="
28
28
  - !ruby/object:Gem::Version
29
29
  version: '0'
30
- - !ruby/object:Gem::Dependency
31
- name: om
32
- requirement: !ruby/object:Gem::Requirement
33
- none: false
34
- requirements:
35
- - - ">="
36
- - !ruby/object:Gem::Version
37
- version: 1.5.0
38
- type: :runtime
39
- prerelease: false
40
- version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
- requirements:
43
- - - ">="
44
- - !ruby/object:Gem::Version
45
- version: 1.5.0
46
30
  - !ruby/object:Gem::Dependency
47
31
  name: xml-simple
48
32
  requirement: !ruby/object:Gem::Requirement
@@ -218,25 +202,16 @@ files:
218
202
  - lib/solrizer/version.rb
219
203
  - lib/solrizer/xml.rb
220
204
  - lib/solrizer/xml/extractor.rb
221
- - lib/solrizer/xml/terminology_based_solrizer.rb
222
205
  - lib/tasks/solrizer.rake
223
206
  - solrizer.gemspec
224
207
  - spec/.rspec
225
208
  - spec/fixtures/druid-bv448hq0314-descMetadata.xml
226
- - spec/fixtures/druid-bv448hq0314-extProperties.xml
227
- - spec/fixtures/druid-cm234kq4672-extProperties.xml
228
- - spec/fixtures/druid-cm234kq4672-stories.xml
229
- - spec/fixtures/druid-hc513kw4806-descMetadata.xml
230
- - spec/fixtures/mods_article.rb
231
- - spec/fixtures/mods_articles/hydrangea_article1.xml
232
- - spec/fixtures/test_solr_mappings.yml
233
209
  - spec/spec_helper.rb
234
210
  - spec/units/common_spec.rb
235
211
  - spec/units/extractor_spec.rb
236
212
  - spec/units/field_mapper_spec.rb
237
213
  - spec/units/solrizer_spec.rb
238
214
  - spec/units/xml_extractor_spec.rb
239
- - spec/units/xml_terminology_based_solrizer_spec.rb
240
215
  homepage: http://github.com/projecthydra/solrizer
241
216
  licenses: []
242
217
  post_install_message:
@@ -249,6 +224,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
249
224
  - - ">="
250
225
  - !ruby/object:Gem::Version
251
226
  version: '0'
227
+ segments:
228
+ - 0
229
+ hash: 1646376342601153569
252
230
  required_rubygems_version: !ruby/object:Gem::Requirement
253
231
  none: false
254
232
  requirements:
@@ -264,18 +242,10 @@ summary: A utility for building solr indexes, usually from Fedora repository con
264
242
  with solrizer-fedora extension gem.
265
243
  test_files:
266
244
  - spec/fixtures/druid-bv448hq0314-descMetadata.xml
267
- - spec/fixtures/druid-bv448hq0314-extProperties.xml
268
- - spec/fixtures/druid-cm234kq4672-extProperties.xml
269
- - spec/fixtures/druid-cm234kq4672-stories.xml
270
- - spec/fixtures/druid-hc513kw4806-descMetadata.xml
271
- - spec/fixtures/mods_article.rb
272
- - spec/fixtures/mods_articles/hydrangea_article1.xml
273
- - spec/fixtures/test_solr_mappings.yml
274
245
  - spec/spec_helper.rb
275
246
  - spec/units/common_spec.rb
276
247
  - spec/units/extractor_spec.rb
277
248
  - spec/units/field_mapper_spec.rb
278
249
  - spec/units/solrizer_spec.rb
279
250
  - spec/units/xml_extractor_spec.rb
280
- - spec/units/xml_terminology_based_solrizer_spec.rb
281
251
  has_rdoc:
@@ -1,92 +0,0 @@
1
- # This module is only suitable to mix into Classes that use the OM::XML::Document Module
2
- module Solrizer::XML::TerminologyBasedSolrizer
3
- def self.included(klass)
4
- klass.send(:include, Solrizer::Common)
5
- klass.send(:extend, ClassMethods)
6
- end
7
-
8
- # Module Methods
9
- module ClassMethods
10
-
11
- # Build a solr document from +doc+ based on its terminology
12
- # @param [OM::XML::Document] doc
13
- # @param [Hash] (optional) solr_doc (values hash) to populate
14
- def solrize(doc, solr_doc=Hash.new, field_mapper = nil)
15
- unless doc.class.terminology.nil?
16
- doc.class.terminology.terms.each_pair do |term_name,term|
17
- doc.solrize_term(term, solr_doc, field_mapper)
18
- end
19
- end
20
-
21
- return solr_doc
22
- end
23
-
24
- # Populate a solr document with fields based on nodes in +xml+
25
- # Values for a term are gathered according to +term_pointer+ using OM::XML::TermValueOperators.term_values
26
- # and are deserialized by OM according to :type, as determined in its terminology.
27
- # The content of the actual field in solr is each +node+ of the +nodeset+ returned by OM,
28
- # rendered to a string.
29
- # @param [OM::XML::Document] doc xml document to extract values from
30
- # @param [OM::XML::Term] term corresponding to desired xml values
31
- # @param [Hash] (optional) solr_doc (values hash) to populate
32
- def solrize_term(doc, term, solr_doc = Hash.new, field_mapper = nil, opts={})
33
- parents = opts.fetch(:parents, [])
34
- term_pointer = parents+[term.name]
35
- nodeset = doc.term_values(*term_pointer)
36
-
37
- nodeset.each do |n|
38
- doc.solrize_node(n, term_pointer, term, solr_doc, field_mapper)
39
- # FIXME: there should be no dependencies on OM in Solrizer
40
- unless term.kind_of? OM::XML::NamedTermProxy
41
- term.children.each_pair do |child_term_name, child_term|
42
- doc.solrize_term(child_term, solr_doc, field_mapper, opts={:parents=>parents+[{term.name=>nodeset.index(n)}]})
43
- end
44
- end
45
- end
46
- solr_doc
47
- end
48
-
49
- # Populate a solr document with solr fields corresponding to the given xml node
50
- # Field names are generated using settings from the term in the +doc+'s terminology corresponding to +term_pointer+
51
- # If the supplied term does not have an index_as attribute, no indexing will be performed.
52
- # @param [Nokogiri::XML::Node] node_value the node to solrize
53
- # @param [OM::XML::Document] doc document the node came from
54
- # @param [Array] term_pointer Array pointing to the term that should be used for solrization settings
55
- # @param [Term] term the term to be solrized
56
- # @param [Hash] (optional) solr_doc (values hash) to populate
57
- # @return [Hash] the solr doc
58
- def solrize_node(node_value, doc, term_pointer, term, solr_doc = Hash.new, field_mapper = nil, opts = {})
59
- return solr_doc unless term.index_as && !term.index_as.empty?
60
-
61
- # FIXME: there should be no dependencies on OM in Solrizer
62
- generic_field_name_base = OM::XML::Terminology.term_generic_name(*term_pointer)
63
- create_and_insert_terms(generic_field_name_base, node_value, term.index_as, solr_doc)
64
-
65
- if term_pointer.length > 1
66
- # FIXME: there should be no dependencies on OM in Solrizer
67
- hierarchical_field_name_base = OM::XML::Terminology.term_hierarchical_name(*term_pointer)
68
- create_and_insert_terms(hierarchical_field_name_base, node_value, term.index_as, solr_doc)
69
- end
70
- solr_doc
71
- end
72
-
73
- end
74
-
75
-
76
- # Instance Methods
77
-
78
- attr_accessor :field_mapper
79
-
80
- def to_solr(solr_doc = Hash.new, field_mapper = self.field_mapper) # :nodoc:
81
- self.class.solrize(self, solr_doc, field_mapper)
82
- end
83
-
84
- def solrize_term(term, solr_doc = Hash.new, field_mapper = self.field_mapper, opts={})
85
- self.class.solrize_term(self, term, solr_doc, field_mapper, opts)
86
- end
87
-
88
- def solrize_node(node, term_pointer, term, solr_doc = Hash.new, field_mapper = self.field_mapper, opts={})
89
- self.class.solrize_node(node, self, term_pointer, term, solr_doc, field_mapper, opts)
90
- end
91
-
92
- end
@@ -1,52 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <document>
3
- <attributes>
4
- <attribute type="item">5958</attribute>
5
- <attribute type="objectid">FEI0010-00013142</attribute>
6
- <attribute type="title">Letter from Ellie Engelmore to Professor K. C. Reddy</attribute>
7
- <attribute type="copyright">Presumed under copyright. Do not publish.</attribute>
8
- <attribute type="description"/>
9
- <attribute type="date">1985-12-30</attribute>
10
- <attribute type="datestr">30/12/1985</attribute>
11
- <attribute type="docurl">https://www.stanford.edu/group/salt_project/SLA/Feigenbaum/eaf7000/pdf/00013142.pdf</attribute>
12
- <attribute type="doctn">http://www.stanford.edu/group/salt_project/cgi-bin/SLA/Feigenbaum/eaf7000/png/small_00013142.png</attribute>
13
- <attribute type="url"/>
14
- <attribute type="industryterm"/>
15
- <attribute type="technology">artificial intelligence</attribute>
16
- <attribute type="company"/>
17
- <attribute type="person">ELLIE ENGELMORE</attribute>
18
- <attribute type="year">1985</attribute>
19
- <attribute type="organization">Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder</attribute>
20
- <attribute type="sourcelocation">Folder 15</attribute>
21
- </attributes>
22
- <facets>
23
- <facet type="year" id="49">1980s</facet>
24
- <facet type="year" id="49">1985</facet>
25
- <facet type="year" id="42">1980s</facet>
26
- <facet type="sourcelocation" id="592">Feigenbaum</facet>
27
- <facet type="sourcelocation" id="592">eaf7000</facet>
28
- <facet type="sourcelocation" id="592">Box 51A</facet>
29
- <facet type="sourcelocation" id="594">Feigenbaum</facet>
30
- <facet type="sourcelocation" id="594">eaf7000</facet>
31
- <facet type="sourcelocation" id="594">Box 51A</facet>
32
- <facet type="sourcelocation" id="594">Folder 15</facet>
33
- <facet type="sourcelocation" id="691">Feigenbaum</facet>
34
- <facet type="sourcelocation" id="692">Feigenbaum</facet>
35
- <facet type="sourcelocation" id="692">eaf7000</facet>
36
- <facet type="doctype" id="32">Correspondence</facet>
37
- <facet type="city" id="82">Ann Arbor</facet>
38
- <facet type="city" id="910">Hyderabad</facet>
39
- <facet type="city" id="1519">Palo Alto</facet>
40
- <facet type="country" id="68">India</facet>
41
- <facet type="emailaddress" id="288">EENGELMORE@SUMEX-AIM.ARPA</facet>
42
- <facet type="organization" id="5065">Heuristic Programming Project</facet>
43
- <facet type="organization" id="7012">Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder</facet>
44
- <facet type="organization" id="8878">Professor K. C. Reddy School of Mathematics and Computer/Information Sciences</facet>
45
- <facet type="person" id="5810">ELLIE ENGELMORE</facet>
46
- <facet type="person" id="17934">Reddy</facet>
47
- <facet type="person" id="5787">EDWARD FEIGENBAUM</facet>
48
- <facet type="provinceorstate" id="96">Michigan</facet>
49
- <facet type="provinceorstate" id="27">California</facet>
50
- <facet type="technology" id="1713">artificial intelligence</facet>
51
- </facets>
52
- </document>
@@ -1,5 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <document>
3
- <attributes><attribute type="item">4290</attribute><attribute type="objectid">FEI0010-00011325</attribute><attribute type="title">Letter from Ellie Engelmore to Wemara Lichty</attribute><attribute type="copyright">Presumed under copyright. Do not publish.</attribute><attribute type="description"/><attribute type="date">1984-6-4</attribute><attribute type="datestr">4/6/1984</attribute><attribute type="docurl">https://www.stanford.edu/group/salt_project/SLA/Feigenbaum/eaf7000/pdf/00011325.pdf</attribute><attribute type="doctn">http://www.stanford.edu/group/salt_project/cgi-bin/SLA/Feigenbaum/eaf7000/png/small_00011325.png</attribute><attribute type="url"/><attribute type="industryterm"/><attribute type="technology">artificial intelligence</attribute><attribute type="company"/><attribute type="person">A. FEIGENBAUM</attribute><attribute type="year">1984</attribute><attribute type="organization">McAlcster Hall University</attribute><attribute type="sourcelocation">Folder 5</attribute></attributes>
4
- <facets><facet type="year" id="48">1980s</facet><facet type="year" id="48">1984</facet><facet type="year" id="42">1980s</facet><facet type="sourcelocation" id="578">Feigenbaum</facet><facet type="sourcelocation" id="578">eaf7000</facet><facet type="sourcelocation" id="578">Box 51</facet><facet type="sourcelocation" id="587">Feigenbaum</facet><facet type="sourcelocation" id="587">eaf7000</facet><facet type="sourcelocation" id="587">Box 51</facet><facet type="sourcelocation" id="587">Folder 5</facet><facet type="sourcelocation" id="692">Feigenbaum</facet><facet type="sourcelocation" id="692">eaf7000</facet><facet type="sourcelocation" id="691">Feigenbaum</facet><facet type="doctype" id="32">Correspondence</facet><facet type="city" id="1948">Stanford</facet><facet type="country" id="33">Columbia</facet><facet type="facility" id="2551">U. Missouri library</facet><facet type="organization" id="5065">Heuristic Programming Project</facet><facet type="organization" id="7026">McAlcster Hall University</facet><facet type="organization" id="9645">STANFORD UNIVERSITY</facet><facet type="organization" id="11964">University of Missouri</facet><facet type="organization" id="12407">Wemara Lichty Psychology Department</facet><facet type="person" id="15650">Morton Hunt</facet><facet type="person" id="37">A. FEIGENBAUM</facet><facet type="person" id="5810">ELLIE ENGELMORE</facet><facet type="provinceorstate" id="27">California</facet><facet type="provinceorstate" id="100">Missouri</facet><facet type="publishedmedium" id="44">Artificial Intelligence</facet><facet type="technology" id="1713">artificial intelligence</facet></facets>
5
- </document>
@@ -1,17 +0,0 @@
1
-
2
- <html>
3
- <body>
4
- <pre>
5
- This is
6
- preformatted text.
7
- It preserves both spaces
8
- and line breaks.
9
- </pre>
10
- <p>The pre tag is good for displaying computer code:</p>
11
- <pre>
12
- for i = 1 to 10
13
- print i
14
- next i
15
- </pre>
16
- </body>
17
- </html>
@@ -1,11 +0,0 @@
1
- <dc xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
2
- <dcterms:type xsi:type="DCMITYPE">text</dcterms:type>
3
- <dcterms:medium>Paper Document</dcterms:medium>
4
- <dcterms:rights>Copyright © 2006 All rights reserved. Distribution for commercial purposes is prohibited.</dcterms:rights>
5
- <dcterms:date/>
6
- <dcterms:format>application/tiff</dcterms:format>
7
- <dcterms:format>application/jp2000</dcterms:format>
8
- <dcterms:format>application/pdf</dcterms:format>
9
- <dcterms:title>The Rise and Fall of the YouTube Empire</dcterms:title>
10
- <dcterms:publisher>Sample Unversity</dcterms:publisher>
11
- </dc>
@@ -1,90 +0,0 @@
1
- module Samples
2
- class ModsArticle
3
-
4
- include OM::XML::Document
5
-
6
- set_terminology do |t|
7
- t.root(:path=>"mods", :xmlns=>"http://www.loc.gov/mods/v3", :schema=>"http://www.loc.gov/standards/mods/v3/mods-3-2.xsd", "xmlns:foo"=>"http://my.custom.namespace")
8
-
9
-
10
- t.title_info(:path=>"titleInfo") {
11
- t.main_title(:index_as=>[:facetable],:path=>"title", :label=>"title") {
12
- t.main_title_lang(:path=>{:attribute=> "xml:lang"})
13
- }
14
- t.french_title(:ref=>[:title_info,:main_title], :attributes=>{"xml:lang"=>"fre"})
15
-
16
- t.language(:index_as=>[:facetable, :stored_searchable],:path=>{:attribute=>"lang"})
17
- }
18
- t.language{
19
- t.lang_code(:index_as=>[:facetable], :path=>"languageTerm", :attributes=>{:type=>"code"})
20
- }
21
- t.abstract(:index_as=>[:stored_searchable])
22
- t.subject {
23
- t.topic(:index_as=>[:facetable])
24
- }
25
- t.topic_tag(:proxy=>[:subject, :topic], :index_as=>[:stored_searchable])
26
- # t.topic_tag(:index_as=>[:facetable],:path=>"subject", :default_content_path=>"topic")
27
- # This is a mods:name. The underscore is purely to avoid namespace conflicts.
28
- t.name_ {
29
- # this is a namepart
30
- t.namePart(:type=>:string, :label=>"generic name")
31
- # affiliations are great
32
- t.affiliation
33
- t.institution(:path=>"affiliation", :index_as=>[:facetable], :label=>"organization")
34
- t.displayForm
35
- t.role(:ref=>[:role])
36
- t.description(:index_as=>[:facetable])
37
- t.date(:path=>"namePart", :attributes=>{:type=>"date"})
38
- t.last_name(:path=>"namePart", :attributes=>{:type=>"family"}, :index_as=>[:stored_searchable])
39
- t.first_name(:path=>"namePart", :attributes=>{:type=>"given"}, :label=>"first name")
40
- t.terms_of_address(:path=>"namePart", :attributes=>{:type=>"termsOfAddress"})
41
- t.computing_id
42
- t.name_content(:path=>"text()")
43
- }
44
- # lookup :person, :first_name
45
- t.person(:ref=>:name, :attributes=>{:type=>"personal"}, :index_as=>[:facetable])
46
- t.department(:proxy=>[:person,:description],:index_as=>[:facetable])
47
- t.organization(:ref=>:name, :attributes=>{:type=>"corporate"}, :index_as=>[:facetable])
48
- t.conference(:ref=>:name, :attributes=>{:type=>"conference"}, :index_as=>[:facetable])
49
- t.role {
50
- t.text(:path=>"roleTerm",:attributes=>{:type=>"text"}, :index_as=>[:stored_searchable])
51
- t.code(:path=>"roleTerm",:attributes=>{:type=>"code"})
52
- }
53
- t.journal(:path=>'relatedItem', :attributes=>{:type=>"host"}) {
54
- t.title_info(:index_as=>[:facetable],:ref=>[:title_info])
55
- t.origin_info(:path=>"originInfo") {
56
- t.publisher
57
- t.date_issued(:path=>"dateIssued", :type => :date, :index_as => [:stored_searchable])
58
- t.issuance(:index_as=>[:facetable])
59
- }
60
- t.issn(:path=>"identifier", :attributes=>{:type=>"issn"})
61
- t.issue(:path=>"part") {
62
- t.volume(:path=>"detail", :attributes=>{:type=>"volume"}, :default_content_path=>"number")
63
- t.level(:path=>"detail", :attributes=>{:type=>"number"}, :default_content_path=>"number")
64
- t.extent
65
- t.pages(:path=>"extent", :attributes=>{:unit=>"pages"}) {
66
- t.start
67
- t.end
68
- }
69
- t.start_page(:proxy=>[:pages, :start])
70
- t.end_page(:proxy=>[:pages, :end])
71
- t.publication_date(:path=>"date", :type => :date, :index_as => [:stored_searchable])
72
- }
73
- }
74
- t.note
75
- t.location(:path=>"location") {
76
- t.url(:path=>"url")
77
- }
78
- t.publication_url(:proxy=>[:location,:url])
79
- t.title(:proxy=>[:title_info, :main_title])
80
- t.journal_title(:proxy=>[:journal, :title_info, :main_title])
81
- t.pub_date(:proxy=>[:journal, :issue, :publication_date])
82
- t.issue_date(:ref=>[:journal, :origin_info, :date_issued], :type=> :date)
83
- end
84
-
85
- # Changes from OM::Properties implementation
86
- # renamed family_name => last_name
87
- # start_page & end_page now accessible as [:journal, :issue, :pages, :start] (etc.)
88
-
89
- end
90
- end
@@ -1,90 +0,0 @@
1
- <mods version="3.0" xsi:schemaLocation="http://www.loc.gov/mods/v3
2
- http://www.loc.gov/standards/mods/v3/mods-3-0.xsd" xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
-
4
- <titleInfo>
5
- <nonSort>THE</nonSort>
6
- <title xml:lang="eng">ARTICLE TITLE HYDRANGEA ARTICLE 1</title>
7
- <subTitle>SUBTITLE</subTitle>
8
- </titleInfo>
9
- <titleInfo lang="finnish">
10
- <title>Artikkelin otsikko Hydrangea artiklan 1</title>
11
- </titleInfo>
12
-
13
- <name type="personal">
14
- <namePart type="family">FAMILY NAME</namePart>
15
- <namePart type="given">GIVEN NAMES</namePart>
16
- <namePart type="termsOfAddress">DR.</namePart>
17
- <displayForm>NAME AS IT APPEARS</displayForm>
18
- <affiliation>FACULTY, UNIVERSITY</affiliation>
19
- <role>
20
- <roleTerm authority="marcrelator" type="text">creator</roleTerm>
21
- </role>
22
- <role>
23
- <roleTerm type="text">submitter</roleTerm>
24
- </role>
25
- </name>
26
-
27
- <name type="personal">
28
- <namePart type="family">Gautama</namePart>
29
- <namePart type="given">Siddartha</namePart>
30
- <namePart type="termsOfAddress">Prince</namePart>
31
- <affiliation>Nirvana</affiliation>
32
- <role>
33
- <roleTerm authority="marcrelator" type="text">teacher</roleTerm>
34
- </role>
35
- </name>
36
-
37
- <typeOfResource>text</typeOfResource>
38
- <genre authority="local">journal article</genre>
39
-
40
- <abstract>ABSTRACT</abstract>
41
- <subject>
42
- <topic>TOPIC 1</topic>
43
- <topic>TOPIC 2</topic>
44
- </subject>
45
- <subject authority="AUTHORITY SOURCE (RFCD, LCSH)">
46
- <topic>CONTROLLED TERM</topic>
47
- </subject>
48
-
49
- <language>
50
- <languageTerm authority="iso639-2b" type="code">en-aus </languageTerm>
51
- </language>
52
-
53
- <physicalDescription>
54
- <internetMediaType>application/pdf</internetMediaType>
55
- <extent>36 p.</extent>
56
- </physicalDescription>
57
-
58
- <relatedItem type="host">
59
- <titleInfo>
60
- <title>TITLE OF HOST JOURNAL</title>
61
- </titleInfo>
62
- <originInfo>
63
- <publisher>PUBLISHER</publisher>
64
- <dateIssued>2007-02-15</dateIssued>
65
- </originInfo>
66
- <identifier type="issn">0013-8908</identifier>
67
- <part>
68
- <detail type="volume">
69
- <number>2</number>
70
- </detail>
71
- <detail type="level">
72
- <number>2</number>
73
- </detail>
74
- <extent unit="pages">
75
- <start>195</start>
76
- <end>230</end>
77
- </extent>
78
- <date>2007-02-01</date>
79
- </part>
80
- </relatedItem>
81
-
82
- <identifier type="uri">http://URL.edu.au/</identifier>
83
- <identifier type="doi">doi:10.1006/jmbi.1995.0238</identifier>
84
- <location>
85
- <url>http://URL.edu.au/</url>
86
- </location>
87
- <accessCondition type="restrictionOnAccess">EMBARGO NOTE</accessCondition>
88
- <accessCondition type="use and reproduction">OPEN ACCESS</accessCondition>
89
-
90
- </mods>
@@ -1,16 +0,0 @@
1
- id: pid
2
- default: edible
3
- edible:
4
- date: _edible_date
5
- string: _edible_string
6
- text: _edible_text
7
- symbol: _edible_sym
8
- integer: _edible_int
9
- long: _edible_long
10
- boolean: _edible_bool
11
- float: _edible_float
12
- double: _edible_double
13
- displayable: _display
14
- facetable: _facet
15
- sortable: _sort
16
- unstemmed_searchable: _unstem_search
@@ -1,109 +0,0 @@
1
- require 'spec_helper'
2
- require 'fixtures/mods_article'
3
-
4
- # TODO: there should be no dependencies on OM in Solrizer
5
- describe Solrizer::XML::TerminologyBasedSolrizer do
6
-
7
- before(:all) do
8
- Samples::ModsArticle.send(:include, Solrizer::XML::TerminologyBasedSolrizer)
9
- end
10
-
11
- before(:each) do
12
- article_xml = fixture( File.join("mods_articles", "hydrangea_article1.xml") )
13
- @mods_article = Samples::ModsArticle.from_xml(article_xml)
14
- end
15
-
16
- describe ".to_solr" do
17
-
18
- it "should provide .to_solr and return a SolrDocument" do
19
- @mods_article.should respond_to(:to_solr)
20
- @mods_article.to_solr.should be_kind_of(Hash)
21
- end
22
-
23
- it "should optionally allow you to provide the Hash to add fields to and return that document when done" do
24
- doc = Hash.new
25
- @mods_article.to_solr(doc).should equal(doc)
26
- end
27
-
28
- it "should iterate through the terminology terms, calling .solrize_term on each and passing in the solr doc" do
29
- solr_doc = Hash.new
30
- @mods_article.field_mapper = Solrizer::FieldMapper.new
31
- Samples::ModsArticle.terminology.terms.each_pair do |k,v|
32
- @mods_article.should_receive(:solrize_term).with(v, solr_doc, @mods_article.field_mapper)
33
- end
34
- @mods_article.to_solr(solr_doc)
35
- end
36
-
37
- it "should use Solr mappings to generate field names" do
38
- solr_doc = @mods_article.to_solr
39
- solr_doc["abstract"].should be_nil
40
- # NOTE: OM's old default expected stored and indexed; this is a change.
41
- solr_doc["abstract_tesim"].should == ["ABSTRACT"]
42
- solr_doc["title_info_1_language_tesim"].should == ["finnish"]
43
- solr_doc["person_1_role_0_text_tesim"].should == ["teacher"]
44
- # No index_as on the code field.
45
- solr_doc["person_1_role_0_code_tesim"].should be_nil
46
- solr_doc["person_last_name_tesim"].sort.should == ["FAMILY NAME", "Gautama"]
47
- solr_doc["topic_tag_tesim"].sort.should == ["CONTROLLED TERM", "TOPIC 1", "TOPIC 2"]
48
- # These are a holdover from an old version of OM
49
- solr_doc['journal_0_issue_0_publication_date_dtsim'].should == ["2007-02-01T00:00:00Z"]
50
- end
51
-
52
- end
53
-
54
- describe ".solrize_term" do
55
-
56
- it "should add fields to a solr document for all nodes corresponding to the given term and its children" do
57
- solr_doc = Hash.new
58
- result = @mods_article.solrize_term(Samples::ModsArticle.terminology.retrieve_term(:title_info), solr_doc)
59
- result.should == solr_doc
60
- end
61
-
62
- it "should add multiple fields based on index_as" do
63
- fake_solr_doc = {}
64
- term = Samples::ModsArticle.terminology.retrieve_term(:name)
65
- term.children[:namePart].index_as = [:searchable, :displayable, :facetable]
66
-
67
- @mods_article.solrize_term(term, fake_solr_doc)
68
-
69
- expected_names = ["DR.", "FAMILY NAME", "GIVEN NAMES"]
70
- %w(_teim _sim).each do |suffix|
71
- actual_names = fake_solr_doc["name_0_namePart#{suffix}"].sort
72
- actual_names.should == expected_names
73
- end
74
- end
75
-
76
- it "should add fields based on type using proxy" do
77
- unless RUBY_VERSION.match("1.8.7")
78
- solr_doc = Hash.new
79
- result = @mods_article.solrize_term(Samples::ModsArticle.terminology.retrieve_term(:pub_date), solr_doc)
80
- solr_doc["pub_date_dtsim"].should == ["2007-02-01T00:00:00Z"]
81
- end
82
- end
83
-
84
- it "should add fields based on type using ref" do
85
- solr_doc = Hash.new
86
- result = @mods_article.solrize_term(Samples::ModsArticle.terminology.retrieve_term(:issue_date), solr_doc)
87
- solr_doc["issue_date_dtsim"].should == ["2007-02-15T00:00:00Z"]
88
- end
89
-
90
- it "shouldn't index terms where index_as is an empty array" do
91
- fake_solr_doc = {}
92
- term = Samples::ModsArticle.terminology.retrieve_term(:name)
93
- term.children[:namePart].index_as = []
94
-
95
- @mods_article.solrize_term(term, fake_solr_doc)
96
- fake_solr_doc["name_0_namePart_teim"].should be_nil
97
- end
98
-
99
- it "should index terms where index_as is searchable" do
100
- fake_solr_doc = {}
101
- term = Samples::ModsArticle.terminology.retrieve_term(:name)
102
- term.children[:namePart].index_as = [:searchable]
103
-
104
- @mods_article.solrize_term(term, fake_solr_doc)
105
-
106
- fake_solr_doc["name_0_namePart_teim"].sort.should == ["DR.", "FAMILY NAME", "GIVEN NAMES"]
107
- end
108
- end
109
- end