solrizer 3.0.0.pre7 → 3.0.0.pre8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/solrizer/extractor.rb +0 -3
- data/lib/solrizer/field_mapper.rb +0 -2
- data/lib/solrizer/version.rb +1 -1
- data/lib/solrizer/xml/extractor.rb +1 -1
- data/lib/solrizer/xml.rb +1 -3
- data/solrizer.gemspec +0 -1
- metadata +5 -35
- data/lib/solrizer/xml/terminology_based_solrizer.rb +0 -92
- data/spec/fixtures/druid-bv448hq0314-extProperties.xml +0 -52
- data/spec/fixtures/druid-cm234kq4672-extProperties.xml +0 -5
- data/spec/fixtures/druid-cm234kq4672-stories.xml +0 -17
- data/spec/fixtures/druid-hc513kw4806-descMetadata.xml +0 -11
- data/spec/fixtures/mods_article.rb +0 -90
- data/spec/fixtures/mods_articles/hydrangea_article1.xml +0 -90
- data/spec/fixtures/test_solr_mappings.yml +0 -16
- data/spec/units/xml_terminology_based_solrizer_spec.rb +0 -109
data/lib/solrizer/extractor.rb
CHANGED
@@ -34,8 +34,6 @@ module Solrizer
|
|
34
34
|
# # t.dish_name :index_as => [:some_field_type] -maps to-> dish_name_ssim
|
35
35
|
# # t.ingredients :index_as => [:some_field_type, :edible] -maps to-> ingredients_ssim, ingredients_food
|
36
36
|
#
|
37
|
-
# (See Solrizer::XML::TerminologyBasedSolrizer for instructions on applying a custom mapping once you have defined it.)
|
38
|
-
#
|
39
37
|
#
|
40
38
|
# == Custom Value Converters
|
41
39
|
#
|
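data/lib/solrizer/version.rb
CHANGED
(hunk not captured in this extract; the file list above records +1 -1)
data/lib/solrizer/xml/extractor.rb
CHANGED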
@@ -4,7 +4,7 @@ module Solrizer::XML::Extractor
|
|
4
4
|
|
5
5
|
#
|
6
6
|
# This method extracts solr fields from simple xml
|
7
|
-
# If you want to do anything more nuanced with the xml, use
|
7
|
+
# If you want to do anything more nuanced with the xml, use OM instead.
|
8
8
|
#
|
9
9
|
# @param [xml] text xml content to index
|
10
10
|
# @param [Hash] solr_doc
|
data/lib/solrizer/xml.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
|
-
require "solrizer"
|
2
|
-
require "om"
|
3
1
|
module Solrizer::XML
|
4
2
|
end
|
5
3
|
Dir[File.join(File.dirname(__FILE__),"xml","*.rb")].each {|file| require file }
|
6
4
|
|
7
|
-
Solrizer::Extractor.send(:include, Solrizer::XML::Extractor)
|
5
|
+
Solrizer::Extractor.send(:include, Solrizer::XML::Extractor)
|
data/solrizer.gemspec
CHANGED
@@ -13,7 +13,6 @@ Gem::Specification.new do |s|
|
|
13
13
|
s.description = %q{Use solrizer to populate solr indexes. You can run solrizer from within your app, using the provided rake tasks, or as a JMS listener}
|
14
14
|
|
15
15
|
s.add_dependency "nokogiri"
|
16
|
-
s.add_dependency "om", ">=1.5.0"
|
17
16
|
s.add_dependency "xml-simple"
|
18
17
|
s.add_dependency "mediashelf-loggable", "~>0.4.7"
|
19
18
|
s.add_dependency "stomp"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: solrizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0.pre7
|
4
|
+
version: 3.0.0.pre8
|
5
5
|
prerelease: 6
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-02-
|
12
|
+
date: 2013-02-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -27,22 +27,6 @@ dependencies:
|
|
27
27
|
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '0'
|
30
|
-
- !ruby/object:Gem::Dependency
|
31
|
-
name: om
|
32
|
-
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
|
-
requirements:
|
35
|
-
- - ">="
|
36
|
-
- !ruby/object:Gem::Version
|
37
|
-
version: 1.5.0
|
38
|
-
type: :runtime
|
39
|
-
prerelease: false
|
40
|
-
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
|
-
requirements:
|
43
|
-
- - ">="
|
44
|
-
- !ruby/object:Gem::Version
|
45
|
-
version: 1.5.0
|
46
30
|
- !ruby/object:Gem::Dependency
|
47
31
|
name: xml-simple
|
48
32
|
requirement: !ruby/object:Gem::Requirement
|
@@ -218,25 +202,16 @@ files:
|
|
218
202
|
- lib/solrizer/version.rb
|
219
203
|
- lib/solrizer/xml.rb
|
220
204
|
- lib/solrizer/xml/extractor.rb
|
221
|
-
- lib/solrizer/xml/terminology_based_solrizer.rb
|
222
205
|
- lib/tasks/solrizer.rake
|
223
206
|
- solrizer.gemspec
|
224
207
|
- spec/.rspec
|
225
208
|
- spec/fixtures/druid-bv448hq0314-descMetadata.xml
|
226
|
-
- spec/fixtures/druid-bv448hq0314-extProperties.xml
|
227
|
-
- spec/fixtures/druid-cm234kq4672-extProperties.xml
|
228
|
-
- spec/fixtures/druid-cm234kq4672-stories.xml
|
229
|
-
- spec/fixtures/druid-hc513kw4806-descMetadata.xml
|
230
|
-
- spec/fixtures/mods_article.rb
|
231
|
-
- spec/fixtures/mods_articles/hydrangea_article1.xml
|
232
|
-
- spec/fixtures/test_solr_mappings.yml
|
233
209
|
- spec/spec_helper.rb
|
234
210
|
- spec/units/common_spec.rb
|
235
211
|
- spec/units/extractor_spec.rb
|
236
212
|
- spec/units/field_mapper_spec.rb
|
237
213
|
- spec/units/solrizer_spec.rb
|
238
214
|
- spec/units/xml_extractor_spec.rb
|
239
|
-
- spec/units/xml_terminology_based_solrizer_spec.rb
|
240
215
|
homepage: http://github.com/projecthydra/solrizer
|
241
216
|
licenses: []
|
242
217
|
post_install_message:
|
@@ -249,6 +224,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
249
224
|
- - ">="
|
250
225
|
- !ruby/object:Gem::Version
|
251
226
|
version: '0'
|
227
|
+
segments:
|
228
|
+
- 0
|
229
|
+
hash: 1646376342601153569
|
252
230
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
253
231
|
none: false
|
254
232
|
requirements:
|
@@ -264,18 +242,10 @@ summary: A utility for building solr indexes, usually from Fedora repository con
|
|
264
242
|
with solrizer-fedora extension gem.
|
265
243
|
test_files:
|
266
244
|
- spec/fixtures/druid-bv448hq0314-descMetadata.xml
|
267
|
-
- spec/fixtures/druid-bv448hq0314-extProperties.xml
|
268
|
-
- spec/fixtures/druid-cm234kq4672-extProperties.xml
|
269
|
-
- spec/fixtures/druid-cm234kq4672-stories.xml
|
270
|
-
- spec/fixtures/druid-hc513kw4806-descMetadata.xml
|
271
|
-
- spec/fixtures/mods_article.rb
|
272
|
-
- spec/fixtures/mods_articles/hydrangea_article1.xml
|
273
|
-
- spec/fixtures/test_solr_mappings.yml
|
274
245
|
- spec/spec_helper.rb
|
275
246
|
- spec/units/common_spec.rb
|
276
247
|
- spec/units/extractor_spec.rb
|
277
248
|
- spec/units/field_mapper_spec.rb
|
278
249
|
- spec/units/solrizer_spec.rb
|
279
250
|
- spec/units/xml_extractor_spec.rb
|
280
|
-
- spec/units/xml_terminology_based_solrizer_spec.rb
|
281
251
|
has_rdoc:
|
data/lib/solrizer/xml/terminology_based_solrizer.rb
DELETED
@@ -1,92 +0,0 @@
|
|
1
|
-
# This module is only suitable to mix into Classes that use the OM::XML::Document Module
|
2
|
-
module Solrizer::XML::TerminologyBasedSolrizer
|
3
|
-
def self.included(klass)
|
4
|
-
klass.send(:include, Solrizer::Common)
|
5
|
-
klass.send(:extend, ClassMethods)
|
6
|
-
end
|
7
|
-
|
8
|
-
# Module Methods
|
9
|
-
module ClassMethods
|
10
|
-
|
11
|
-
# Build a solr document from +doc+ based on its terminology
|
12
|
-
# @param [OM::XML::Document] doc
|
13
|
-
# @param [Hash] (optional) solr_doc (values hash) to populate
|
14
|
-
def solrize(doc, solr_doc=Hash.new, field_mapper = nil)
|
15
|
-
unless doc.class.terminology.nil?
|
16
|
-
doc.class.terminology.terms.each_pair do |term_name,term|
|
17
|
-
doc.solrize_term(term, solr_doc, field_mapper)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
return solr_doc
|
22
|
-
end
|
23
|
-
|
24
|
-
# Populate a solr document with fields based on nodes in +xml+
|
25
|
-
# Values for a term are gathered by to +term_pointer+ using OM::XML::TermValueOperators.term_values
|
26
|
-
# and are deserialized by OM according to :type, as determined in its terminology.
|
27
|
-
# The content of the actual field in solr is each +node+ of the +nodeset+ returned by OM,
|
28
|
-
# rendered to a string.
|
29
|
-
# @param [OM::XML::Document] doc xml document to extract values from
|
30
|
-
# @param [OM::XML::Term] term corresponding to desired xml values
|
31
|
-
# @param [Hash] (optional) solr_doc (values hash) to populate
|
32
|
-
def solrize_term(doc, term, solr_doc = Hash.new, field_mapper = nil, opts={})
|
33
|
-
parents = opts.fetch(:parents, [])
|
34
|
-
term_pointer = parents+[term.name]
|
35
|
-
nodeset = doc.term_values(*term_pointer)
|
36
|
-
|
37
|
-
nodeset.each do |n|
|
38
|
-
doc.solrize_node(n, term_pointer, term, solr_doc, field_mapper)
|
39
|
-
# FIXME: there should be no dependencies on OM in Solrizer
|
40
|
-
unless term.kind_of? OM::XML::NamedTermProxy
|
41
|
-
term.children.each_pair do |child_term_name, child_term|
|
42
|
-
doc.solrize_term(child_term, solr_doc, field_mapper, opts={:parents=>parents+[{term.name=>nodeset.index(n)}]})
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
solr_doc
|
47
|
-
end
|
48
|
-
|
49
|
-
# Populate a solr document with solr fields corresponding to the given xml node
|
50
|
-
# Field names are generated using settings from the term in the +doc+'s terminology corresponding to +term_pointer+
|
51
|
-
# If the supplied term does not have an index_as attribute, no indexing will be performed.
|
52
|
-
# @param [Nokogiri::XML::Node] node to solrize
|
53
|
-
# @param [OM::XML::Document] doc document the node came from
|
54
|
-
# @param [Array] term_pointer Array pointing to the term that should be used for solrization settings
|
55
|
-
# @param [Term] term the term to be solrized
|
56
|
-
# @param [Hash] (optional) solr_doc (values hash) to populate
|
57
|
-
# @return [Hash] the solr doc
|
58
|
-
def solrize_node(node_value, doc, term_pointer, term, solr_doc = Hash.new, field_mapper = nil, opts = {})
|
59
|
-
return solr_doc unless term.index_as && !term.index_as.empty?
|
60
|
-
|
61
|
-
# FIXME: there should be no dependencies on OM in Solrizer
|
62
|
-
generic_field_name_base = OM::XML::Terminology.term_generic_name(*term_pointer)
|
63
|
-
create_and_insert_terms(generic_field_name_base, node_value, term.index_as, solr_doc)
|
64
|
-
|
65
|
-
if term_pointer.length > 1
|
66
|
-
# FIXME: there should be no dependencies on OM in Solrizer
|
67
|
-
hierarchical_field_name_base = OM::XML::Terminology.term_hierarchical_name(*term_pointer)
|
68
|
-
create_and_insert_terms(hierarchical_field_name_base, node_value, term.index_as, solr_doc)
|
69
|
-
end
|
70
|
-
solr_doc
|
71
|
-
end
|
72
|
-
|
73
|
-
end
|
74
|
-
|
75
|
-
|
76
|
-
# Instance Methods
|
77
|
-
|
78
|
-
attr_accessor :field_mapper
|
79
|
-
|
80
|
-
def to_solr(solr_doc = Hash.new, field_mapper = self.field_mapper) # :nodoc:
|
81
|
-
self.class.solrize(self, solr_doc, field_mapper)
|
82
|
-
end
|
83
|
-
|
84
|
-
def solrize_term(term, solr_doc = Hash.new, field_mapper = self.field_mapper, opts={})
|
85
|
-
self.class.solrize_term(self, term, solr_doc, field_mapper, opts)
|
86
|
-
end
|
87
|
-
|
88
|
-
def solrize_node(node, term_pointer, term, solr_doc = Hash.new, field_mapper = self.field_mapper, opts={})
|
89
|
-
self.class.solrize_node(node, self, term_pointer, term, solr_doc, field_mapper, opts)
|
90
|
-
end
|
91
|
-
|
92
|
-
end
|
data/spec/fixtures/druid-bv448hq0314-extProperties.xml
DELETED
@@ -1,52 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<document>
|
3
|
-
<attributes>
|
4
|
-
<attribute type="item">5958</attribute>
|
5
|
-
<attribute type="objectid">FEI0010-00013142</attribute>
|
6
|
-
<attribute type="title">Letter from Ellie Engelmore to Professor K. C. Reddy</attribute>
|
7
|
-
<attribute type="copyright">Presumed under copyright. Do not publish.</attribute>
|
8
|
-
<attribute type="description"/>
|
9
|
-
<attribute type="date">1985-12-30</attribute>
|
10
|
-
<attribute type="datestr">30/12/1985</attribute>
|
11
|
-
<attribute type="docurl">https://www.stanford.edu/group/salt_project/SLA/Feigenbaum/eaf7000/pdf/00013142.pdf</attribute>
|
12
|
-
<attribute type="doctn">http://www.stanford.edu/group/salt_project/cgi-bin/SLA/Feigenbaum/eaf7000/png/small_00013142.png</attribute>
|
13
|
-
<attribute type="url"/>
|
14
|
-
<attribute type="industryterm"/>
|
15
|
-
<attribute type="technology">artificial intelligence</attribute>
|
16
|
-
<attribute type="company"/>
|
17
|
-
<attribute type="person">ELLIE ENGELMORE</attribute>
|
18
|
-
<attribute type="year">1985</attribute>
|
19
|
-
<attribute type="organization">Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder</attribute>
|
20
|
-
<attribute type="sourcelocation">Folder 15</attribute>
|
21
|
-
</attributes>
|
22
|
-
<facets>
|
23
|
-
<facet type="year" id="49">1980s</facet>
|
24
|
-
<facet type="year" id="49">1985</facet>
|
25
|
-
<facet type="year" id="42">1980s</facet>
|
26
|
-
<facet type="sourcelocation" id="592">Feigenbaum</facet>
|
27
|
-
<facet type="sourcelocation" id="592">eaf7000</facet>
|
28
|
-
<facet type="sourcelocation" id="592">Box 51A</facet>
|
29
|
-
<facet type="sourcelocation" id="594">Feigenbaum</facet>
|
30
|
-
<facet type="sourcelocation" id="594">eaf7000</facet>
|
31
|
-
<facet type="sourcelocation" id="594">Box 51A</facet>
|
32
|
-
<facet type="sourcelocation" id="594">Folder 15</facet>
|
33
|
-
<facet type="sourcelocation" id="691">Feigenbaum</facet>
|
34
|
-
<facet type="sourcelocation" id="692">Feigenbaum</facet>
|
35
|
-
<facet type="sourcelocation" id="692">eaf7000</facet>
|
36
|
-
<facet type="doctype" id="32">Correspondence</facet>
|
37
|
-
<facet type="city" id="82">Ann Arbor</facet>
|
38
|
-
<facet type="city" id="910">Hyderabad</facet>
|
39
|
-
<facet type="city" id="1519">Palo Alto</facet>
|
40
|
-
<facet type="country" id="68">India</facet>
|
41
|
-
<facet type="emailaddress" id="288">EENGELMORE@SUMEX-AIM.ARPA</facet>
|
42
|
-
<facet type="organization" id="5065">Heuristic Programming Project</facet>
|
43
|
-
<facet type="organization" id="7012">Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder</facet>
|
44
|
-
<facet type="organization" id="8878">Professor K. C. Reddy School of Mathematics and Computer/Information Sciences</facet>
|
45
|
-
<facet type="person" id="5810">ELLIE ENGELMORE</facet>
|
46
|
-
<facet type="person" id="17934">Reddy</facet>
|
47
|
-
<facet type="person" id="5787">EDWARD FEIGENBAUM</facet>
|
48
|
-
<facet type="provinceorstate" id="96">Michigan</facet>
|
49
|
-
<facet type="provinceorstate" id="27">California</facet>
|
50
|
-
<facet type="technology" id="1713">artificial intelligence</facet>
|
51
|
-
</facets>
|
52
|
-
</document>
|
data/spec/fixtures/druid-cm234kq4672-extProperties.xml
DELETED
@@ -1,5 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<document>
|
3
|
-
<attributes><attribute type="item">4290</attribute><attribute type="objectid">FEI0010-00011325</attribute><attribute type="title">Letter from Ellie Engelmore to Wemara Lichty</attribute><attribute type="copyright">Presumed under copyright. Do not publish.</attribute><attribute type="description"/><attribute type="date">1984-6-4</attribute><attribute type="datestr">4/6/1984</attribute><attribute type="docurl">https://www.stanford.edu/group/salt_project/SLA/Feigenbaum/eaf7000/pdf/00011325.pdf</attribute><attribute type="doctn">http://www.stanford.edu/group/salt_project/cgi-bin/SLA/Feigenbaum/eaf7000/png/small_00011325.png</attribute><attribute type="url"/><attribute type="industryterm"/><attribute type="technology">artificial intelligence</attribute><attribute type="company"/><attribute type="person">A. FEIGENBAUM</attribute><attribute type="year">1984</attribute><attribute type="organization">McAlcster Hall University</attribute><attribute type="sourcelocation">Folder 5</attribute></attributes>
|
4
|
-
<facets><facet type="year" id="48">1980s</facet><facet type="year" id="48">1984</facet><facet type="year" id="42">1980s</facet><facet type="sourcelocation" id="578">Feigenbaum</facet><facet type="sourcelocation" id="578">eaf7000</facet><facet type="sourcelocation" id="578">Box 51</facet><facet type="sourcelocation" id="587">Feigenbaum</facet><facet type="sourcelocation" id="587">eaf7000</facet><facet type="sourcelocation" id="587">Box 51</facet><facet type="sourcelocation" id="587">Folder 5</facet><facet type="sourcelocation" id="692">Feigenbaum</facet><facet type="sourcelocation" id="692">eaf7000</facet><facet type="sourcelocation" id="691">Feigenbaum</facet><facet type="doctype" id="32">Correspondence</facet><facet type="city" id="1948">Stanford</facet><facet type="country" id="33">Columbia</facet><facet type="facility" id="2551">U. Missouri library</facet><facet type="organization" id="5065">Heuristic Programming Project</facet><facet type="organization" id="7026">McAlcster Hall University</facet><facet type="organization" id="9645">STANFORD UNIVERSITY</facet><facet type="organization" id="11964">University of Missouri</facet><facet type="organization" id="12407">Wemara Lichty Psychology Department</facet><facet type="person" id="15650">Morton Hunt</facet><facet type="person" id="37">A. FEIGENBAUM</facet><facet type="person" id="5810">ELLIE ENGELMORE</facet><facet type="provinceorstate" id="27">California</facet><facet type="provinceorstate" id="100">Missouri</facet><facet type="publishedmedium" id="44">Artificial Intelligence</facet><facet type="technology" id="1713">artificial intelligence</facet></facets>
|
5
|
-
</document>
|
data/spec/fixtures/druid-cm234kq4672-stories.xml
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
|
2
|
-
<html>
|
3
|
-
<body>
|
4
|
-
<pre>
|
5
|
-
This is
|
6
|
-
preformatted text.
|
7
|
-
It preserves both spaces
|
8
|
-
and line breaks.
|
9
|
-
</pre>
|
10
|
-
<p>The pre tag is good for displaying computer code:</p>
|
11
|
-
<pre>
|
12
|
-
for i = 1 to 10
|
13
|
-
print i
|
14
|
-
next i
|
15
|
-
</pre>
|
16
|
-
</body>
|
17
|
-
</html>
|
data/spec/fixtures/druid-hc513kw4806-descMetadata.xml
DELETED
@@ -1,11 +0,0 @@
|
|
1
|
-
<dc xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
2
|
-
<dcterms:type xsi:type="DCMITYPE">text</dcterms:type>
|
3
|
-
<dcterms:medium>Paper Document</dcterms:medium>
|
4
|
-
<dcterms:rights>Copyright © 2006 All rights reserved. Distribution for commercial purposes is prohibited.</dcterms:rights>
|
5
|
-
<dcterms:date/>
|
6
|
-
<dcterms:format>application/tiff</dcterms:format>
|
7
|
-
<dcterms:format>application/jp2000</dcterms:format>
|
8
|
-
<dcterms:format>application/pdf</dcterms:format>
|
9
|
-
<dcterms:title>The Rise and Fall of the YouTube Empire</dcterms:title>
|
10
|
-
<dcterms:publisher>Sample Unversity</dcterms:publisher>
|
11
|
-
</dc>
|
data/spec/fixtures/mods_article.rb
DELETED
@@ -1,90 +0,0 @@
|
|
1
|
-
module Samples
|
2
|
-
class ModsArticle
|
3
|
-
|
4
|
-
include OM::XML::Document
|
5
|
-
|
6
|
-
set_terminology do |t|
|
7
|
-
t.root(:path=>"mods", :xmlns=>"http://www.loc.gov/mods/v3", :schema=>"http://www.loc.gov/standards/mods/v3/mods-3-2.xsd", "xmlns:foo"=>"http://my.custom.namespace")
|
8
|
-
|
9
|
-
|
10
|
-
t.title_info(:path=>"titleInfo") {
|
11
|
-
t.main_title(:index_as=>[:facetable],:path=>"title", :label=>"title") {
|
12
|
-
t.main_title_lang(:path=>{:attribute=> "xml:lang"})
|
13
|
-
}
|
14
|
-
t.french_title(:ref=>[:title_info,:main_title], :attributes=>{"xml:lang"=>"fre"})
|
15
|
-
|
16
|
-
t.language(:index_as=>[:facetable, :stored_searchable],:path=>{:attribute=>"lang"})
|
17
|
-
}
|
18
|
-
t.language{
|
19
|
-
t.lang_code(:index_as=>[:facetable], :path=>"languageTerm", :attributes=>{:type=>"code"})
|
20
|
-
}
|
21
|
-
t.abstract(:index_as=>[:stored_searchable])
|
22
|
-
t.subject {
|
23
|
-
t.topic(:index_as=>[:facetable])
|
24
|
-
}
|
25
|
-
t.topic_tag(:proxy=>[:subject, :topic], :index_as=>[:stored_searchable])
|
26
|
-
# t.topic_tag(:index_as=>[:facetable],:path=>"subject", :default_content_path=>"topic")
|
27
|
-
# This is a mods:name. The underscore is purely to avoid namespace conflicts.
|
28
|
-
t.name_ {
|
29
|
-
# this is a namepart
|
30
|
-
t.namePart(:type=>:string, :label=>"generic name")
|
31
|
-
# affiliations are great
|
32
|
-
t.affiliation
|
33
|
-
t.institution(:path=>"affiliation", :index_as=>[:facetable], :label=>"organization")
|
34
|
-
t.displayForm
|
35
|
-
t.role(:ref=>[:role])
|
36
|
-
t.description(:index_as=>[:facetable])
|
37
|
-
t.date(:path=>"namePart", :attributes=>{:type=>"date"})
|
38
|
-
t.last_name(:path=>"namePart", :attributes=>{:type=>"family"}, :index_as=>[:stored_searchable])
|
39
|
-
t.first_name(:path=>"namePart", :attributes=>{:type=>"given"}, :label=>"first name")
|
40
|
-
t.terms_of_address(:path=>"namePart", :attributes=>{:type=>"termsOfAddress"})
|
41
|
-
t.computing_id
|
42
|
-
t.name_content(:path=>"text()")
|
43
|
-
}
|
44
|
-
# lookup :person, :first_name
|
45
|
-
t.person(:ref=>:name, :attributes=>{:type=>"personal"}, :index_as=>[:facetable])
|
46
|
-
t.department(:proxy=>[:person,:description],:index_as=>[:facetable])
|
47
|
-
t.organization(:ref=>:name, :attributes=>{:type=>"corporate"}, :index_as=>[:facetable])
|
48
|
-
t.conference(:ref=>:name, :attributes=>{:type=>"conference"}, :index_as=>[:facetable])
|
49
|
-
t.role {
|
50
|
-
t.text(:path=>"roleTerm",:attributes=>{:type=>"text"}, :index_as=>[:stored_searchable])
|
51
|
-
t.code(:path=>"roleTerm",:attributes=>{:type=>"code"})
|
52
|
-
}
|
53
|
-
t.journal(:path=>'relatedItem', :attributes=>{:type=>"host"}) {
|
54
|
-
t.title_info(:index_as=>[:facetable],:ref=>[:title_info])
|
55
|
-
t.origin_info(:path=>"originInfo") {
|
56
|
-
t.publisher
|
57
|
-
t.date_issued(:path=>"dateIssued", :type => :date, :index_as => [:stored_searchable])
|
58
|
-
t.issuance(:index_as=>[:facetable])
|
59
|
-
}
|
60
|
-
t.issn(:path=>"identifier", :attributes=>{:type=>"issn"})
|
61
|
-
t.issue(:path=>"part") {
|
62
|
-
t.volume(:path=>"detail", :attributes=>{:type=>"volume"}, :default_content_path=>"number")
|
63
|
-
t.level(:path=>"detail", :attributes=>{:type=>"number"}, :default_content_path=>"number")
|
64
|
-
t.extent
|
65
|
-
t.pages(:path=>"extent", :attributes=>{:unit=>"pages"}) {
|
66
|
-
t.start
|
67
|
-
t.end
|
68
|
-
}
|
69
|
-
t.start_page(:proxy=>[:pages, :start])
|
70
|
-
t.end_page(:proxy=>[:pages, :end])
|
71
|
-
t.publication_date(:path=>"date", :type => :date, :index_as => [:stored_searchable])
|
72
|
-
}
|
73
|
-
}
|
74
|
-
t.note
|
75
|
-
t.location(:path=>"location") {
|
76
|
-
t.url(:path=>"url")
|
77
|
-
}
|
78
|
-
t.publication_url(:proxy=>[:location,:url])
|
79
|
-
t.title(:proxy=>[:title_info, :main_title])
|
80
|
-
t.journal_title(:proxy=>[:journal, :title_info, :main_title])
|
81
|
-
t.pub_date(:proxy=>[:journal, :issue, :publication_date])
|
82
|
-
t.issue_date(:ref=>[:journal, :origin_info, :date_issued], :type=> :date)
|
83
|
-
end
|
84
|
-
|
85
|
-
# Changes from OM::Properties implementation
|
86
|
-
# renamed family_name => last_name
|
87
|
-
# start_page & end_page now accessible as [:journal, :issue, :pages, :start] (etc.)
|
88
|
-
|
89
|
-
end
|
90
|
-
end
|
data/spec/fixtures/mods_articles/hydrangea_article1.xml
DELETED
@@ -1,90 +0,0 @@
|
|
1
|
-
<mods version="3.0" xsi:schemaLocation="http://www.loc.gov/mods/v3
|
2
|
-
http://www.loc.gov/standards/mods/v3/mods-3-0.xsd" xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
3
|
-
|
4
|
-
<titleInfo>
|
5
|
-
<nonSort>THE</nonSort>
|
6
|
-
<title xml:lang="eng">ARTICLE TITLE HYDRANGEA ARTICLE 1</title>
|
7
|
-
<subTitle>SUBTITLE</subTitle>
|
8
|
-
</titleInfo>
|
9
|
-
<titleInfo lang="finnish">
|
10
|
-
<title>Artikkelin otsikko Hydrangea artiklan 1</title>
|
11
|
-
</titleInfo>
|
12
|
-
|
13
|
-
<name type="personal">
|
14
|
-
<namePart type="family">FAMILY NAME</namePart>
|
15
|
-
<namePart type="given">GIVEN NAMES</namePart>
|
16
|
-
<namePart type="termsOfAddress">DR.</namePart>
|
17
|
-
<displayForm>NAME AS IT APPEARS</displayForm>
|
18
|
-
<affiliation>FACULTY, UNIVERSITY</affiliation>
|
19
|
-
<role>
|
20
|
-
<roleTerm authority="marcrelator" type="text">creator</roleTerm>
|
21
|
-
</role>
|
22
|
-
<role>
|
23
|
-
<roleTerm type="text">submitter</roleTerm>
|
24
|
-
</role>
|
25
|
-
</name>
|
26
|
-
|
27
|
-
<name type="personal">
|
28
|
-
<namePart type="family">Gautama</namePart>
|
29
|
-
<namePart type="given">Siddartha</namePart>
|
30
|
-
<namePart type="termsOfAddress">Prince</namePart>
|
31
|
-
<affiliation>Nirvana</affiliation>
|
32
|
-
<role>
|
33
|
-
<roleTerm authority="marcrelator" type="text">teacher</roleTerm>
|
34
|
-
</role>
|
35
|
-
</name>
|
36
|
-
|
37
|
-
<typeOfResource>text</typeOfResource>
|
38
|
-
<genre authority="local">journal article</genre>
|
39
|
-
|
40
|
-
<abstract>ABSTRACT</abstract>
|
41
|
-
<subject>
|
42
|
-
<topic>TOPIC 1</topic>
|
43
|
-
<topic>TOPIC 2</topic>
|
44
|
-
</subject>
|
45
|
-
<subject authority="AUTHORITY SOURCE (RFCD, LCSH)">
|
46
|
-
<topic>CONTROLLED TERM</topic>
|
47
|
-
</subject>
|
48
|
-
|
49
|
-
<language>
|
50
|
-
<languageTerm authority="iso639-2b" type="code">en-aus </languageTerm>
|
51
|
-
</language>
|
52
|
-
|
53
|
-
<physicalDescription>
|
54
|
-
<internetMediaType>application/pdf</internetMediaType>
|
55
|
-
<extent>36 p.</extent>
|
56
|
-
</physicalDescription>
|
57
|
-
|
58
|
-
<relatedItem type="host">
|
59
|
-
<titleInfo>
|
60
|
-
<title>TITLE OF HOST JOURNAL</title>
|
61
|
-
</titleInfo>
|
62
|
-
<originInfo>
|
63
|
-
<publisher>PUBLISHER</publisher>
|
64
|
-
<dateIssued>2007-02-15</dateIssued>
|
65
|
-
</originInfo>
|
66
|
-
<identifier type="issn">0013-8908</identifier>
|
67
|
-
<part>
|
68
|
-
<detail type="volume">
|
69
|
-
<number>2</number>
|
70
|
-
</detail>
|
71
|
-
<detail type="level">
|
72
|
-
<number>2</number>
|
73
|
-
</detail>
|
74
|
-
<extent unit="pages">
|
75
|
-
<start>195</start>
|
76
|
-
<end>230</end>
|
77
|
-
</extent>
|
78
|
-
<date>2007-02-01</date>
|
79
|
-
</part>
|
80
|
-
</relatedItem>
|
81
|
-
|
82
|
-
<identifier type="uri">http://URL.edu.au/</identifier>
|
83
|
-
<identifier type="doi">doi:10.1006/jmbi.1995.0238</identifier>
|
84
|
-
<location>
|
85
|
-
<url>http://URL.edu.au/</url>
|
86
|
-
</location>
|
87
|
-
<accessCondition type="restrictionOnAccess">EMBARGO NOTE</accessCondition>
|
88
|
-
<accessCondition type="use and reproduction">OPEN ACCESS</accessCondition>
|
89
|
-
|
90
|
-
</mods>
|
data/spec/fixtures/test_solr_mappings.yml
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
id: pid
|
2
|
-
default: edible
|
3
|
-
edible:
|
4
|
-
date: _edible_date
|
5
|
-
string: _edible_string
|
6
|
-
text: _edible_text
|
7
|
-
symbol: _edible_sym
|
8
|
-
integer: _edible_int
|
9
|
-
long: _edible_long
|
10
|
-
boolean: _edible_bool
|
11
|
-
float: _edible_float
|
12
|
-
double: _edible_double
|
13
|
-
displayable: _display
|
14
|
-
facetable: _facet
|
15
|
-
sortable: _sort
|
16
|
-
unstemmed_searchable: _unstem_search
|
data/spec/units/xml_terminology_based_solrizer_spec.rb
DELETED
@@ -1,109 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
require 'fixtures/mods_article'
|
3
|
-
|
4
|
-
# TODO: there should be no dependencies on OM in Solrizer
|
5
|
-
describe Solrizer::XML::TerminologyBasedSolrizer do
|
6
|
-
|
7
|
-
before(:all) do
|
8
|
-
Samples::ModsArticle.send(:include, Solrizer::XML::TerminologyBasedSolrizer)
|
9
|
-
end
|
10
|
-
|
11
|
-
before(:each) do
|
12
|
-
article_xml = fixture( File.join("mods_articles", "hydrangea_article1.xml") )
|
13
|
-
@mods_article = Samples::ModsArticle.from_xml(article_xml)
|
14
|
-
end
|
15
|
-
|
16
|
-
describe ".to_solr" do
|
17
|
-
|
18
|
-
it "should provide .to_solr and return a SolrDocument" do
|
19
|
-
@mods_article.should respond_to(:to_solr)
|
20
|
-
@mods_article.to_solr.should be_kind_of(Hash)
|
21
|
-
end
|
22
|
-
|
23
|
-
it "should optionally allow you to provide the Hash to add fields to and return that document when done" do
|
24
|
-
doc = Hash.new
|
25
|
-
@mods_article.to_solr(doc).should equal(doc)
|
26
|
-
end
|
27
|
-
|
28
|
-
it "should iterate through the terminology terms, calling .solrize_term on each and passing in the solr doc" do
|
29
|
-
solr_doc = Hash.new
|
30
|
-
@mods_article.field_mapper = Solrizer::FieldMapper.new
|
31
|
-
Samples::ModsArticle.terminology.terms.each_pair do |k,v|
|
32
|
-
@mods_article.should_receive(:solrize_term).with(v, solr_doc, @mods_article.field_mapper)
|
33
|
-
end
|
34
|
-
@mods_article.to_solr(solr_doc)
|
35
|
-
end
|
36
|
-
|
37
|
-
it "should use Solr mappings to generate field names" do
|
38
|
-
solr_doc = @mods_article.to_solr
|
39
|
-
solr_doc["abstract"].should be_nil
|
40
|
-
# NOTE: OM's old default expected stored and indexed; this is a change.
|
41
|
-
solr_doc["abstract_tesim"].should == ["ABSTRACT"]
|
42
|
-
solr_doc["title_info_1_language_tesim"].should == ["finnish"]
|
43
|
-
solr_doc["person_1_role_0_text_tesim"].should == ["teacher"]
|
44
|
-
# No index_as on the code field.
|
45
|
-
solr_doc["person_1_role_0_code_tesim"].should be_nil
|
46
|
-
solr_doc["person_last_name_tesim"].sort.should == ["FAMILY NAME", "Gautama"]
|
47
|
-
solr_doc["topic_tag_tesim"].sort.should == ["CONTROLLED TERM", "TOPIC 1", "TOPIC 2"]
|
48
|
-
# These are a holdover from an old verison of OM
|
49
|
-
solr_doc['journal_0_issue_0_publication_date_dtsim'].should == ["2007-02-01T00:00:00Z"]
|
50
|
-
end
|
51
|
-
|
52
|
-
end
|
53
|
-
|
54
|
-
describe ".solrize_term" do
|
55
|
-
|
56
|
-
it "should add fields to a solr document for all nodes corresponding to the given term and its children" do
|
57
|
-
solr_doc = Hash.new
|
58
|
-
result = @mods_article.solrize_term(Samples::ModsArticle.terminology.retrieve_term(:title_info), solr_doc)
|
59
|
-
result.should == solr_doc
|
60
|
-
end
|
61
|
-
|
62
|
-
it "should add multiple fields based on index_as" do
|
63
|
-
fake_solr_doc = {}
|
64
|
-
term = Samples::ModsArticle.terminology.retrieve_term(:name)
|
65
|
-
term.children[:namePart].index_as = [:searchable, :displayable, :facetable]
|
66
|
-
|
67
|
-
@mods_article.solrize_term(term, fake_solr_doc)
|
68
|
-
|
69
|
-
expected_names = ["DR.", "FAMILY NAME", "GIVEN NAMES"]
|
70
|
-
%w(_teim _sim).each do |suffix|
|
71
|
-
actual_names = fake_solr_doc["name_0_namePart#{suffix}"].sort
|
72
|
-
actual_names.should == expected_names
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
it "should add fields based on type using proxy" do
|
77
|
-
unless RUBY_VERSION.match("1.8.7")
|
78
|
-
solr_doc = Hash.new
|
79
|
-
result = @mods_article.solrize_term(Samples::ModsArticle.terminology.retrieve_term(:pub_date), solr_doc)
|
80
|
-
solr_doc["pub_date_dtsim"].should == ["2007-02-01T00:00:00Z"]
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
it "should add fields based on type using ref" do
|
85
|
-
solr_doc = Hash.new
|
86
|
-
result = @mods_article.solrize_term(Samples::ModsArticle.terminology.retrieve_term(:issue_date), solr_doc)
|
87
|
-
solr_doc["issue_date_dtsim"].should == ["2007-02-15T00:00:00Z"]
|
88
|
-
end
|
89
|
-
|
90
|
-
it "shouldn't index terms where index_as is an empty array" do
|
91
|
-
fake_solr_doc = {}
|
92
|
-
term = Samples::ModsArticle.terminology.retrieve_term(:name)
|
93
|
-
term.children[:namePart].index_as = []
|
94
|
-
|
95
|
-
@mods_article.solrize_term(term, fake_solr_doc)
|
96
|
-
fake_solr_doc["name_0_namePart_teim"].should be_nil
|
97
|
-
end
|
98
|
-
|
99
|
-
it "should index terms where index_as is searchable" do
|
100
|
-
fake_solr_doc = {}
|
101
|
-
term = Samples::ModsArticle.terminology.retrieve_term(:name)
|
102
|
-
term.children[:namePart].index_as = [:searchable]
|
103
|
-
|
104
|
-
@mods_article.solrize_term(term, fake_solr_doc)
|
105
|
-
|
106
|
-
fake_solr_doc["name_0_namePart_teim"].sort.should == ["DR.", "FAMILY NAME", "GIVEN NAMES"]
|
107
|
-
end
|
108
|
-
end
|
109
|
-
end
|