solrizer 3.0.0.pre7 → 3.0.0.pre8
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/solrizer/extractor.rb +0 -3
- data/lib/solrizer/field_mapper.rb +0 -2
- data/lib/solrizer/version.rb +1 -1
- data/lib/solrizer/xml/extractor.rb +1 -1
- data/lib/solrizer/xml.rb +1 -3
- data/solrizer.gemspec +0 -1
- metadata +5 -35
- data/lib/solrizer/xml/terminology_based_solrizer.rb +0 -92
- data/spec/fixtures/druid-bv448hq0314-extProperties.xml +0 -52
- data/spec/fixtures/druid-cm234kq4672-extProperties.xml +0 -5
- data/spec/fixtures/druid-cm234kq4672-stories.xml +0 -17
- data/spec/fixtures/druid-hc513kw4806-descMetadata.xml +0 -11
- data/spec/fixtures/mods_article.rb +0 -90
- data/spec/fixtures/mods_articles/hydrangea_article1.xml +0 -90
- data/spec/fixtures/test_solr_mappings.yml +0 -16
- data/spec/units/xml_terminology_based_solrizer_spec.rb +0 -109
data/lib/solrizer/extractor.rb
CHANGED
@@ -34,8 +34,6 @@ module Solrizer
|
|
34
34
|
# # t.dish_name :index_as => [:some_field_type] -maps to-> dish_name_ssim
|
35
35
|
# # t.ingredients :index_as => [:some_field_type, :edible] -maps to-> ingredients_ssim, ingredients_food
|
36
36
|
#
|
37
|
-
# (See Solrizer::XML::TerminologyBasedSolrizer for instructions on applying a custom mapping once you have defined it.)
|
38
|
-
#
|
39
37
|
#
|
40
38
|
# == Custom Value Converters
|
41
39
|
#
|
data/lib/solrizer/version.rb
CHANGED
[hunk not captured in this extract]
data/lib/solrizer/xml/extractor.rb
CHANGED
@@ -4,7 +4,7 @@ module Solrizer::XML::Extractor
|
|
4
4
|
|
5
5
|
#
|
6
6
|
# This method extracts solr fields from simple xml
|
7
|
-
# If you want to do anything more nuanced with the xml, use TerminologyBasedSolrizer
|
7
|
+
# If you want to do anything more nuanced with the xml, use OM instead.
|
8
8
|
#
|
9
9
|
# @param [xml] text xml content to index
|
10
10
|
# @param [Hash] solr_doc
|
data/lib/solrizer/xml.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
|
-
require "solrizer"
|
2
|
-
require "om"
|
3
1
|
module Solrizer::XML
|
4
2
|
end
|
5
3
|
Dir[File.join(File.dirname(__FILE__),"xml","*.rb")].each {|file| require file }
|
6
4
|
|
7
|
-
Solrizer::Extractor.send(:include, Solrizer::XML::Extractor)
|
5
|
+
Solrizer::Extractor.send(:include, Solrizer::XML::Extractor)
|
data/solrizer.gemspec
CHANGED
@@ -13,7 +13,6 @@ Gem::Specification.new do |s|
|
|
13
13
|
s.description = %q{Use solrizer to populate solr indexes. You can run solrizer from within your app, using the provided rake tasks, or as a JMS listener}
|
14
14
|
|
15
15
|
s.add_dependency "nokogiri"
|
16
|
-
s.add_dependency "om", ">=1.5.0"
|
17
16
|
s.add_dependency "xml-simple"
|
18
17
|
s.add_dependency "mediashelf-loggable", "~>0.4.7"
|
19
18
|
s.add_dependency "stomp"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: solrizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0.pre7
|
4
|
+
version: 3.0.0.pre8
|
5
5
|
prerelease: 6
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-02-
|
12
|
+
date: 2013-02-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -27,22 +27,6 @@ dependencies:
|
|
27
27
|
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '0'
|
30
|
-
- !ruby/object:Gem::Dependency
|
31
|
-
name: om
|
32
|
-
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
|
-
requirements:
|
35
|
-
- - ">="
|
36
|
-
- !ruby/object:Gem::Version
|
37
|
-
version: 1.5.0
|
38
|
-
type: :runtime
|
39
|
-
prerelease: false
|
40
|
-
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
|
-
requirements:
|
43
|
-
- - ">="
|
44
|
-
- !ruby/object:Gem::Version
|
45
|
-
version: 1.5.0
|
46
30
|
- !ruby/object:Gem::Dependency
|
47
31
|
name: xml-simple
|
48
32
|
requirement: !ruby/object:Gem::Requirement
|
@@ -218,25 +202,16 @@ files:
|
|
218
202
|
- lib/solrizer/version.rb
|
219
203
|
- lib/solrizer/xml.rb
|
220
204
|
- lib/solrizer/xml/extractor.rb
|
221
|
-
- lib/solrizer/xml/terminology_based_solrizer.rb
|
222
205
|
- lib/tasks/solrizer.rake
|
223
206
|
- solrizer.gemspec
|
224
207
|
- spec/.rspec
|
225
208
|
- spec/fixtures/druid-bv448hq0314-descMetadata.xml
|
226
|
-
- spec/fixtures/druid-bv448hq0314-extProperties.xml
|
227
|
-
- spec/fixtures/druid-cm234kq4672-extProperties.xml
|
228
|
-
- spec/fixtures/druid-cm234kq4672-stories.xml
|
229
|
-
- spec/fixtures/druid-hc513kw4806-descMetadata.xml
|
230
|
-
- spec/fixtures/mods_article.rb
|
231
|
-
- spec/fixtures/mods_articles/hydrangea_article1.xml
|
232
|
-
- spec/fixtures/test_solr_mappings.yml
|
233
209
|
- spec/spec_helper.rb
|
234
210
|
- spec/units/common_spec.rb
|
235
211
|
- spec/units/extractor_spec.rb
|
236
212
|
- spec/units/field_mapper_spec.rb
|
237
213
|
- spec/units/solrizer_spec.rb
|
238
214
|
- spec/units/xml_extractor_spec.rb
|
239
|
-
- spec/units/xml_terminology_based_solrizer_spec.rb
|
240
215
|
homepage: http://github.com/projecthydra/solrizer
|
241
216
|
licenses: []
|
242
217
|
post_install_message:
|
@@ -249,6 +224,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
249
224
|
- - ">="
|
250
225
|
- !ruby/object:Gem::Version
|
251
226
|
version: '0'
|
227
|
+
segments:
|
228
|
+
- 0
|
229
|
+
hash: 1646376342601153569
|
252
230
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
253
231
|
none: false
|
254
232
|
requirements:
|
@@ -264,18 +242,10 @@ summary: A utility for building solr indexes, usually from Fedora repository con
|
|
264
242
|
with solrizer-fedora extension gem.
|
265
243
|
test_files:
|
266
244
|
- spec/fixtures/druid-bv448hq0314-descMetadata.xml
|
267
|
-
- spec/fixtures/druid-bv448hq0314-extProperties.xml
|
268
|
-
- spec/fixtures/druid-cm234kq4672-extProperties.xml
|
269
|
-
- spec/fixtures/druid-cm234kq4672-stories.xml
|
270
|
-
- spec/fixtures/druid-hc513kw4806-descMetadata.xml
|
271
|
-
- spec/fixtures/mods_article.rb
|
272
|
-
- spec/fixtures/mods_articles/hydrangea_article1.xml
|
273
|
-
- spec/fixtures/test_solr_mappings.yml
|
274
245
|
- spec/spec_helper.rb
|
275
246
|
- spec/units/common_spec.rb
|
276
247
|
- spec/units/extractor_spec.rb
|
277
248
|
- spec/units/field_mapper_spec.rb
|
278
249
|
- spec/units/solrizer_spec.rb
|
279
250
|
- spec/units/xml_extractor_spec.rb
|
280
|
-
- spec/units/xml_terminology_based_solrizer_spec.rb
|
281
251
|
has_rdoc:
|
@@ -1,92 +0,0 @@
|
|
1
|
-
# This module is only suitable to mix into Classes that use the OM::XML::Document Module
|
2
|
-
module Solrizer::XML::TerminologyBasedSolrizer
|
3
|
-
def self.included(klass)
|
4
|
-
klass.send(:include, Solrizer::Common)
|
5
|
-
klass.send(:extend, ClassMethods)
|
6
|
-
end
|
7
|
-
|
8
|
-
# Module Methods
|
9
|
-
module ClassMethods
|
10
|
-
|
11
|
-
# Build a solr document from +doc+ based on its terminology
|
12
|
-
# @param [OM::XML::Document] doc
|
13
|
-
# @param [Hash] (optional) solr_doc (values hash) to populate
|
14
|
-
def solrize(doc, solr_doc=Hash.new, field_mapper = nil)
|
15
|
-
unless doc.class.terminology.nil?
|
16
|
-
doc.class.terminology.terms.each_pair do |term_name,term|
|
17
|
-
doc.solrize_term(term, solr_doc, field_mapper)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
return solr_doc
|
22
|
-
end
|
23
|
-
|
24
|
-
# Populate a solr document with fields based on nodes in +xml+
|
25
|
-
# Values for a term are gathered by to +term_pointer+ using OM::XML::TermValueOperators.term_values
|
26
|
-
# and are deserialized by OM according to :type, as determined in its terminology.
|
27
|
-
# The content of the actual field in solr is each +node+ of the +nodeset+ returned by OM,
|
28
|
-
# rendered to a string.
|
29
|
-
# @param [OM::XML::Document] doc xml document to extract values from
|
30
|
-
# @param [OM::XML::Term] term corresponding to desired xml values
|
31
|
-
# @param [Hash] (optional) solr_doc (values hash) to populate
|
32
|
-
def solrize_term(doc, term, solr_doc = Hash.new, field_mapper = nil, opts={})
|
33
|
-
parents = opts.fetch(:parents, [])
|
34
|
-
term_pointer = parents+[term.name]
|
35
|
-
nodeset = doc.term_values(*term_pointer)
|
36
|
-
|
37
|
-
nodeset.each do |n|
|
38
|
-
doc.solrize_node(n, term_pointer, term, solr_doc, field_mapper)
|
39
|
-
# FIXME: there should be no dependencies on OM in Solrizer
|
40
|
-
unless term.kind_of? OM::XML::NamedTermProxy
|
41
|
-
term.children.each_pair do |child_term_name, child_term|
|
42
|
-
doc.solrize_term(child_term, solr_doc, field_mapper, opts={:parents=>parents+[{term.name=>nodeset.index(n)}]})
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
solr_doc
|
47
|
-
end
|
48
|
-
|
49
|
-
# Populate a solr document with solr fields corresponding to the given xml node
|
50
|
-
# Field names are generated using settings from the term in the +doc+'s terminology corresponding to +term_pointer+
|
51
|
-
# If the supplied term does not have an index_as attribute, no indexing will be performed.
|
52
|
-
# @param [Nokogiri::XML::Node] node to solrize
|
53
|
-
# @param [OM::XML::Document] doc document the node came from
|
54
|
-
# @param [Array] term_pointer Array pointing to the term that should be used for solrization settings
|
55
|
-
# @param [Term] term the term to be solrized
|
56
|
-
# @param [Hash] (optional) solr_doc (values hash) to populate
|
57
|
-
# @return [Hash] the solr doc
|
58
|
-
def solrize_node(node_value, doc, term_pointer, term, solr_doc = Hash.new, field_mapper = nil, opts = {})
|
59
|
-
return solr_doc unless term.index_as && !term.index_as.empty?
|
60
|
-
|
61
|
-
# FIXME: there should be no dependencies on OM in Solrizer
|
62
|
-
generic_field_name_base = OM::XML::Terminology.term_generic_name(*term_pointer)
|
63
|
-
create_and_insert_terms(generic_field_name_base, node_value, term.index_as, solr_doc)
|
64
|
-
|
65
|
-
if term_pointer.length > 1
|
66
|
-
# FIXME: there should be no dependencies on OM in Solrizer
|
67
|
-
hierarchical_field_name_base = OM::XML::Terminology.term_hierarchical_name(*term_pointer)
|
68
|
-
create_and_insert_terms(hierarchical_field_name_base, node_value, term.index_as, solr_doc)
|
69
|
-
end
|
70
|
-
solr_doc
|
71
|
-
end
|
72
|
-
|
73
|
-
end
|
74
|
-
|
75
|
-
|
76
|
-
# Instance Methods
|
77
|
-
|
78
|
-
attr_accessor :field_mapper
|
79
|
-
|
80
|
-
def to_solr(solr_doc = Hash.new, field_mapper = self.field_mapper) # :nodoc:
|
81
|
-
self.class.solrize(self, solr_doc, field_mapper)
|
82
|
-
end
|
83
|
-
|
84
|
-
def solrize_term(term, solr_doc = Hash.new, field_mapper = self.field_mapper, opts={})
|
85
|
-
self.class.solrize_term(self, term, solr_doc, field_mapper, opts)
|
86
|
-
end
|
87
|
-
|
88
|
-
def solrize_node(node, term_pointer, term, solr_doc = Hash.new, field_mapper = self.field_mapper, opts={})
|
89
|
-
self.class.solrize_node(node, self, term_pointer, term, solr_doc, field_mapper, opts)
|
90
|
-
end
|
91
|
-
|
92
|
-
end
|
@@ -1,52 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<document>
|
3
|
-
<attributes>
|
4
|
-
<attribute type="item">5958</attribute>
|
5
|
-
<attribute type="objectid">FEI0010-00013142</attribute>
|
6
|
-
<attribute type="title">Letter from Ellie Engelmore to Professor K. C. Reddy</attribute>
|
7
|
-
<attribute type="copyright">Presumed under copyright. Do not publish.</attribute>
|
8
|
-
<attribute type="description"/>
|
9
|
-
<attribute type="date">1985-12-30</attribute>
|
10
|
-
<attribute type="datestr">30/12/1985</attribute>
|
11
|
-
<attribute type="docurl">https://www.stanford.edu/group/salt_project/SLA/Feigenbaum/eaf7000/pdf/00013142.pdf</attribute>
|
12
|
-
<attribute type="doctn">http://www.stanford.edu/group/salt_project/cgi-bin/SLA/Feigenbaum/eaf7000/png/small_00013142.png</attribute>
|
13
|
-
<attribute type="url"/>
|
14
|
-
<attribute type="industryterm"/>
|
15
|
-
<attribute type="technology">artificial intelligence</attribute>
|
16
|
-
<attribute type="company"/>
|
17
|
-
<attribute type="person">ELLIE ENGELMORE</attribute>
|
18
|
-
<attribute type="year">1985</attribute>
|
19
|
-
<attribute type="organization">Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder</attribute>
|
20
|
-
<attribute type="sourcelocation">Folder 15</attribute>
|
21
|
-
</attributes>
|
22
|
-
<facets>
|
23
|
-
<facet type="year" id="49">1980s</facet>
|
24
|
-
<facet type="year" id="49">1985</facet>
|
25
|
-
<facet type="year" id="42">1980s</facet>
|
26
|
-
<facet type="sourcelocation" id="592">Feigenbaum</facet>
|
27
|
-
<facet type="sourcelocation" id="592">eaf7000</facet>
|
28
|
-
<facet type="sourcelocation" id="592">Box 51A</facet>
|
29
|
-
<facet type="sourcelocation" id="594">Feigenbaum</facet>
|
30
|
-
<facet type="sourcelocation" id="594">eaf7000</facet>
|
31
|
-
<facet type="sourcelocation" id="594">Box 51A</facet>
|
32
|
-
<facet type="sourcelocation" id="594">Folder 15</facet>
|
33
|
-
<facet type="sourcelocation" id="691">Feigenbaum</facet>
|
34
|
-
<facet type="sourcelocation" id="692">Feigenbaum</facet>
|
35
|
-
<facet type="sourcelocation" id="692">eaf7000</facet>
|
36
|
-
<facet type="doctype" id="32">Correspondence</facet>
|
37
|
-
<facet type="city" id="82">Ann Arbor</facet>
|
38
|
-
<facet type="city" id="910">Hyderabad</facet>
|
39
|
-
<facet type="city" id="1519">Palo Alto</facet>
|
40
|
-
<facet type="country" id="68">India</facet>
|
41
|
-
<facet type="emailaddress" id="288">EENGELMORE@SUMEX-AIM.ARPA</facet>
|
42
|
-
<facet type="organization" id="5065">Heuristic Programming Project</facet>
|
43
|
-
<facet type="organization" id="7012">Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder</facet>
|
44
|
-
<facet type="organization" id="8878">Professor K. C. Reddy School of Mathematics and Computer/Information Sciences</facet>
|
45
|
-
<facet type="person" id="5810">ELLIE ENGELMORE</facet>
|
46
|
-
<facet type="person" id="17934">Reddy</facet>
|
47
|
-
<facet type="person" id="5787">EDWARD FEIGENBAUM</facet>
|
48
|
-
<facet type="provinceorstate" id="96">Michigan</facet>
|
49
|
-
<facet type="provinceorstate" id="27">California</facet>
|
50
|
-
<facet type="technology" id="1713">artificial intelligence</facet>
|
51
|
-
</facets>
|
52
|
-
</document>
|
@@ -1,5 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<document>
|
3
|
-
<attributes><attribute type="item">4290</attribute><attribute type="objectid">FEI0010-00011325</attribute><attribute type="title">Letter from Ellie Engelmore to Wemara Lichty</attribute><attribute type="copyright">Presumed under copyright. Do not publish.</attribute><attribute type="description"/><attribute type="date">1984-6-4</attribute><attribute type="datestr">4/6/1984</attribute><attribute type="docurl">https://www.stanford.edu/group/salt_project/SLA/Feigenbaum/eaf7000/pdf/00011325.pdf</attribute><attribute type="doctn">http://www.stanford.edu/group/salt_project/cgi-bin/SLA/Feigenbaum/eaf7000/png/small_00011325.png</attribute><attribute type="url"/><attribute type="industryterm"/><attribute type="technology">artificial intelligence</attribute><attribute type="company"/><attribute type="person">A. FEIGENBAUM</attribute><attribute type="year">1984</attribute><attribute type="organization">McAlcster Hall University</attribute><attribute type="sourcelocation">Folder 5</attribute></attributes>
|
4
|
-
<facets><facet type="year" id="48">1980s</facet><facet type="year" id="48">1984</facet><facet type="year" id="42">1980s</facet><facet type="sourcelocation" id="578">Feigenbaum</facet><facet type="sourcelocation" id="578">eaf7000</facet><facet type="sourcelocation" id="578">Box 51</facet><facet type="sourcelocation" id="587">Feigenbaum</facet><facet type="sourcelocation" id="587">eaf7000</facet><facet type="sourcelocation" id="587">Box 51</facet><facet type="sourcelocation" id="587">Folder 5</facet><facet type="sourcelocation" id="692">Feigenbaum</facet><facet type="sourcelocation" id="692">eaf7000</facet><facet type="sourcelocation" id="691">Feigenbaum</facet><facet type="doctype" id="32">Correspondence</facet><facet type="city" id="1948">Stanford</facet><facet type="country" id="33">Columbia</facet><facet type="facility" id="2551">U. Missouri library</facet><facet type="organization" id="5065">Heuristic Programming Project</facet><facet type="organization" id="7026">McAlcster Hall University</facet><facet type="organization" id="9645">STANFORD UNIVERSITY</facet><facet type="organization" id="11964">University of Missouri</facet><facet type="organization" id="12407">Wemara Lichty Psychology Department</facet><facet type="person" id="15650">Morton Hunt</facet><facet type="person" id="37">A. FEIGENBAUM</facet><facet type="person" id="5810">ELLIE ENGELMORE</facet><facet type="provinceorstate" id="27">California</facet><facet type="provinceorstate" id="100">Missouri</facet><facet type="publishedmedium" id="44">Artificial Intelligence</facet><facet type="technology" id="1713">artificial intelligence</facet></facets>
|
5
|
-
</document>
|
@@ -1,17 +0,0 @@
|
|
1
|
-
|
2
|
-
<html>
|
3
|
-
<body>
|
4
|
-
<pre>
|
5
|
-
This is
|
6
|
-
preformatted text.
|
7
|
-
It preserves both spaces
|
8
|
-
and line breaks.
|
9
|
-
</pre>
|
10
|
-
<p>The pre tag is good for displaying computer code:</p>
|
11
|
-
<pre>
|
12
|
-
for i = 1 to 10
|
13
|
-
print i
|
14
|
-
next i
|
15
|
-
</pre>
|
16
|
-
</body>
|
17
|
-
</html>
|
@@ -1,11 +0,0 @@
|
|
1
|
-
<dc xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
2
|
-
<dcterms:type xsi:type="DCMITYPE">text</dcterms:type>
|
3
|
-
<dcterms:medium>Paper Document</dcterms:medium>
|
4
|
-
<dcterms:rights>Copyright © 2006 All rights reserved. Distribution for commercial purposes is prohibited.</dcterms:rights>
|
5
|
-
<dcterms:date/>
|
6
|
-
<dcterms:format>application/tiff</dcterms:format>
|
7
|
-
<dcterms:format>application/jp2000</dcterms:format>
|
8
|
-
<dcterms:format>application/pdf</dcterms:format>
|
9
|
-
<dcterms:title>The Rise and Fall of the YouTube Empire</dcterms:title>
|
10
|
-
<dcterms:publisher>Sample Unversity</dcterms:publisher>
|
11
|
-
</dc>
|
@@ -1,90 +0,0 @@
|
|
1
|
-
module Samples
|
2
|
-
class ModsArticle
|
3
|
-
|
4
|
-
include OM::XML::Document
|
5
|
-
|
6
|
-
set_terminology do |t|
|
7
|
-
t.root(:path=>"mods", :xmlns=>"http://www.loc.gov/mods/v3", :schema=>"http://www.loc.gov/standards/mods/v3/mods-3-2.xsd", "xmlns:foo"=>"http://my.custom.namespace")
|
8
|
-
|
9
|
-
|
10
|
-
t.title_info(:path=>"titleInfo") {
|
11
|
-
t.main_title(:index_as=>[:facetable],:path=>"title", :label=>"title") {
|
12
|
-
t.main_title_lang(:path=>{:attribute=> "xml:lang"})
|
13
|
-
}
|
14
|
-
t.french_title(:ref=>[:title_info,:main_title], :attributes=>{"xml:lang"=>"fre"})
|
15
|
-
|
16
|
-
t.language(:index_as=>[:facetable, :stored_searchable],:path=>{:attribute=>"lang"})
|
17
|
-
}
|
18
|
-
t.language{
|
19
|
-
t.lang_code(:index_as=>[:facetable], :path=>"languageTerm", :attributes=>{:type=>"code"})
|
20
|
-
}
|
21
|
-
t.abstract(:index_as=>[:stored_searchable])
|
22
|
-
t.subject {
|
23
|
-
t.topic(:index_as=>[:facetable])
|
24
|
-
}
|
25
|
-
t.topic_tag(:proxy=>[:subject, :topic], :index_as=>[:stored_searchable])
|
26
|
-
# t.topic_tag(:index_as=>[:facetable],:path=>"subject", :default_content_path=>"topic")
|
27
|
-
# This is a mods:name. The underscore is purely to avoid namespace conflicts.
|
28
|
-
t.name_ {
|
29
|
-
# this is a namepart
|
30
|
-
t.namePart(:type=>:string, :label=>"generic name")
|
31
|
-
# affiliations are great
|
32
|
-
t.affiliation
|
33
|
-
t.institution(:path=>"affiliation", :index_as=>[:facetable], :label=>"organization")
|
34
|
-
t.displayForm
|
35
|
-
t.role(:ref=>[:role])
|
36
|
-
t.description(:index_as=>[:facetable])
|
37
|
-
t.date(:path=>"namePart", :attributes=>{:type=>"date"})
|
38
|
-
t.last_name(:path=>"namePart", :attributes=>{:type=>"family"}, :index_as=>[:stored_searchable])
|
39
|
-
t.first_name(:path=>"namePart", :attributes=>{:type=>"given"}, :label=>"first name")
|
40
|
-
t.terms_of_address(:path=>"namePart", :attributes=>{:type=>"termsOfAddress"})
|
41
|
-
t.computing_id
|
42
|
-
t.name_content(:path=>"text()")
|
43
|
-
}
|
44
|
-
# lookup :person, :first_name
|
45
|
-
t.person(:ref=>:name, :attributes=>{:type=>"personal"}, :index_as=>[:facetable])
|
46
|
-
t.department(:proxy=>[:person,:description],:index_as=>[:facetable])
|
47
|
-
t.organization(:ref=>:name, :attributes=>{:type=>"corporate"}, :index_as=>[:facetable])
|
48
|
-
t.conference(:ref=>:name, :attributes=>{:type=>"conference"}, :index_as=>[:facetable])
|
49
|
-
t.role {
|
50
|
-
t.text(:path=>"roleTerm",:attributes=>{:type=>"text"}, :index_as=>[:stored_searchable])
|
51
|
-
t.code(:path=>"roleTerm",:attributes=>{:type=>"code"})
|
52
|
-
}
|
53
|
-
t.journal(:path=>'relatedItem', :attributes=>{:type=>"host"}) {
|
54
|
-
t.title_info(:index_as=>[:facetable],:ref=>[:title_info])
|
55
|
-
t.origin_info(:path=>"originInfo") {
|
56
|
-
t.publisher
|
57
|
-
t.date_issued(:path=>"dateIssued", :type => :date, :index_as => [:stored_searchable])
|
58
|
-
t.issuance(:index_as=>[:facetable])
|
59
|
-
}
|
60
|
-
t.issn(:path=>"identifier", :attributes=>{:type=>"issn"})
|
61
|
-
t.issue(:path=>"part") {
|
62
|
-
t.volume(:path=>"detail", :attributes=>{:type=>"volume"}, :default_content_path=>"number")
|
63
|
-
t.level(:path=>"detail", :attributes=>{:type=>"number"}, :default_content_path=>"number")
|
64
|
-
t.extent
|
65
|
-
t.pages(:path=>"extent", :attributes=>{:unit=>"pages"}) {
|
66
|
-
t.start
|
67
|
-
t.end
|
68
|
-
}
|
69
|
-
t.start_page(:proxy=>[:pages, :start])
|
70
|
-
t.end_page(:proxy=>[:pages, :end])
|
71
|
-
t.publication_date(:path=>"date", :type => :date, :index_as => [:stored_searchable])
|
72
|
-
}
|
73
|
-
}
|
74
|
-
t.note
|
75
|
-
t.location(:path=>"location") {
|
76
|
-
t.url(:path=>"url")
|
77
|
-
}
|
78
|
-
t.publication_url(:proxy=>[:location,:url])
|
79
|
-
t.title(:proxy=>[:title_info, :main_title])
|
80
|
-
t.journal_title(:proxy=>[:journal, :title_info, :main_title])
|
81
|
-
t.pub_date(:proxy=>[:journal, :issue, :publication_date])
|
82
|
-
t.issue_date(:ref=>[:journal, :origin_info, :date_issued], :type=> :date)
|
83
|
-
end
|
84
|
-
|
85
|
-
# Changes from OM::Properties implementation
|
86
|
-
# renamed family_name => last_name
|
87
|
-
# start_page & end_page now accessible as [:journal, :issue, :pages, :start] (etc.)
|
88
|
-
|
89
|
-
end
|
90
|
-
end
|
@@ -1,90 +0,0 @@
|
|
1
|
-
<mods version="3.0" xsi:schemaLocation="http://www.loc.gov/mods/v3
|
2
|
-
http://www.loc.gov/standards/mods/v3/mods-3-0.xsd" xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
3
|
-
|
4
|
-
<titleInfo>
|
5
|
-
<nonSort>THE</nonSort>
|
6
|
-
<title xml:lang="eng">ARTICLE TITLE HYDRANGEA ARTICLE 1</title>
|
7
|
-
<subTitle>SUBTITLE</subTitle>
|
8
|
-
</titleInfo>
|
9
|
-
<titleInfo lang="finnish">
|
10
|
-
<title>Artikkelin otsikko Hydrangea artiklan 1</title>
|
11
|
-
</titleInfo>
|
12
|
-
|
13
|
-
<name type="personal">
|
14
|
-
<namePart type="family">FAMILY NAME</namePart>
|
15
|
-
<namePart type="given">GIVEN NAMES</namePart>
|
16
|
-
<namePart type="termsOfAddress">DR.</namePart>
|
17
|
-
<displayForm>NAME AS IT APPEARS</displayForm>
|
18
|
-
<affiliation>FACULTY, UNIVERSITY</affiliation>
|
19
|
-
<role>
|
20
|
-
<roleTerm authority="marcrelator" type="text">creator</roleTerm>
|
21
|
-
</role>
|
22
|
-
<role>
|
23
|
-
<roleTerm type="text">submitter</roleTerm>
|
24
|
-
</role>
|
25
|
-
</name>
|
26
|
-
|
27
|
-
<name type="personal">
|
28
|
-
<namePart type="family">Gautama</namePart>
|
29
|
-
<namePart type="given">Siddartha</namePart>
|
30
|
-
<namePart type="termsOfAddress">Prince</namePart>
|
31
|
-
<affiliation>Nirvana</affiliation>
|
32
|
-
<role>
|
33
|
-
<roleTerm authority="marcrelator" type="text">teacher</roleTerm>
|
34
|
-
</role>
|
35
|
-
</name>
|
36
|
-
|
37
|
-
<typeOfResource>text</typeOfResource>
|
38
|
-
<genre authority="local">journal article</genre>
|
39
|
-
|
40
|
-
<abstract>ABSTRACT</abstract>
|
41
|
-
<subject>
|
42
|
-
<topic>TOPIC 1</topic>
|
43
|
-
<topic>TOPIC 2</topic>
|
44
|
-
</subject>
|
45
|
-
<subject authority="AUTHORITY SOURCE (RFCD, LCSH)">
|
46
|
-
<topic>CONTROLLED TERM</topic>
|
47
|
-
</subject>
|
48
|
-
|
49
|
-
<language>
|
50
|
-
<languageTerm authority="iso639-2b" type="code">en-aus </languageTerm>
|
51
|
-
</language>
|
52
|
-
|
53
|
-
<physicalDescription>
|
54
|
-
<internetMediaType>application/pdf</internetMediaType>
|
55
|
-
<extent>36 p.</extent>
|
56
|
-
</physicalDescription>
|
57
|
-
|
58
|
-
<relatedItem type="host">
|
59
|
-
<titleInfo>
|
60
|
-
<title>TITLE OF HOST JOURNAL</title>
|
61
|
-
</titleInfo>
|
62
|
-
<originInfo>
|
63
|
-
<publisher>PUBLISHER</publisher>
|
64
|
-
<dateIssued>2007-02-15</dateIssued>
|
65
|
-
</originInfo>
|
66
|
-
<identifier type="issn">0013-8908</identifier>
|
67
|
-
<part>
|
68
|
-
<detail type="volume">
|
69
|
-
<number>2</number>
|
70
|
-
</detail>
|
71
|
-
<detail type="level">
|
72
|
-
<number>2</number>
|
73
|
-
</detail>
|
74
|
-
<extent unit="pages">
|
75
|
-
<start>195</start>
|
76
|
-
<end>230</end>
|
77
|
-
</extent>
|
78
|
-
<date>2007-02-01</date>
|
79
|
-
</part>
|
80
|
-
</relatedItem>
|
81
|
-
|
82
|
-
<identifier type="uri">http://URL.edu.au/</identifier>
|
83
|
-
<identifier type="doi">doi:10.1006/jmbi.1995.0238</identifier>
|
84
|
-
<location>
|
85
|
-
<url>http://URL.edu.au/</url>
|
86
|
-
</location>
|
87
|
-
<accessCondition type="restrictionOnAccess">EMBARGO NOTE</accessCondition>
|
88
|
-
<accessCondition type="use and reproduction">OPEN ACCESS</accessCondition>
|
89
|
-
|
90
|
-
</mods>
|
@@ -1,16 +0,0 @@
|
|
1
|
-
id: pid
|
2
|
-
default: edible
|
3
|
-
edible:
|
4
|
-
date: _edible_date
|
5
|
-
string: _edible_string
|
6
|
-
text: _edible_text
|
7
|
-
symbol: _edible_sym
|
8
|
-
integer: _edible_int
|
9
|
-
long: _edible_long
|
10
|
-
boolean: _edible_bool
|
11
|
-
float: _edible_float
|
12
|
-
double: _edible_double
|
13
|
-
displayable: _display
|
14
|
-
facetable: _facet
|
15
|
-
sortable: _sort
|
16
|
-
unstemmed_searchable: _unstem_search
|
@@ -1,109 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
require 'fixtures/mods_article'
|
3
|
-
|
4
|
-
# TODO: there should be no dependencies on OM in Solrizer
|
5
|
-
describe Solrizer::XML::TerminologyBasedSolrizer do
|
6
|
-
|
7
|
-
before(:all) do
|
8
|
-
Samples::ModsArticle.send(:include, Solrizer::XML::TerminologyBasedSolrizer)
|
9
|
-
end
|
10
|
-
|
11
|
-
before(:each) do
|
12
|
-
article_xml = fixture( File.join("mods_articles", "hydrangea_article1.xml") )
|
13
|
-
@mods_article = Samples::ModsArticle.from_xml(article_xml)
|
14
|
-
end
|
15
|
-
|
16
|
-
describe ".to_solr" do
|
17
|
-
|
18
|
-
it "should provide .to_solr and return a SolrDocument" do
|
19
|
-
@mods_article.should respond_to(:to_solr)
|
20
|
-
@mods_article.to_solr.should be_kind_of(Hash)
|
21
|
-
end
|
22
|
-
|
23
|
-
it "should optionally allow you to provide the Hash to add fields to and return that document when done" do
|
24
|
-
doc = Hash.new
|
25
|
-
@mods_article.to_solr(doc).should equal(doc)
|
26
|
-
end
|
27
|
-
|
28
|
-
it "should iterate through the terminology terms, calling .solrize_term on each and passing in the solr doc" do
|
29
|
-
solr_doc = Hash.new
|
30
|
-
@mods_article.field_mapper = Solrizer::FieldMapper.new
|
31
|
-
Samples::ModsArticle.terminology.terms.each_pair do |k,v|
|
32
|
-
@mods_article.should_receive(:solrize_term).with(v, solr_doc, @mods_article.field_mapper)
|
33
|
-
end
|
34
|
-
@mods_article.to_solr(solr_doc)
|
35
|
-
end
|
36
|
-
|
37
|
-
it "should use Solr mappings to generate field names" do
|
38
|
-
solr_doc = @mods_article.to_solr
|
39
|
-
solr_doc["abstract"].should be_nil
|
40
|
-
# NOTE: OM's old default expected stored and indexed; this is a change.
|
41
|
-
solr_doc["abstract_tesim"].should == ["ABSTRACT"]
|
42
|
-
solr_doc["title_info_1_language_tesim"].should == ["finnish"]
|
43
|
-
solr_doc["person_1_role_0_text_tesim"].should == ["teacher"]
|
44
|
-
# No index_as on the code field.
|
45
|
-
solr_doc["person_1_role_0_code_tesim"].should be_nil
|
46
|
-
solr_doc["person_last_name_tesim"].sort.should == ["FAMILY NAME", "Gautama"]
|
47
|
-
solr_doc["topic_tag_tesim"].sort.should == ["CONTROLLED TERM", "TOPIC 1", "TOPIC 2"]
|
48
|
-
# These are a holdover from an old verison of OM
|
49
|
-
solr_doc['journal_0_issue_0_publication_date_dtsim'].should == ["2007-02-01T00:00:00Z"]
|
50
|
-
end
|
51
|
-
|
52
|
-
end
|
53
|
-
|
54
|
-
describe ".solrize_term" do
|
55
|
-
|
56
|
-
it "should add fields to a solr document for all nodes corresponding to the given term and its children" do
|
57
|
-
solr_doc = Hash.new
|
58
|
-
result = @mods_article.solrize_term(Samples::ModsArticle.terminology.retrieve_term(:title_info), solr_doc)
|
59
|
-
result.should == solr_doc
|
60
|
-
end
|
61
|
-
|
62
|
-
it "should add multiple fields based on index_as" do
|
63
|
-
fake_solr_doc = {}
|
64
|
-
term = Samples::ModsArticle.terminology.retrieve_term(:name)
|
65
|
-
term.children[:namePart].index_as = [:searchable, :displayable, :facetable]
|
66
|
-
|
67
|
-
@mods_article.solrize_term(term, fake_solr_doc)
|
68
|
-
|
69
|
-
expected_names = ["DR.", "FAMILY NAME", "GIVEN NAMES"]
|
70
|
-
%w(_teim _sim).each do |suffix|
|
71
|
-
actual_names = fake_solr_doc["name_0_namePart#{suffix}"].sort
|
72
|
-
actual_names.should == expected_names
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
it "should add fields based on type using proxy" do
|
77
|
-
unless RUBY_VERSION.match("1.8.7")
|
78
|
-
solr_doc = Hash.new
|
79
|
-
result = @mods_article.solrize_term(Samples::ModsArticle.terminology.retrieve_term(:pub_date), solr_doc)
|
80
|
-
solr_doc["pub_date_dtsim"].should == ["2007-02-01T00:00:00Z"]
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
it "should add fields based on type using ref" do
|
85
|
-
solr_doc = Hash.new
|
86
|
-
result = @mods_article.solrize_term(Samples::ModsArticle.terminology.retrieve_term(:issue_date), solr_doc)
|
87
|
-
solr_doc["issue_date_dtsim"].should == ["2007-02-15T00:00:00Z"]
|
88
|
-
end
|
89
|
-
|
90
|
-
it "shouldn't index terms where index_as is an empty array" do
|
91
|
-
fake_solr_doc = {}
|
92
|
-
term = Samples::ModsArticle.terminology.retrieve_term(:name)
|
93
|
-
term.children[:namePart].index_as = []
|
94
|
-
|
95
|
-
@mods_article.solrize_term(term, fake_solr_doc)
|
96
|
-
fake_solr_doc["name_0_namePart_teim"].should be_nil
|
97
|
-
end
|
98
|
-
|
99
|
-
it "should index terms where index_as is searchable" do
|
100
|
-
fake_solr_doc = {}
|
101
|
-
term = Samples::ModsArticle.terminology.retrieve_term(:name)
|
102
|
-
term.children[:namePart].index_as = [:searchable]
|
103
|
-
|
104
|
-
@mods_article.solrize_term(term, fake_solr_doc)
|
105
|
-
|
106
|
-
fake_solr_doc["name_0_namePart_teim"].sort.should == ["DR.", "FAMILY NAME", "GIVEN NAMES"]
|
107
|
-
end
|
108
|
-
end
|
109
|
-
end
|