solrizer 1.2.2 → 2.0.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -1
- data/History.txt +3 -0
- data/lib/solrizer/version.rb +1 -1
- data/lib/solrizer/xml/terminology_based_solrizer.rb +4 -1
- data/spec/fixtures/mods_article.rb +88 -0
- data/spec/units/xml_terminology_based_solrizer_spec.rb +13 -18
- metadata +19 -13
- data/.rvmrc +0 -35
- data/Gemfile.lock +0 -72
data/.gitignore
CHANGED
data/History.txt
CHANGED
data/lib/solrizer/version.rb
CHANGED
@@ -48,14 +48,17 @@ module Solrizer::XML::TerminologyBasedSolrizer
|
|
48
48
|
|
49
49
|
# Populate a solr document with solr fields corresponding to the given xml node
|
50
50
|
# Field names are generated using settings from the term in the +doc+'s terminology corresponding to +term_pointer+
|
51
|
+
# If the supplied term does not have an index_as attribute, no indexing will be performed.
|
51
52
|
# @param [Nokogiri::XML::Node] node to solrize
|
52
53
|
# @param [OM::XML::Document] doc document the node came from
|
53
54
|
# @param [Array] term_pointer Array pointing to the term that should be used for solrization settings
|
55
|
+
# @param [Term] term the term to be solrized
|
54
56
|
# @param [Hash] solr_doc (optional) values hash to populate
|
57
|
+
# @return [Hash] the solr doc
|
55
58
|
def self.solrize_node(node, doc, term_pointer, term, solr_doc = Hash.new, field_mapper = nil, opts = {})
|
59
|
+
return solr_doc unless term.index_as
|
56
60
|
field_mapper ||= self.default_field_mapper
|
57
61
|
terminology = doc.class.terminology
|
58
|
-
# term = terminology.retrieve_term(*term_pointer)
|
59
62
|
|
60
63
|
if term.path.kind_of?(Hash) && term.path.has_key?(:attribute)
|
61
64
|
node_value = node.value
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module Samples
|
2
|
+
class ModsArticle
|
3
|
+
|
4
|
+
include OM::XML::Document
|
5
|
+
|
6
|
+
set_terminology do |t|
|
7
|
+
t.root(:path=>"mods", :xmlns=>"http://www.loc.gov/mods/v3", :schema=>"http://www.loc.gov/standards/mods/v3/mods-3-2.xsd", "xmlns:foo"=>"http://my.custom.namespace")
|
8
|
+
|
9
|
+
|
10
|
+
t.title_info(:path=>"titleInfo") {
|
11
|
+
t.main_title(:index_as=>[:facetable],:path=>"title", :label=>"title") {
|
12
|
+
t.main_title_lang(:path=>{:attribute=> "xml:lang"})
|
13
|
+
}
|
14
|
+
t.french_title(:ref=>[:title_info,:main_title], :attributes=>{"xml:lang"=>"fre"})
|
15
|
+
|
16
|
+
t.language(:index_as=>[:facetable],:path=>{:attribute=>"lang"})
|
17
|
+
}
|
18
|
+
t.language{
|
19
|
+
t.lang_code(:index_as=>[:facetable], :path=>"languageTerm", :attributes=>{:type=>"code"})
|
20
|
+
}
|
21
|
+
t.abstract(:index_as=>[])
|
22
|
+
t.subject {
|
23
|
+
t.topic(:index_as=>[:facetable])
|
24
|
+
}
|
25
|
+
t.topic_tag(:proxy=>[:subject, :topic])
|
26
|
+
# t.topic_tag(:index_as=>[:facetable],:path=>"subject", :default_content_path=>"topic")
|
27
|
+
# This is a mods:name. The underscore is purely to avoid namespace conflicts.
|
28
|
+
t.name_ {
|
29
|
+
# this is a namepart
|
30
|
+
t.namePart(:type=>:string, :label=>"generic name")
|
31
|
+
# affiliations are great
|
32
|
+
t.affiliation
|
33
|
+
t.institution(:path=>"affiliation", :index_as=>[:facetable], :label=>"organization")
|
34
|
+
t.displayForm
|
35
|
+
t.role(:ref=>[:role])
|
36
|
+
t.description(:index_as=>[:facetable])
|
37
|
+
t.date(:path=>"namePart", :attributes=>{:type=>"date"})
|
38
|
+
t.last_name(:path=>"namePart", :attributes=>{:type=>"family"}, :index_as=>[])
|
39
|
+
t.first_name(:path=>"namePart", :attributes=>{:type=>"given"}, :label=>"first name")
|
40
|
+
t.terms_of_address(:path=>"namePart", :attributes=>{:type=>"termsOfAddress"})
|
41
|
+
t.computing_id
|
42
|
+
t.name_content(:path=>"text()")
|
43
|
+
}
|
44
|
+
# lookup :person, :first_name
|
45
|
+
t.person(:ref=>:name, :attributes=>{:type=>"personal"}, :index_as=>[:facetable])
|
46
|
+
t.department(:proxy=>[:person,:description],:index_as=>[:facetable])
|
47
|
+
t.organization(:ref=>:name, :attributes=>{:type=>"corporate"}, :index_as=>[:facetable])
|
48
|
+
t.conference(:ref=>:name, :attributes=>{:type=>"conference"}, :index_as=>[:facetable])
|
49
|
+
t.role {
|
50
|
+
t.text(:path=>"roleTerm",:attributes=>{:type=>"text"}, :index_as=>[])
|
51
|
+
t.code(:path=>"roleTerm",:attributes=>{:type=>"code"})
|
52
|
+
}
|
53
|
+
t.journal(:path=>'relatedItem', :attributes=>{:type=>"host"}) {
|
54
|
+
t.title_info(:index_as=>[:facetable],:ref=>[:title_info])
|
55
|
+
t.origin_info(:path=>"originInfo") {
|
56
|
+
t.publisher
|
57
|
+
t.date_issued(:path=>"dateIssued")
|
58
|
+
t.issuance(:index_as=>[:facetable])
|
59
|
+
}
|
60
|
+
t.issn(:path=>"identifier", :attributes=>{:type=>"issn"})
|
61
|
+
t.issue(:path=>"part") {
|
62
|
+
t.volume(:path=>"detail", :attributes=>{:type=>"volume"}, :default_content_path=>"number")
|
63
|
+
t.level(:path=>"detail", :attributes=>{:type=>"number"}, :default_content_path=>"number")
|
64
|
+
t.extent
|
65
|
+
t.pages(:path=>"extent", :attributes=>{:unit=>"pages"}) {
|
66
|
+
t.start
|
67
|
+
t.end
|
68
|
+
}
|
69
|
+
t.start_page(:proxy=>[:pages, :start])
|
70
|
+
t.end_page(:proxy=>[:pages, :end])
|
71
|
+
t.publication_date(:path=>"date", :index_as=>[])
|
72
|
+
}
|
73
|
+
}
|
74
|
+
t.note
|
75
|
+
t.location(:path=>"location") {
|
76
|
+
t.url(:path=>"url")
|
77
|
+
}
|
78
|
+
t.publication_url(:proxy=>[:location,:url])
|
79
|
+
t.title(:proxy=>[:title_info, :main_title])
|
80
|
+
t.journal_title(:proxy=>[:journal, :title_info, :main_title])
|
81
|
+
end
|
82
|
+
|
83
|
+
# Changes from OM::Properties implementation
|
84
|
+
# renamed family_name => last_name
|
85
|
+
# start_page & end_page now accessible as [:journal, :issue, :pages, :start] (etc.)
|
86
|
+
|
87
|
+
end
|
88
|
+
end
|
@@ -1,15 +1,15 @@
|
|
1
1
|
require 'spec_helper'
|
2
|
-
require '
|
2
|
+
require 'fixtures/mods_article'
|
3
3
|
|
4
4
|
describe Solrizer::XML::TerminologyBasedSolrizer do
|
5
5
|
|
6
6
|
before(:all) do
|
7
|
-
|
7
|
+
Samples::ModsArticle.send(:include, Solrizer::XML::TerminologyBasedSolrizer)
|
8
8
|
end
|
9
9
|
|
10
10
|
before(:each) do
|
11
11
|
article_xml = fixture( File.join("mods_articles", "hydrangea_article1.xml") )
|
12
|
-
@mods_article =
|
12
|
+
@mods_article = Samples::ModsArticle.from_xml(article_xml)
|
13
13
|
end
|
14
14
|
|
15
15
|
describe ".to_solr" do
|
@@ -34,7 +34,7 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
|
|
34
34
|
# ActiveFedora::NokogiriDatastream.stubs(:accessors).returns(mock_accessors)
|
35
35
|
solr_doc = Hash.new
|
36
36
|
@mods_article.field_mapper = Solrizer::FieldMapper::Default.new
|
37
|
-
|
37
|
+
Samples::ModsArticle.terminology.terms.each_pair do |k,v|
|
38
38
|
@mods_article.expects(:solrize_term).with(v, solr_doc, @mods_article.field_mapper)
|
39
39
|
end
|
40
40
|
@mods_article.to_solr(solr_doc)
|
@@ -49,21 +49,16 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
|
|
49
49
|
solr_doc["abstract_t"].should == ["ABSTRACT"]
|
50
50
|
solr_doc["title_info_1_language_t"].should == ["finnish"]
|
51
51
|
solr_doc["person_1_role_0_text_t"].should == ["teacher"]
|
52
|
+
# No index_as on the code field.
|
53
|
+
solr_doc["person_1_role_0_code_t"].should be_nil
|
52
54
|
solr_doc["person_last_name_t"].sort.should == ["FAMILY NAME", "Gautama"]
|
53
|
-
|
54
|
-
# solr_doc["topic_tag_t"].sort.should == ["CONTROLLED TERM", "TOPIC1", "TOPIC2"]
|
55
|
+
solr_doc["topic_tag_t"].sort.should == ["CONTROLLED TERM", "TOPIC 1", "TOPIC 2"]
|
55
56
|
|
56
57
|
# These are a holdover from an old version of OM
|
57
|
-
#
|
58
|
-
|
58
|
+
puts "DOC: #{solr_doc.length}"
|
59
|
+
solr_doc['journal_0_issue_0_publication_date_t'].should == ["FEB. 2007"]
|
59
60
|
|
60
|
-
|
61
|
-
#
|
62
|
-
# solr_doc[:publisher_t].should be_nil
|
63
|
-
# solr_doc[:coverage_t].should be_nil
|
64
|
-
# solr_doc[:creation_date_dt].should be_nil
|
65
|
-
# solr_doc.should == ""
|
66
|
-
|
61
|
+
|
67
62
|
end
|
68
63
|
|
69
64
|
end
|
@@ -72,14 +67,14 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
|
|
72
67
|
|
73
68
|
it "should add fields to a solr document for all nodes corresponding to the given term and its children" do
|
74
69
|
solr_doc = Hash.new
|
75
|
-
result = @mods_article.solrize_term(
|
70
|
+
result = @mods_article.solrize_term(Samples::ModsArticle.terminology.retrieve_term(:title_info), solr_doc)
|
76
71
|
result.should == solr_doc
|
77
|
-
# @mods_article.solrize_term(:title_info,
|
72
|
+
# @mods_article.solrize_term(:title_info, Samples::ModsArticle.terminology.retrieve_term(:title_info), :solr_doc=>solr_doc).should == ""
|
78
73
|
end
|
79
74
|
|
80
75
|
it "should add multiple fields based on index_as" do
|
81
76
|
fake_solr_doc = {}
|
82
|
-
term =
|
77
|
+
term = Samples::ModsArticle.terminology.retrieve_term(:name)
|
83
78
|
term.children[:namePart].index_as = [:displayable, :facetable]
|
84
79
|
|
85
80
|
@mods_article.solrize_term(term, fake_solr_doc)
|
metadata
CHANGED
@@ -1,13 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: solrizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: -2879808512
|
5
|
+
prerelease: 6
|
6
6
|
segments:
|
7
|
-
- 1
|
8
|
-
- 2
|
9
7
|
- 2
|
10
|
-
|
8
|
+
- 0
|
9
|
+
- 0
|
10
|
+
- rc
|
11
|
+
- 1
|
12
|
+
version: 2.0.0.rc1
|
11
13
|
platform: ruby
|
12
14
|
authors:
|
13
15
|
- Matt Zumwalt
|
@@ -15,7 +17,8 @@ autorequire:
|
|
15
17
|
bindir: bin
|
16
18
|
cert_chain: []
|
17
19
|
|
18
|
-
date: 2012-
|
20
|
+
date: 2012-10-15 00:00:00 -05:00
|
21
|
+
default_executable:
|
19
22
|
dependencies:
|
20
23
|
- !ruby/object:Gem::Dependency
|
21
24
|
name: nokogiri
|
@@ -188,9 +191,7 @@ extra_rdoc_files:
|
|
188
191
|
- README.textile
|
189
192
|
files:
|
190
193
|
- .gitignore
|
191
|
-
- .rvmrc
|
192
194
|
- Gemfile
|
193
|
-
- Gemfile.lock
|
194
195
|
- History.txt
|
195
196
|
- LICENSE
|
196
197
|
- README.textile
|
@@ -222,6 +223,7 @@ files:
|
|
222
223
|
- spec/fixtures/druid-cm234kq4672-extProperties.xml
|
223
224
|
- spec/fixtures/druid-cm234kq4672-stories.xml
|
224
225
|
- spec/fixtures/druid-hc513kw4806-descMetadata.xml
|
226
|
+
- spec/fixtures/mods_article.rb
|
225
227
|
- spec/fixtures/mods_articles/hydrangea_article1.xml
|
226
228
|
- spec/fixtures/test_solr_mappings.yml
|
227
229
|
- spec/spec_helper.rb
|
@@ -230,6 +232,7 @@ files:
|
|
230
232
|
- spec/units/field_name_mapper_spec.rb
|
231
233
|
- spec/units/xml_extractor_spec.rb
|
232
234
|
- spec/units/xml_terminology_based_solrizer_spec.rb
|
235
|
+
has_rdoc: true
|
233
236
|
homepage: http://github.com/projecthydra/solrizer
|
234
237
|
licenses: []
|
235
238
|
|
@@ -250,16 +253,18 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
250
253
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
251
254
|
none: false
|
252
255
|
requirements:
|
253
|
-
- - "
|
256
|
+
- - ">"
|
254
257
|
- !ruby/object:Gem::Version
|
255
|
-
hash:
|
258
|
+
hash: 25
|
256
259
|
segments:
|
257
|
-
-
|
258
|
-
|
260
|
+
- 1
|
261
|
+
- 3
|
262
|
+
- 1
|
263
|
+
version: 1.3.1
|
259
264
|
requirements: []
|
260
265
|
|
261
266
|
rubyforge_project:
|
262
|
-
rubygems_version: 1.
|
267
|
+
rubygems_version: 1.6.2
|
263
268
|
signing_key:
|
264
269
|
specification_version: 3
|
265
270
|
summary: A utility for building solr indexes, usually from Fedora repository content with solrizer-fedora extension gem.
|
@@ -269,6 +274,7 @@ test_files:
|
|
269
274
|
- spec/fixtures/druid-cm234kq4672-extProperties.xml
|
270
275
|
- spec/fixtures/druid-cm234kq4672-stories.xml
|
271
276
|
- spec/fixtures/druid-hc513kw4806-descMetadata.xml
|
277
|
+
- spec/fixtures/mods_article.rb
|
272
278
|
- spec/fixtures/mods_articles/hydrangea_article1.xml
|
273
279
|
- spec/fixtures/test_solr_mappings.yml
|
274
280
|
- spec/spec_helper.rb
|
data/.rvmrc
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
#!/usr/bin/env bash
|
2
|
-
|
3
|
-
# This is an RVM Project .rvmrc file, used to automatically load the ruby
|
4
|
-
# development environment upon cd'ing into the directory
|
5
|
-
|
6
|
-
ruby_string="ree-1.8.7"
|
7
|
-
gemset_name="solrizer"
|
8
|
-
|
9
|
-
#
|
10
|
-
rvm_install_on_use_flag=1
|
11
|
-
|
12
|
-
# Specify our desired <ruby>[@<gemset>], the @gemset name is optional.
|
13
|
-
environment_id="${ruby_string}@${gemset_name}"
|
14
|
-
|
15
|
-
# First, attempt to load the desired environment directly from the environment
|
16
|
-
# file. This is very fast and efficient compared to running through the entire
|
17
|
-
# CLI and selector. If you want feedback on which environment was used then
|
18
|
-
# insert the word 'use' after --create as this triggers verbose mode.
|
19
|
-
#
|
20
|
-
if [[ -d "${rvm_path:-$HOME/.rvm}/environments" \
|
21
|
-
&& -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]] ; then
|
22
|
-
\. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
|
23
|
-
else
|
24
|
-
# If the environment file has not yet been created, use the RVM CLI to select.
|
25
|
-
rvm --create "$environment_id"
|
26
|
-
fi
|
27
|
-
|
28
|
-
#(
|
29
|
-
# Ensure that Bundler is installed, install it if it is not.
|
30
|
-
if ! command -v bundle ; then
|
31
|
-
printf "The rubygem 'bundler' is not installed, installing it now.\n"
|
32
|
-
gem install bundler
|
33
|
-
fi
|
34
|
-
#)&
|
35
|
-
|
data/Gemfile.lock
DELETED
@@ -1,72 +0,0 @@
|
|
1
|
-
PATH
|
2
|
-
remote: .
|
3
|
-
specs:
|
4
|
-
solrizer (1.2.0)
|
5
|
-
daemons
|
6
|
-
mediashelf-loggable (~> 0.4.7)
|
7
|
-
nokogiri
|
8
|
-
om (>= 1.5.0)
|
9
|
-
stomp
|
10
|
-
xml-simple
|
11
|
-
|
12
|
-
GEM
|
13
|
-
remote: http://rubygems.org/
|
14
|
-
specs:
|
15
|
-
RedCloth (4.2.8)
|
16
|
-
archive-tar-minitar (0.5.2)
|
17
|
-
columnize (0.3.4)
|
18
|
-
daemons (1.1.8)
|
19
|
-
diff-lcs (1.1.3)
|
20
|
-
linecache (0.46)
|
21
|
-
rbx-require-relative (> 0.0.4)
|
22
|
-
linecache19 (0.5.12)
|
23
|
-
ruby_core_source (>= 0.1.4)
|
24
|
-
mediashelf-loggable (0.4.9)
|
25
|
-
metaclass (0.0.1)
|
26
|
-
mocha (0.10.0)
|
27
|
-
metaclass (~> 0.0.1)
|
28
|
-
nokogiri (1.5.2)
|
29
|
-
om (1.6.0)
|
30
|
-
mediashelf-loggable
|
31
|
-
nokogiri (>= 1.4.2)
|
32
|
-
rbx-require-relative (0.0.5)
|
33
|
-
rcov (0.9.10)
|
34
|
-
rspec (2.7.0)
|
35
|
-
rspec-core (~> 2.7.0)
|
36
|
-
rspec-expectations (~> 2.7.0)
|
37
|
-
rspec-mocks (~> 2.7.0)
|
38
|
-
rspec-core (2.7.1)
|
39
|
-
rspec-expectations (2.7.0)
|
40
|
-
diff-lcs (~> 1.1.2)
|
41
|
-
rspec-mocks (2.7.0)
|
42
|
-
ruby-debug (0.10.4)
|
43
|
-
columnize (>= 0.1)
|
44
|
-
ruby-debug-base (~> 0.10.4.0)
|
45
|
-
ruby-debug-base (0.10.4)
|
46
|
-
linecache (>= 0.3)
|
47
|
-
ruby-debug-base19 (0.11.25)
|
48
|
-
columnize (>= 0.3.1)
|
49
|
-
linecache19 (>= 0.5.11)
|
50
|
-
ruby_core_source (>= 0.1.4)
|
51
|
-
ruby-debug19 (0.11.6)
|
52
|
-
columnize (>= 0.3.1)
|
53
|
-
linecache19 (>= 0.5.11)
|
54
|
-
ruby-debug-base19 (>= 0.11.19)
|
55
|
-
ruby_core_source (0.1.5)
|
56
|
-
archive-tar-minitar (>= 0.5.2)
|
57
|
-
stomp (1.2.2)
|
58
|
-
xml-simple (1.1.1)
|
59
|
-
yard (0.7.2)
|
60
|
-
|
61
|
-
PLATFORMS
|
62
|
-
ruby
|
63
|
-
|
64
|
-
DEPENDENCIES
|
65
|
-
RedCloth
|
66
|
-
mocha
|
67
|
-
rcov
|
68
|
-
rspec (~> 2.0)
|
69
|
-
ruby-debug
|
70
|
-
ruby-debug19
|
71
|
-
solrizer!
|
72
|
-
yard
|