solrizer 1.2.2 → 2.0.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -1,5 +1,6 @@
1
1
  .DS_Store
2
2
  nohup.out
3
+ Gemfile.lock
3
4
 
4
5
  *.sqlite3
5
6
  *.log
@@ -20,4 +21,4 @@ rerun.txt
20
21
 
21
22
  /.bundle
22
23
  /.yardoc
23
- /doc
24
+ /doc
data/History.txt CHANGED
@@ -1,3 +1,6 @@
1
+ h2. 2.0.0
2
+ HYDRA-827 DO NOT index terms by default
3
+
1
4
  h2. 1.2.2
2
5
  BUG: RSolr::Client doesn't have a delete method. Changed to delete_by_id. (only affects solrizer shell script) Thanks to mkorcy.
3
6
 
@@ -1,3 +1,3 @@
1
1
  module Solrizer
2
- VERSION = "1.2.2"
2
+ VERSION = "2.0.0.rc1"
3
3
  end
@@ -48,14 +48,17 @@ module Solrizer::XML::TerminologyBasedSolrizer
48
48
 
49
49
  # Populate a solr document with solr fields corresponding to the given xml node
50
50
  # Field names are generated using settings from the term in the +doc+'s terminology corresponding to +term_pointer+
51
+ # If the supplied term does not have an index_as attribute, no indexing will be performed.
51
52
  # @param [Nokogiri::XML::Node] node to solrize
52
53
  # @param [OM::XML::Document] doc document the node came from
53
54
  # @param [Array] term_pointer Array pointing to the term that should be used for solrization settings
55
+ # @param [Term] term the term to be solrized
54
56
  # @param [Hash] solr_doc (optional) values hash to populate
57
+ # @return [Hash] the solr doc
55
58
  def self.solrize_node(node, doc, term_pointer, term, solr_doc = Hash.new, field_mapper = nil, opts = {})
59
+ return solr_doc unless term.index_as
56
60
  field_mapper ||= self.default_field_mapper
57
61
  terminology = doc.class.terminology
58
- # term = terminology.retrieve_term(*term_pointer)
59
62
 
60
63
  if term.path.kind_of?(Hash) && term.path.has_key?(:attribute)
61
64
  node_value = node.value
@@ -0,0 +1,88 @@
1
+ module Samples
2
+ class ModsArticle
3
+
4
+ include OM::XML::Document
5
+
6
+ set_terminology do |t|
7
+ t.root(:path=>"mods", :xmlns=>"http://www.loc.gov/mods/v3", :schema=>"http://www.loc.gov/standards/mods/v3/mods-3-2.xsd", "xmlns:foo"=>"http://my.custom.namespace")
8
+
9
+
10
+ t.title_info(:path=>"titleInfo") {
11
+ t.main_title(:index_as=>[:facetable],:path=>"title", :label=>"title") {
12
+ t.main_title_lang(:path=>{:attribute=> "xml:lang"})
13
+ }
14
+ t.french_title(:ref=>[:title_info,:main_title], :attributes=>{"xml:lang"=>"fre"})
15
+
16
+ t.language(:index_as=>[:facetable],:path=>{:attribute=>"lang"})
17
+ }
18
+ t.language{
19
+ t.lang_code(:index_as=>[:facetable], :path=>"languageTerm", :attributes=>{:type=>"code"})
20
+ }
21
+ t.abstract(:index_as=>[])
22
+ t.subject {
23
+ t.topic(:index_as=>[:facetable])
24
+ }
25
+ t.topic_tag(:proxy=>[:subject, :topic])
26
+ # t.topic_tag(:index_as=>[:facetable],:path=>"subject", :default_content_path=>"topic")
27
+ # This is a mods:name. The underscore is purely to avoid namespace conflicts.
28
+ t.name_ {
29
+ # this is a namepart
30
+ t.namePart(:type=>:string, :label=>"generic name")
31
+ # affiliations are great
32
+ t.affiliation
33
+ t.institution(:path=>"affiliation", :index_as=>[:facetable], :label=>"organization")
34
+ t.displayForm
35
+ t.role(:ref=>[:role])
36
+ t.description(:index_as=>[:facetable])
37
+ t.date(:path=>"namePart", :attributes=>{:type=>"date"})
38
+ t.last_name(:path=>"namePart", :attributes=>{:type=>"family"}, :index_as=>[])
39
+ t.first_name(:path=>"namePart", :attributes=>{:type=>"given"}, :label=>"first name")
40
+ t.terms_of_address(:path=>"namePart", :attributes=>{:type=>"termsOfAddress"})
41
+ t.computing_id
42
+ t.name_content(:path=>"text()")
43
+ }
44
+ # lookup :person, :first_name
45
+ t.person(:ref=>:name, :attributes=>{:type=>"personal"}, :index_as=>[:facetable])
46
+ t.department(:proxy=>[:person,:description],:index_as=>[:facetable])
47
+ t.organization(:ref=>:name, :attributes=>{:type=>"corporate"}, :index_as=>[:facetable])
48
+ t.conference(:ref=>:name, :attributes=>{:type=>"conference"}, :index_as=>[:facetable])
49
+ t.role {
50
+ t.text(:path=>"roleTerm",:attributes=>{:type=>"text"}, :index_as=>[])
51
+ t.code(:path=>"roleTerm",:attributes=>{:type=>"code"})
52
+ }
53
+ t.journal(:path=>'relatedItem', :attributes=>{:type=>"host"}) {
54
+ t.title_info(:index_as=>[:facetable],:ref=>[:title_info])
55
+ t.origin_info(:path=>"originInfo") {
56
+ t.publisher
57
+ t.date_issued(:path=>"dateIssued")
58
+ t.issuance(:index_as=>[:facetable])
59
+ }
60
+ t.issn(:path=>"identifier", :attributes=>{:type=>"issn"})
61
+ t.issue(:path=>"part") {
62
+ t.volume(:path=>"detail", :attributes=>{:type=>"volume"}, :default_content_path=>"number")
63
+ t.level(:path=>"detail", :attributes=>{:type=>"number"}, :default_content_path=>"number")
64
+ t.extent
65
+ t.pages(:path=>"extent", :attributes=>{:unit=>"pages"}) {
66
+ t.start
67
+ t.end
68
+ }
69
+ t.start_page(:proxy=>[:pages, :start])
70
+ t.end_page(:proxy=>[:pages, :end])
71
+ t.publication_date(:path=>"date", :index_as=>[])
72
+ }
73
+ }
74
+ t.note
75
+ t.location(:path=>"location") {
76
+ t.url(:path=>"url")
77
+ }
78
+ t.publication_url(:proxy=>[:location,:url])
79
+ t.title(:proxy=>[:title_info, :main_title])
80
+ t.journal_title(:proxy=>[:journal, :title_info, :main_title])
81
+ end
82
+
83
+ # Changes from OM::Properties implementation
84
+ # renamed family_name => last_name
85
+ # start_page & end_page now accessible as [:journal, :issue, :pages, :start] (etc.)
86
+
87
+ end
88
+ end
@@ -1,15 +1,15 @@
1
1
  require 'spec_helper'
2
- require 'solrizer/xml'
2
+ require 'fixtures/mods_article'
3
3
 
4
4
  describe Solrizer::XML::TerminologyBasedSolrizer do
5
5
 
6
6
  before(:all) do
7
- OM::Samples::ModsArticle.send(:include, Solrizer::XML::TerminologyBasedSolrizer)
7
+ Samples::ModsArticle.send(:include, Solrizer::XML::TerminologyBasedSolrizer)
8
8
  end
9
9
 
10
10
  before(:each) do
11
11
  article_xml = fixture( File.join("mods_articles", "hydrangea_article1.xml") )
12
- @mods_article = OM::Samples::ModsArticle.from_xml(article_xml)
12
+ @mods_article = Samples::ModsArticle.from_xml(article_xml)
13
13
  end
14
14
 
15
15
  describe ".to_solr" do
@@ -34,7 +34,7 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
34
34
  # ActiveFedora::NokogiriDatastream.stubs(:accessors).returns(mock_accessors)
35
35
  solr_doc = Hash.new
36
36
  @mods_article.field_mapper = Solrizer::FieldMapper::Default.new
37
- OM::Samples::ModsArticle.terminology.terms.each_pair do |k,v|
37
+ Samples::ModsArticle.terminology.terms.each_pair do |k,v|
38
38
  @mods_article.expects(:solrize_term).with(v, solr_doc, @mods_article.field_mapper)
39
39
  end
40
40
  @mods_article.to_solr(solr_doc)
@@ -49,21 +49,16 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
49
49
  solr_doc["abstract_t"].should == ["ABSTRACT"]
50
50
  solr_doc["title_info_1_language_t"].should == ["finnish"]
51
51
  solr_doc["person_1_role_0_text_t"].should == ["teacher"]
52
+ # No index_as on the code field.
53
+ solr_doc["person_1_role_0_code_t"].should be_nil
52
54
  solr_doc["person_last_name_t"].sort.should == ["FAMILY NAME", "Gautama"]
53
- # This next line will fail until om > 1.0.2 is released
54
- # solr_doc["topic_tag_t"].sort.should == ["CONTROLLED TERM", "TOPIC1", "TOPIC2"]
55
+ solr_doc["topic_tag_t"].sort.should == ["CONTROLLED TERM", "TOPIC 1", "TOPIC 2"]
55
56
 
56
57
  # These are a holdover from an old version of OM
57
- # solr_doc[:finnish_title_info_language_t].should == "finnish"
58
- # solr_doc[:finnish_title_info_main_title_t].should == "Artikkelin otsikko Hydrangea artiklan 1"
58
+ puts "DOC: #{solr_doc.length}"
59
+ solr_doc['journal_0_issue_0_publication_date_t'].should == ["FEB. 2007"]
59
60
 
60
- # solr_doc[:mydate_date].should == "fake-date"
61
- #
62
- # solr_doc[:publisher_t].should be_nil
63
- # solr_doc[:coverage_t].should be_nil
64
- # solr_doc[:creation_date_dt].should be_nil
65
- # solr_doc.should == ""
66
-
61
+
67
62
  end
68
63
 
69
64
  end
@@ -72,14 +67,14 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
72
67
 
73
68
  it "should add fields to a solr document for all nodes corresponding to the given term and its children" do
74
69
  solr_doc = Hash.new
75
- result = @mods_article.solrize_term(OM::Samples::ModsArticle.terminology.retrieve_term(:title_info), solr_doc)
70
+ result = @mods_article.solrize_term(Samples::ModsArticle.terminology.retrieve_term(:title_info), solr_doc)
76
71
  result.should == solr_doc
77
- # @mods_article.solrize_term(:title_info, OM::Samples::ModsArticle.terminology.retrieve_term(:title_info), :solr_doc=>solr_doc).should == ""
72
+ # @mods_article.solrize_term(:title_info, Samples::ModsArticle.terminology.retrieve_term(:title_info), :solr_doc=>solr_doc).should == ""
78
73
  end
79
74
 
80
75
  it "should add multiple fields based on index_as" do
81
76
  fake_solr_doc = {}
82
- term = OM::Samples::ModsArticle.terminology.retrieve_term(:name)
77
+ term = Samples::ModsArticle.terminology.retrieve_term(:name)
83
78
  term.children[:namePart].index_as = [:displayable, :facetable]
84
79
 
85
80
  @mods_article.solrize_term(term, fake_solr_doc)
metadata CHANGED
@@ -1,13 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: solrizer
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
5
- prerelease:
4
+ hash: -2879808512
5
+ prerelease: 6
6
6
  segments:
7
- - 1
8
- - 2
9
7
  - 2
10
- version: 1.2.2
8
+ - 0
9
+ - 0
10
+ - rc
11
+ - 1
12
+ version: 2.0.0.rc1
11
13
  platform: ruby
12
14
  authors:
13
15
  - Matt Zumwalt
@@ -15,7 +17,8 @@ autorequire:
15
17
  bindir: bin
16
18
  cert_chain: []
17
19
 
18
- date: 2012-07-23 00:00:00 Z
20
+ date: 2012-10-15 00:00:00 -05:00
21
+ default_executable:
19
22
  dependencies:
20
23
  - !ruby/object:Gem::Dependency
21
24
  name: nokogiri
@@ -188,9 +191,7 @@ extra_rdoc_files:
188
191
  - README.textile
189
192
  files:
190
193
  - .gitignore
191
- - .rvmrc
192
194
  - Gemfile
193
- - Gemfile.lock
194
195
  - History.txt
195
196
  - LICENSE
196
197
  - README.textile
@@ -222,6 +223,7 @@ files:
222
223
  - spec/fixtures/druid-cm234kq4672-extProperties.xml
223
224
  - spec/fixtures/druid-cm234kq4672-stories.xml
224
225
  - spec/fixtures/druid-hc513kw4806-descMetadata.xml
226
+ - spec/fixtures/mods_article.rb
225
227
  - spec/fixtures/mods_articles/hydrangea_article1.xml
226
228
  - spec/fixtures/test_solr_mappings.yml
227
229
  - spec/spec_helper.rb
@@ -230,6 +232,7 @@ files:
230
232
  - spec/units/field_name_mapper_spec.rb
231
233
  - spec/units/xml_extractor_spec.rb
232
234
  - spec/units/xml_terminology_based_solrizer_spec.rb
235
+ has_rdoc: true
233
236
  homepage: http://github.com/projecthydra/solrizer
234
237
  licenses: []
235
238
 
@@ -250,16 +253,18 @@ required_ruby_version: !ruby/object:Gem::Requirement
250
253
  required_rubygems_version: !ruby/object:Gem::Requirement
251
254
  none: false
252
255
  requirements:
253
- - - ">="
256
+ - - ">"
254
257
  - !ruby/object:Gem::Version
255
- hash: 3
258
+ hash: 25
256
259
  segments:
257
- - 0
258
- version: "0"
260
+ - 1
261
+ - 3
262
+ - 1
263
+ version: 1.3.1
259
264
  requirements: []
260
265
 
261
266
  rubyforge_project:
262
- rubygems_version: 1.8.17
267
+ rubygems_version: 1.6.2
263
268
  signing_key:
264
269
  specification_version: 3
265
270
  summary: A utility for building solr indexes, usually from Fedora repository content with solrizer-fedora extension gem.
@@ -269,6 +274,7 @@ test_files:
269
274
  - spec/fixtures/druid-cm234kq4672-extProperties.xml
270
275
  - spec/fixtures/druid-cm234kq4672-stories.xml
271
276
  - spec/fixtures/druid-hc513kw4806-descMetadata.xml
277
+ - spec/fixtures/mods_article.rb
272
278
  - spec/fixtures/mods_articles/hydrangea_article1.xml
273
279
  - spec/fixtures/test_solr_mappings.yml
274
280
  - spec/spec_helper.rb
data/.rvmrc DELETED
@@ -1,35 +0,0 @@
1
- #!/usr/bin/env bash
2
-
3
- # This is an RVM Project .rvmrc file, used to automatically load the ruby
4
- # development environment upon cd'ing into the directory
5
-
6
- ruby_string="ree-1.8.7"
7
- gemset_name="solrizer"
8
-
9
- #
10
- rvm_install_on_use_flag=1
11
-
12
- # Specify our desired <ruby>[@<gemset>], the @gemset name is optional.
13
- environment_id="${ruby_string}@${gemset_name}"
14
-
15
- # First, attempt to load the desired environment directly from the environment
16
- # file. This is very fast and efficient compared to running through the entire
17
- # CLI and selector. If you want feedback on which environment was used then
18
- # insert the word 'use' after --create as this triggers verbose mode.
19
- #
20
- if [[ -d "${rvm_path:-$HOME/.rvm}/environments" \
21
- && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]] ; then
22
- \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
23
- else
24
- # If the environment file has not yet been created, use the RVM CLI to select.
25
- rvm --create "$environment_id"
26
- fi
27
-
28
- #(
29
- # Ensure that Bundler is installed, install it if it is not.
30
- if ! command -v bundle ; then
31
- printf "The rubygem 'bundler' is not installed, installing it now.\n"
32
- gem install bundler
33
- fi
34
- #)&
35
-
data/Gemfile.lock DELETED
@@ -1,72 +0,0 @@
1
- PATH
2
- remote: .
3
- specs:
4
- solrizer (1.2.0)
5
- daemons
6
- mediashelf-loggable (~> 0.4.7)
7
- nokogiri
8
- om (>= 1.5.0)
9
- stomp
10
- xml-simple
11
-
12
- GEM
13
- remote: http://rubygems.org/
14
- specs:
15
- RedCloth (4.2.8)
16
- archive-tar-minitar (0.5.2)
17
- columnize (0.3.4)
18
- daemons (1.1.8)
19
- diff-lcs (1.1.3)
20
- linecache (0.46)
21
- rbx-require-relative (> 0.0.4)
22
- linecache19 (0.5.12)
23
- ruby_core_source (>= 0.1.4)
24
- mediashelf-loggable (0.4.9)
25
- metaclass (0.0.1)
26
- mocha (0.10.0)
27
- metaclass (~> 0.0.1)
28
- nokogiri (1.5.2)
29
- om (1.6.0)
30
- mediashelf-loggable
31
- nokogiri (>= 1.4.2)
32
- rbx-require-relative (0.0.5)
33
- rcov (0.9.10)
34
- rspec (2.7.0)
35
- rspec-core (~> 2.7.0)
36
- rspec-expectations (~> 2.7.0)
37
- rspec-mocks (~> 2.7.0)
38
- rspec-core (2.7.1)
39
- rspec-expectations (2.7.0)
40
- diff-lcs (~> 1.1.2)
41
- rspec-mocks (2.7.0)
42
- ruby-debug (0.10.4)
43
- columnize (>= 0.1)
44
- ruby-debug-base (~> 0.10.4.0)
45
- ruby-debug-base (0.10.4)
46
- linecache (>= 0.3)
47
- ruby-debug-base19 (0.11.25)
48
- columnize (>= 0.3.1)
49
- linecache19 (>= 0.5.11)
50
- ruby_core_source (>= 0.1.4)
51
- ruby-debug19 (0.11.6)
52
- columnize (>= 0.3.1)
53
- linecache19 (>= 0.5.11)
54
- ruby-debug-base19 (>= 0.11.19)
55
- ruby_core_source (0.1.5)
56
- archive-tar-minitar (>= 0.5.2)
57
- stomp (1.2.2)
58
- xml-simple (1.1.1)
59
- yard (0.7.2)
60
-
61
- PLATFORMS
62
- ruby
63
-
64
- DEPENDENCIES
65
- RedCloth
66
- mocha
67
- rcov
68
- rspec (~> 2.0)
69
- ruby-debug
70
- ruby-debug19
71
- solrizer!
72
- yard