solrizer 1.2.2 → 2.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -1,5 +1,6 @@
1
1
  .DS_Store
2
2
  nohup.out
3
+ Gemfile.lock
3
4
 
4
5
  *.sqlite3
5
6
  *.log
@@ -20,4 +21,4 @@ rerun.txt
20
21
 
21
22
  /.bundle
22
23
  /.yardoc
23
- /doc
24
+ /doc
data/History.txt CHANGED
@@ -1,3 +1,6 @@
1
+ h2. 2.0.0
2
+ HYDRA-827 DO NOT index terms by default
3
+
1
4
  h2. 1.2.2
2
5
  BUG: RSolr::Client doesn't have a delete method. Changed to delete_by_id. (only affects solrizer shell script) Thanks to mkorcy.
3
6
 
@@ -1,3 +1,3 @@
1
1
  module Solrizer
2
- VERSION = "1.2.2"
2
+ VERSION = "2.0.0.rc1"
3
3
  end
@@ -48,14 +48,17 @@ module Solrizer::XML::TerminologyBasedSolrizer
48
48
 
49
49
  # Populate a solr document with solr fields corresponding to the given xml node
50
50
  # Field names are generated using settings from the term in the +doc+'s terminology corresponding to +term_pointer+
51
+ # If the supplied term does not have an index_as attribute, no indexing will be performed.
51
52
  # @param [Nokogiri::XML::Node] node to solrize
52
53
  # @param [OM::XML::Document] doc document the node came from
53
54
  # @param [Array] term_pointer Array pointing to the term that should be used for solrization settings
55
+ # @param [Term] term the term to be solrized
54
56
  # @param [Hash] (optional) solr_doc (values hash) to populate
57
+ # @return [Hash] the solr doc
55
58
  def self.solrize_node(node, doc, term_pointer, term, solr_doc = Hash.new, field_mapper = nil, opts = {})
59
+ return solr_doc unless term.index_as
56
60
  field_mapper ||= self.default_field_mapper
57
61
  terminology = doc.class.terminology
58
- # term = terminology.retrieve_term(*term_pointer)
59
62
 
60
63
  if term.path.kind_of?(Hash) && term.path.has_key?(:attribute)
61
64
  node_value = node.value
@@ -0,0 +1,88 @@
1
+ module Samples
2
+ class ModsArticle
3
+
4
+ include OM::XML::Document
5
+
6
+ set_terminology do |t|
7
+ t.root(:path=>"mods", :xmlns=>"http://www.loc.gov/mods/v3", :schema=>"http://www.loc.gov/standards/mods/v3/mods-3-2.xsd", "xmlns:foo"=>"http://my.custom.namespace")
8
+
9
+
10
+ t.title_info(:path=>"titleInfo") {
11
+ t.main_title(:index_as=>[:facetable],:path=>"title", :label=>"title") {
12
+ t.main_title_lang(:path=>{:attribute=> "xml:lang"})
13
+ }
14
+ t.french_title(:ref=>[:title_info,:main_title], :attributes=>{"xml:lang"=>"fre"})
15
+
16
+ t.language(:index_as=>[:facetable],:path=>{:attribute=>"lang"})
17
+ }
18
+ t.language{
19
+ t.lang_code(:index_as=>[:facetable], :path=>"languageTerm", :attributes=>{:type=>"code"})
20
+ }
21
+ t.abstract(:index_as=>[])
22
+ t.subject {
23
+ t.topic(:index_as=>[:facetable])
24
+ }
25
+ t.topic_tag(:proxy=>[:subject, :topic])
26
+ # t.topic_tag(:index_as=>[:facetable],:path=>"subject", :default_content_path=>"topic")
27
+ # This is a mods:name. The underscore is purely to avoid namespace conflicts.
28
+ t.name_ {
29
+ # this is a namepart
30
+ t.namePart(:type=>:string, :label=>"generic name")
31
+ # affiliations are great
32
+ t.affiliation
33
+ t.institution(:path=>"affiliation", :index_as=>[:facetable], :label=>"organization")
34
+ t.displayForm
35
+ t.role(:ref=>[:role])
36
+ t.description(:index_as=>[:facetable])
37
+ t.date(:path=>"namePart", :attributes=>{:type=>"date"})
38
+ t.last_name(:path=>"namePart", :attributes=>{:type=>"family"}, :index_as=>[])
39
+ t.first_name(:path=>"namePart", :attributes=>{:type=>"given"}, :label=>"first name")
40
+ t.terms_of_address(:path=>"namePart", :attributes=>{:type=>"termsOfAddress"})
41
+ t.computing_id
42
+ t.name_content(:path=>"text()")
43
+ }
44
+ # lookup :person, :first_name
45
+ t.person(:ref=>:name, :attributes=>{:type=>"personal"}, :index_as=>[:facetable])
46
+ t.department(:proxy=>[:person,:description],:index_as=>[:facetable])
47
+ t.organization(:ref=>:name, :attributes=>{:type=>"corporate"}, :index_as=>[:facetable])
48
+ t.conference(:ref=>:name, :attributes=>{:type=>"conference"}, :index_as=>[:facetable])
49
+ t.role {
50
+ t.text(:path=>"roleTerm",:attributes=>{:type=>"text"}, :index_as=>[])
51
+ t.code(:path=>"roleTerm",:attributes=>{:type=>"code"})
52
+ }
53
+ t.journal(:path=>'relatedItem', :attributes=>{:type=>"host"}) {
54
+ t.title_info(:index_as=>[:facetable],:ref=>[:title_info])
55
+ t.origin_info(:path=>"originInfo") {
56
+ t.publisher
57
+ t.date_issued(:path=>"dateIssued")
58
+ t.issuance(:index_as=>[:facetable])
59
+ }
60
+ t.issn(:path=>"identifier", :attributes=>{:type=>"issn"})
61
+ t.issue(:path=>"part") {
62
+ t.volume(:path=>"detail", :attributes=>{:type=>"volume"}, :default_content_path=>"number")
63
+ t.level(:path=>"detail", :attributes=>{:type=>"number"}, :default_content_path=>"number")
64
+ t.extent
65
+ t.pages(:path=>"extent", :attributes=>{:unit=>"pages"}) {
66
+ t.start
67
+ t.end
68
+ }
69
+ t.start_page(:proxy=>[:pages, :start])
70
+ t.end_page(:proxy=>[:pages, :end])
71
+ t.publication_date(:path=>"date", :index_as=>[])
72
+ }
73
+ }
74
+ t.note
75
+ t.location(:path=>"location") {
76
+ t.url(:path=>"url")
77
+ }
78
+ t.publication_url(:proxy=>[:location,:url])
79
+ t.title(:proxy=>[:title_info, :main_title])
80
+ t.journal_title(:proxy=>[:journal, :title_info, :main_title])
81
+ end
82
+
83
+ # Changes from OM::Properties implementation
84
+ # renamed family_name => last_name
85
+ # start_page & end_page now accessible as [:journal, :issue, :pages, :start] (etc.)
86
+
87
+ end
88
+ end
@@ -1,15 +1,15 @@
1
1
  require 'spec_helper'
2
- require 'solrizer/xml'
2
+ require 'fixtures/mods_article'
3
3
 
4
4
  describe Solrizer::XML::TerminologyBasedSolrizer do
5
5
 
6
6
  before(:all) do
7
- OM::Samples::ModsArticle.send(:include, Solrizer::XML::TerminologyBasedSolrizer)
7
+ Samples::ModsArticle.send(:include, Solrizer::XML::TerminologyBasedSolrizer)
8
8
  end
9
9
 
10
10
  before(:each) do
11
11
  article_xml = fixture( File.join("mods_articles", "hydrangea_article1.xml") )
12
- @mods_article = OM::Samples::ModsArticle.from_xml(article_xml)
12
+ @mods_article = Samples::ModsArticle.from_xml(article_xml)
13
13
  end
14
14
 
15
15
  describe ".to_solr" do
@@ -34,7 +34,7 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
34
34
  # ActiveFedora::NokogiriDatastream.stubs(:accessors).returns(mock_accessors)
35
35
  solr_doc = Hash.new
36
36
  @mods_article.field_mapper = Solrizer::FieldMapper::Default.new
37
- OM::Samples::ModsArticle.terminology.terms.each_pair do |k,v|
37
+ Samples::ModsArticle.terminology.terms.each_pair do |k,v|
38
38
  @mods_article.expects(:solrize_term).with(v, solr_doc, @mods_article.field_mapper)
39
39
  end
40
40
  @mods_article.to_solr(solr_doc)
@@ -49,21 +49,16 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
49
49
  solr_doc["abstract_t"].should == ["ABSTRACT"]
50
50
  solr_doc["title_info_1_language_t"].should == ["finnish"]
51
51
  solr_doc["person_1_role_0_text_t"].should == ["teacher"]
52
+ # No index_as on the code field.
53
+ solr_doc["person_1_role_0_code_t"].should be_nil
52
54
  solr_doc["person_last_name_t"].sort.should == ["FAMILY NAME", "Gautama"]
53
- # This next line will fail until om > 1.0.2 is released
54
- # solr_doc["topic_tag_t"].sort.should == ["CONTROLLED TERM", "TOPIC1", "TOPIC2"]
55
+ solr_doc["topic_tag_t"].sort.should == ["CONTROLLED TERM", "TOPIC 1", "TOPIC 2"]
55
56
 
56
57
  # These are a holdover from an old verison of OM
57
- # solr_doc[:finnish_title_info_language_t].should == "finnish"
58
- # solr_doc[:finnish_title_info_main_title_t].should == "Artikkelin otsikko Hydrangea artiklan 1"
58
+ puts "DOC: #{solr_doc.length}"
59
+ solr_doc['journal_0_issue_0_publication_date_t'].should == ["FEB. 2007"]
59
60
 
60
- # solr_doc[:mydate_date].should == "fake-date"
61
- #
62
- # solr_doc[:publisher_t].should be_nil
63
- # solr_doc[:coverage_t].should be_nil
64
- # solr_doc[:creation_date_dt].should be_nil
65
- # solr_doc.should == ""
66
-
61
+
67
62
  end
68
63
 
69
64
  end
@@ -72,14 +67,14 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
72
67
 
73
68
  it "should add fields to a solr document for all nodes corresponding to the given term and its children" do
74
69
  solr_doc = Hash.new
75
- result = @mods_article.solrize_term(OM::Samples::ModsArticle.terminology.retrieve_term(:title_info), solr_doc)
70
+ result = @mods_article.solrize_term(Samples::ModsArticle.terminology.retrieve_term(:title_info), solr_doc)
76
71
  result.should == solr_doc
77
- # @mods_article.solrize_term(:title_info, OM::Samples::ModsArticle.terminology.retrieve_term(:title_info), :solr_doc=>solr_doc).should == ""
72
+ # @mods_article.solrize_term(:title_info, Samples::ModsArticle.terminology.retrieve_term(:title_info), :solr_doc=>solr_doc).should == ""
78
73
  end
79
74
 
80
75
  it "should add multiple fields based on index_as" do
81
76
  fake_solr_doc = {}
82
- term = OM::Samples::ModsArticle.terminology.retrieve_term(:name)
77
+ term = Samples::ModsArticle.terminology.retrieve_term(:name)
83
78
  term.children[:namePart].index_as = [:displayable, :facetable]
84
79
 
85
80
  @mods_article.solrize_term(term, fake_solr_doc)
metadata CHANGED
@@ -1,13 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: solrizer
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
5
- prerelease:
4
+ hash: -2879808512
5
+ prerelease: 6
6
6
  segments:
7
- - 1
8
- - 2
9
7
  - 2
10
- version: 1.2.2
8
+ - 0
9
+ - 0
10
+ - rc
11
+ - 1
12
+ version: 2.0.0.rc1
11
13
  platform: ruby
12
14
  authors:
13
15
  - Matt Zumwalt
@@ -15,7 +17,8 @@ autorequire:
15
17
  bindir: bin
16
18
  cert_chain: []
17
19
 
18
- date: 2012-07-23 00:00:00 Z
20
+ date: 2012-10-15 00:00:00 -05:00
21
+ default_executable:
19
22
  dependencies:
20
23
  - !ruby/object:Gem::Dependency
21
24
  name: nokogiri
@@ -188,9 +191,7 @@ extra_rdoc_files:
188
191
  - README.textile
189
192
  files:
190
193
  - .gitignore
191
- - .rvmrc
192
194
  - Gemfile
193
- - Gemfile.lock
194
195
  - History.txt
195
196
  - LICENSE
196
197
  - README.textile
@@ -222,6 +223,7 @@ files:
222
223
  - spec/fixtures/druid-cm234kq4672-extProperties.xml
223
224
  - spec/fixtures/druid-cm234kq4672-stories.xml
224
225
  - spec/fixtures/druid-hc513kw4806-descMetadata.xml
226
+ - spec/fixtures/mods_article.rb
225
227
  - spec/fixtures/mods_articles/hydrangea_article1.xml
226
228
  - spec/fixtures/test_solr_mappings.yml
227
229
  - spec/spec_helper.rb
@@ -230,6 +232,7 @@ files:
230
232
  - spec/units/field_name_mapper_spec.rb
231
233
  - spec/units/xml_extractor_spec.rb
232
234
  - spec/units/xml_terminology_based_solrizer_spec.rb
235
+ has_rdoc: true
233
236
  homepage: http://github.com/projecthydra/solrizer
234
237
  licenses: []
235
238
 
@@ -250,16 +253,18 @@ required_ruby_version: !ruby/object:Gem::Requirement
250
253
  required_rubygems_version: !ruby/object:Gem::Requirement
251
254
  none: false
252
255
  requirements:
253
- - - ">="
256
+ - - ">"
254
257
  - !ruby/object:Gem::Version
255
- hash: 3
258
+ hash: 25
256
259
  segments:
257
- - 0
258
- version: "0"
260
+ - 1
261
+ - 3
262
+ - 1
263
+ version: 1.3.1
259
264
  requirements: []
260
265
 
261
266
  rubyforge_project:
262
- rubygems_version: 1.8.17
267
+ rubygems_version: 1.6.2
263
268
  signing_key:
264
269
  specification_version: 3
265
270
  summary: A utility for building solr indexes, usually from Fedora repository content with solrizer-fedora extension gem.
@@ -269,6 +274,7 @@ test_files:
269
274
  - spec/fixtures/druid-cm234kq4672-extProperties.xml
270
275
  - spec/fixtures/druid-cm234kq4672-stories.xml
271
276
  - spec/fixtures/druid-hc513kw4806-descMetadata.xml
277
+ - spec/fixtures/mods_article.rb
272
278
  - spec/fixtures/mods_articles/hydrangea_article1.xml
273
279
  - spec/fixtures/test_solr_mappings.yml
274
280
  - spec/spec_helper.rb
data/.rvmrc DELETED
@@ -1,35 +0,0 @@
1
- #!/usr/bin/env bash
2
-
3
- # This is an RVM Project .rvmrc file, used to automatically load the ruby
4
- # development environment upon cd'ing into the directory
5
-
6
- ruby_string="ree-1.8.7"
7
- gemset_name="solrizer"
8
-
9
- #
10
- rvm_install_on_use_flag=1
11
-
12
- # Specify our desired <ruby>[@<gemset>], the @gemset name is optional.
13
- environment_id="${ruby_string}@${gemset_name}"
14
-
15
- # First, attempt to load the desired environment directly from the environment
16
- # file. This is very fast and efficient compared to running through the entire
17
- # CLI and selector. If you want feedback on which environment was used then
18
- # insert the word 'use' after --create as this triggers verbose mode.
19
- #
20
- if [[ -d "${rvm_path:-$HOME/.rvm}/environments" \
21
- && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]] ; then
22
- \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
23
- else
24
- # If the environment file has not yet been created, use the RVM CLI to select.
25
- rvm --create "$environment_id"
26
- fi
27
-
28
- #(
29
- # Ensure that Bundler is installed, install it if it is not.
30
- if ! command -v bundle ; then
31
- printf "The rubygem 'bundler' is not installed, installing it now.\n"
32
- gem install bundler
33
- fi
34
- #)&
35
-
data/Gemfile.lock DELETED
@@ -1,72 +0,0 @@
1
- PATH
2
- remote: .
3
- specs:
4
- solrizer (1.2.0)
5
- daemons
6
- mediashelf-loggable (~> 0.4.7)
7
- nokogiri
8
- om (>= 1.5.0)
9
- stomp
10
- xml-simple
11
-
12
- GEM
13
- remote: http://rubygems.org/
14
- specs:
15
- RedCloth (4.2.8)
16
- archive-tar-minitar (0.5.2)
17
- columnize (0.3.4)
18
- daemons (1.1.8)
19
- diff-lcs (1.1.3)
20
- linecache (0.46)
21
- rbx-require-relative (> 0.0.4)
22
- linecache19 (0.5.12)
23
- ruby_core_source (>= 0.1.4)
24
- mediashelf-loggable (0.4.9)
25
- metaclass (0.0.1)
26
- mocha (0.10.0)
27
- metaclass (~> 0.0.1)
28
- nokogiri (1.5.2)
29
- om (1.6.0)
30
- mediashelf-loggable
31
- nokogiri (>= 1.4.2)
32
- rbx-require-relative (0.0.5)
33
- rcov (0.9.10)
34
- rspec (2.7.0)
35
- rspec-core (~> 2.7.0)
36
- rspec-expectations (~> 2.7.0)
37
- rspec-mocks (~> 2.7.0)
38
- rspec-core (2.7.1)
39
- rspec-expectations (2.7.0)
40
- diff-lcs (~> 1.1.2)
41
- rspec-mocks (2.7.0)
42
- ruby-debug (0.10.4)
43
- columnize (>= 0.1)
44
- ruby-debug-base (~> 0.10.4.0)
45
- ruby-debug-base (0.10.4)
46
- linecache (>= 0.3)
47
- ruby-debug-base19 (0.11.25)
48
- columnize (>= 0.3.1)
49
- linecache19 (>= 0.5.11)
50
- ruby_core_source (>= 0.1.4)
51
- ruby-debug19 (0.11.6)
52
- columnize (>= 0.3.1)
53
- linecache19 (>= 0.5.11)
54
- ruby-debug-base19 (>= 0.11.19)
55
- ruby_core_source (0.1.5)
56
- archive-tar-minitar (>= 0.5.2)
57
- stomp (1.2.2)
58
- xml-simple (1.1.1)
59
- yard (0.7.2)
60
-
61
- PLATFORMS
62
- ruby
63
-
64
- DEPENDENCIES
65
- RedCloth
66
- mocha
67
- rcov
68
- rspec (~> 2.0)
69
- ruby-debug
70
- ruby-debug19
71
- solrizer!
72
- yard