solrizer-fedora 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,25 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+ /.bundle
21
+ /.yardoc
22
+ /doc
23
+ jetty/*
24
+
25
+ ## PROJECT::SPECIFIC
data/.gitmodules ADDED
@@ -0,0 +1,3 @@
1
+ [submodule "jetty"]
2
+ path = jetty
3
+ url = git://github.com/projecthydra/hydra-jetty.git
data/.rvmrc ADDED
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env bash
2
+
3
+ # This is an RVM Project .rvmrc file, used to automatically load the ruby
4
+ # development environment upon cd'ing into the directory
5
+
6
+ ruby_string="ree-1.8.7"
7
+ gemset_name="solrizer-fedora"
8
+
9
+ #
10
+ rvm_install_on_use_flag=1
11
+
12
+ # Specify our desired <ruby>[@<gemset>], the @gemset name is optional.
13
+ environment_id="${ruby_string}@${gemset_name}"
14
+
15
+ # First, attempt to load the desired environment directly from the environment
16
+ # file. This is very fast and efficient compared to running through the entire
17
+ # CLI and selector. If you want feedback on which environment was used then
18
+ # insert the word 'use' after --create as this triggers verbose mode.
19
+ #
20
+ if [[ -d "${rvm_path:-$HOME/.rvm}/environments" \
21
+ && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]] ; then
22
+ \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
23
+ else
24
+ # If the environment file has not yet been created, use the RVM CLI to select.
25
+ rvm --create "$environment_id"
26
+ fi
27
+
28
+ #(
29
+ # Ensure that Bundler is installed, install it if it is not.
30
+ if ! command -v bundle ; then
31
+ printf "The rubygem 'bundler' is not installed, installing it now.\n"
32
+ gem install bundler
33
+ fi
34
+ #)&
35
+
data/Gemfile CHANGED
@@ -1,11 +1,16 @@
1
1
  source "http://rubygems.org"
2
- gem 'active-fedora', '>=2.0.0'
3
- gem 'rsolr'
4
- gem 'solrizer', '>=1.0.0'
5
2
 
6
3
  group :development, :test do
4
+ gem 'jeweler'
5
+ gem 'jettywrapper'
6
+ gem 'rcov'
7
7
  gem 'ruby-debug'
8
8
  gem 'ruby-debug-base'
9
9
  gem 'rspec', '<2.0.0'
10
10
  gem 'mocha'
11
- end
11
+ gem 'yard'
12
+ gem 'RedCloth'
13
+ end
14
+
15
+ # Specify your gem's dependencies in solrizer-fedora.gemspec
16
+ gemspec
data/Gemfile.lock CHANGED
@@ -1,32 +1,58 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ solrizer-fedora (1.0.3)
5
+ active-fedora (>= 2.2.0)
6
+ fastercsv
7
+ rsolr
8
+ solr-ruby (>= 0.0.6)
9
+ solrizer (>= 1.0.0)
10
+ solrizer-fedora
11
+
1
12
  GEM
2
13
  remote: http://rubygems.org/
3
14
  specs:
4
- active-fedora (1.2.7)
5
- activeresource (< 3.0.0)
15
+ RedCloth (4.2.7)
16
+ active-fedora (2.2.0)
17
+ active-fedora
18
+ activeresource
19
+ equivalent-xml
20
+ facets
21
+ mediashelf-loggable
6
22
  mime-types (>= 1.16)
7
23
  multipart-post
8
24
  nokogiri
9
25
  om (>= 1.0)
10
26
  solr-ruby (>= 0.0.6)
11
- solrizer (>= 0.3.0)
27
+ solrizer (> 1.0.0)
12
28
  xml-simple (>= 1.0.12)
13
29
  activeresource (2.3.11)
14
30
  activesupport (= 2.3.11)
15
31
  activesupport (2.3.11)
16
32
  builder (3.0.0)
17
33
  columnize (0.3.2)
34
+ daemons (1.1.3)
35
+ equivalent-xml (0.2.6)
36
+ nokogiri (>= 1.4.3)
18
37
  facets (2.9.1)
38
+ fastercsv (1.5.4)
39
+ git (1.2.5)
40
+ jettywrapper (0.0.3)
41
+ jeweler (1.6.0)
42
+ bundler (~> 1.0.0)
43
+ git (>= 1.2.5)
44
+ rake
19
45
  linecache (0.43)
20
- mediashelf-loggable (0.4.0)
46
+ mediashelf-loggable (0.4.2)
21
47
  mime-types (1.16)
22
48
  mocha (0.9.12)
23
49
  multipart-post (1.1.0)
24
50
  nokogiri (1.4.4)
25
- om (1.0.2)
26
- facets
27
- facets
28
- nokogiri
51
+ om (1.2.4)
29
52
  nokogiri (>= 1.4.2)
53
+ om
54
+ rake (0.8.7)
55
+ rcov (0.9.9)
30
56
  rsolr (1.0.0)
31
57
  builder (>= 2.1.2)
32
58
  rspec (1.3.1)
@@ -36,26 +62,32 @@ GEM
36
62
  ruby-debug-base (0.10.4)
37
63
  linecache (>= 0.3)
38
64
  solr-ruby (0.0.8)
39
- solrizer (1.0.0)
65
+ solrizer (1.0.4)
66
+ daemons
40
67
  mediashelf-loggable
41
68
  mediashelf-loggable
42
69
  nokogiri
43
70
  nokogiri
44
71
  nokogiri
45
- om (>= 1.0.0)
46
72
  om
47
- solr-ruby
73
+ om (>= 1.0.0)
74
+ stomp
48
75
  xml-simple
49
- xml-simple (1.0.14)
76
+ stomp (1.1.8)
77
+ xml-simple (1.0.15)
78
+ yard (0.6.8)
50
79
 
51
80
  PLATFORMS
52
81
  ruby
53
82
 
54
83
  DEPENDENCIES
55
- active-fedora (= 1.2.7)
84
+ RedCloth
85
+ jettywrapper
86
+ jeweler
56
87
  mocha
57
- rsolr
88
+ rcov
58
89
  rspec (< 2.0.0)
59
90
  ruby-debug
60
91
  ruby-debug-base
61
- solrizer (>= 1.0.0)
92
+ solrizer-fedora!
93
+ yard
data/README.textile CHANGED
@@ -2,29 +2,224 @@ h1. solrizer-fedora
2
2
 
3
3
  An extension to projecthydra/solrizer that provides utilities for loading objects from Fedora Repositories and creating solr documents from them.
4
4
 
5
- h2. Usage
5
+ h2. Installation
6
+
7
+ The gem is hosted on rubygems.org. The best way to manage the gems for your project is to use bundler. Create a Gemfile in the root of your application and include the following:
8
+
9
+ <pre>
10
+ source "http://rubygems.org"
11
+
12
+ gem 'solrizer-fedora'
13
+ </pre>
14
+
15
+ Then:
6
16
 
7
- <pre>gem install solrizer-fedora</pre>
17
+ <pre>bundle install</pre>
8
18
 
9
- You must tell the app where to find fedora and solr. Put that information into config/fedora.yml and config/solr.yml
10
19
 
11
- Then...
20
+ h2. Testing
21
+
22
+ In order to run the RSpec tests, it is necessary to have a hydra-jetty instance running. This can be accomplished in two ways:
23
+
24
+ h3. Using the bundled jetty instance:
25
+
26
+ Configure the bundled hydra-jetty instance.
27
+
28
+ <pre>
29
+ git submodule init
30
+ git submodule update
31
+ </pre>
32
+
33
+ Once you have updated the jetty submodule, you can easily run the rspec tests with the following rake task:
34
+
35
+ <pre>
36
+ rake hudson
37
+ </pre>
38
+
39
+ While the primary intention of this task is to provide test coverage and documentation out on projecthydra's "continuous integration server":http://hudson.projecthydra.org, it can also be used locally to run tests without having to install and configure an instance of "hydra-jetty":https://github.com/projecthydra/hydra-jetty.
40
+
41
+ Note: if you have another instance of hydra-jetty running, you should shut it down before running the rake hudson task.
42
+
43
+
44
+ h3. Using a different instance of hydra-jetty:
45
+
46
+ If you prefer, you can run the specs against a different hydra-jetty instance. Follow the instructions included with that project to start the jetty instance.
47
+
48
+
49
+
50
+ h2. Pre-requisite
51
+
52
+ h3. Setup local hydra-jetty
53
+
54
+ In order to use solrizer-fedora, you must first set up an instance of "hydra-jetty":https://github.com/projecthydra/hydra-jetty.
55
+
56
+ Once you have set this up, cd into the directory and type:
57
+
58
+ <pre>java -jar start.jar</pre>
59
+
60
+ You must tell the app where to find fedora and solr. Put that information into config/fedora.yml and config/solr.yml:
61
+
62
+ Sample config files:
63
+
64
+ config/fedora.yml
65
+ <pre>
66
+ development:
67
+ fedora:
68
+ url: http://fedoraAdmin:fedoraAdmin@127.0.0.1:8983/fedora
69
+ solr:
70
+ url: http://127.0.0.1:8983/solr/development
71
+ test:
72
+ fedora:
73
+ url: http://fedoraAdmin:fedoraAdmin@127.0.0.1:8983/fedora
74
+ solr:
75
+ url: http://127.0.0.1:8983/solr/test
76
+ production:
77
+ fedora:
78
+ url: http://fedoraAdmin:fedoraAdmin@127.0.0.1:8080/fedora
79
+ solr:
80
+ url: http://127.0.0.1:8080/solr
81
+ </pre>
82
+
83
+ config/solr.yml
84
+ <pre>
85
+ development:
86
+ default:
87
+ url: http://localhost:8983/solr
88
+ full_text:
89
+ url: http://localhost:8983/solr
90
+ test: &TEST
91
+ default:
92
+ url: http://localhost:8983/solr
93
+ full_text:
94
+ url: http://localhost:8983/solr
95
+ production:
96
+ default:
97
+ url: http://localhost:8080/solr/production
98
+ full_text:
99
+ url: http://localhost:8080/solr/production
100
+ </pre>
101
+
102
+ h2. Usage
103
+
104
+ h3. Fire up the console:
105
+
106
+ Start up a console and load solrizer-fedora:
12
107
 
13
108
  <pre>
14
109
  irb
15
110
  require "rubygems"
16
111
  require "solrizer-fedora"
112
+ </pre>
113
+
114
+ Initialize ActiveFedora:
115
+
116
+ <pre>
117
+ ActiveFedora.init
118
+ </pre>
119
+
120
+
121
+ h3. Create an instance of Solrizer::Fedora::Solrizer:
122
+
123
+ <pre>
17
124
  solrizer = Solrizer::Fedora::Solrizer.new
18
- solrizer.solrize("demo:5")
19
125
  </pre>
20
126
 
127
+ Or, if you want to index full text rather than just fields (and you have provided a full text solr index in your solr.yml):
128
+
129
+ <pre>
130
+ full_text_solrizer = Solrizer::Fedora::Solrizer.new(:index_full_text=>true)
131
+ </pre>
132
+
133
+
134
+ h3. Solrizing a single object in your repository:
135
+
136
+ If you have an existing object in your repository, you can solrize it by passing its pid:
137
+
138
+ <pre>
139
+ solrizer.solrize "demo:5"
140
+ </pre>
141
+
142
+ If you have either an instance of ActiveFedora::Base or Fedora::Object, you can solrize it by passing the object itself:
143
+
144
+ <pre>
145
+ my_object = ActiveFedora::Base.new
146
+
147
+ solrizer.solrize my_object
148
+ </pre>
149
+
150
+ To view the resulting document, open a web browser and go to the jetty's solr admin page (most likely http://localhost/solr/admin) and query your solr instance for the pid (e.g. id:demo\:5).
151
+
152
+
153
+ h3. Solrizing all the objects in your repository:
154
+
155
+ In order to solrize all the objects in your repository, run:
156
+
157
+ <pre>
158
+ solrizer.solrize_objects
159
+ </pre>
160
+
161
+
162
+ h3. Advanced usage
163
+
164
+ Solrizer-fedora inspects your repository objects and attempts to match your objects to your ruby models based on the hasModel declarations in the RELS-EXT datastream.
165
+
166
+ Go to the fedora admin interface (http://FEDORA_BASE_URL/fedora/admin) and create a new object with a PID of "changeme:123" and whatever Label you want.
167
+
168
+ Once you have created that object you will need to add the RELS-EXT datastream to it. Make the datastream ID "RELS-EXT", MIME-Type of "application/rdf+xml", and put the XML below in the XML from text field.
169
+
170
+ <pre>
171
+ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
172
+ <rdf:Description rdf:about="info:fedora/changeme:123">
173
+ <hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:MyObject"></hasModel>
174
+ </rdf:Description>
175
+ </rdf:RDF>
176
+ </pre>
177
+
178
+ Solrizer-fedora will check to see if you have a matching ruby model called MyObject. If you do, it will load it and add any specific solr fields specified. So, assuming the following model:
179
+
180
+ <pre>
181
+ class MyObject < ActiveFedora::Base
182
+
183
+ has_metadata :name => "properties", :type=> ActiveFedora::MetadataDatastream do |m|
184
+ m.field 'foo', :string
185
+ end
186
+
187
+ end
188
+ </pre>
189
+
190
+ Now add another datastream to the changeme:123 object and give it a datastream ID of "properties", a MIME-Type of "text/xml", and add the following to the XML from text field:
191
+
192
+ <pre>
193
+ <fields>
194
+ <foo>bar</foo>
195
+ </fields>
196
+ </pre>
197
+
198
+ If you still have your solrizer instance from above, you can now solrize the object by ID:
199
+
200
+ <pre>
201
+ solrizer.solrize "changeme:123"
202
+ </pre>
203
+
204
+ Now your solr doc with the "changeme:123" ID will include the following:
205
+
206
+ <pre>
207
+ <arr name="foo_t">
208
+ <str>bar</str>
209
+ </arr>
210
+ </pre>
211
+
212
+
213
+
214
+
215
+
21
216
 
22
217
  h2. Note on Patches/Pull Requests
23
218
 
24
219
  * Fork the project.
25
220
  * Make your feature addition or bug fix.
26
- * Add tests for it. This is important so I don't break it in a
27
- future version unintentionally.
221
+ * Add tests for it. This is important so it doesn't get broken unintentionally
222
+ in a future version.
28
223
  * Commit, do not mess with rakefile, version, or history.
29
224
  (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
30
225
  * Send me a pull request. Bonus points for topic branches.
data/Rakefile CHANGED
@@ -1,21 +1,11 @@
1
1
  require 'rubygems'
2
2
  require 'rake'
3
3
 
4
- begin
5
- require 'jeweler'
6
- Jeweler::Tasks.new do |gem|
7
- gem.name = "solrizer-fedora"
8
- gem.summary = %Q{An extension to solrizer that deals with Fedora objects & Repositories}
9
- gem.description = %Q{An extension to projecthydra/solrizer that provides utilities for loading objects from Fedora Repositories and creating solr documents from them.}
10
- gem.email = "matt.zumwalt@yourmediashelf.com"
11
- gem.homepage = "http://github.com/projecthydra/solrizer-fedora"
12
- gem.authors = ["Matt Zumwalt"]
13
- # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
14
- end
15
- Jeweler::GemcutterTasks.new
16
- rescue LoadError
17
- puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
18
- end
4
+ # load rake tasks in lib/tasks
5
+ Dir.glob('lib/tasks/*.rake').each { |r| import r }
6
+
7
+ require 'bundler'
8
+ Bundler::GemHelper.install_tasks
19
9
 
20
10
  require 'spec/rake/spectask'
21
11
  Spec::Rake::SpecTask.new(:spec) do |spec|
@@ -1,3 +1,5 @@
1
+ # This supports an experimental feature that allows Solrizer::Fedora::Extractor.extract_rels_ext to
2
+ # map between RDF URIs and compact model names like mods_document or jp2_document
1
3
  info:fedora/afmodel:SaltDocument : salt_document
2
4
  info:fedora/afmodel:JP2Document : jp2_document
3
5
  info:fedora/afmodel:ModsDocument : mods_document
@@ -1,7 +1,14 @@
1
1
  require "rubygems"
2
2
  require "solrizer"
3
+
4
+ # Solrizer::Fedora is an implementation of Solrizer that reads content from Fedora repositories and indexes it into solr.
5
+ #
6
+ # Note: This module automatically extends Solrizer::Extractor with additional Fedora-specific extractor behaviors from Solrizer::Fedora::Extractor.
3
7
  module Solrizer::Fedora
8
+ def self.version
9
+ Solrizer::Fedora::VERSION
10
+ end
4
11
  end
5
12
  Dir[File.join(File.dirname(__FILE__),"fedora","*.rb")].each {|file| require file }
6
13
 
7
- Solrizer::Extractor.send(:include, Solrizer::Fedora::Extractor)
14
+ Solrizer::Extractor.send(:include, Solrizer::Fedora::Extractor)
@@ -2,6 +2,8 @@ require 'rexml/document'
2
2
  require "nokogiri"
3
3
  require 'yaml'
4
4
 
5
+ # Solrizer::Fedora::Extractor provides Fedora-specific extractor behaviors
6
+ # This module is automatically mixed into Solrizer::Extractor when you load the solrizer-fedora gem into an application. This is carried out in solrizer/fedora.rb
5
7
  module Solrizer::Fedora::Extractor
6
8
 
7
9
  #
@@ -10,12 +12,16 @@ module Solrizer::Fedora::Extractor
10
12
  def extract_rels_ext( text, solr_doc=Hash.new )
11
13
  # TODO: only read in this file once
12
14
 
13
- if defined?(RAILS_ROOT)
14
- config_path = File.join(RAILS_ROOT, "config")
15
- else
16
- config_path = File.join(File.dirname(__FILE__), "..", "..", "..", "config")
17
- end
18
- map = YAML.load(File.open(File.join(config_path, "hydra_types.yml")))
15
+ if defined?(Rails.root.to_s)
16
+ config_path = File.join(Rails.root.to_s, "config","hydra_types.yml")
17
+ config_path = nil unless File.exist?(config_path)
18
+ end
19
+ unless config_path
20
+ config_path = File.join(File.dirname(__FILE__), "..", "..", "..", "config","hydra_types.yml")
21
+ end
22
+
23
+
24
+ map = YAML.load(File.open(config_path))
19
25
 
20
26
  doc = Nokogiri::XML(text)
21
27
  doc.xpath( '//foo:hasModel', 'foo' => 'info:fedora/fedora-system:def/model#' ).each do |element|
@@ -13,10 +13,15 @@ class Indexer
13
13
  @@unique_id
14
14
  end
15
15
 
16
- #
17
- # Member variables
18
- #
19
- attr_accessor :solr, :extractor, :index_full_text
16
+
17
+ # The instance of solr that updates will be written to
18
+ attr_accessor :solr
19
+
20
+ # The extractor to use. This is usually Solrizer::Extractor
21
+ attr_accessor :extractor
22
+
23
+ # [Boolean or "true" or "false"] tells the indexer whether to index full text or just field values
24
+ attr_accessor :index_full_text
20
25
 
21
26
  #
22
27
  # This method performs initialization tasks
@@ -36,68 +41,67 @@ class Indexer
36
41
 
37
42
  #
38
43
  # This method connects to the Solr instance. It looks to see if Blacklight is loaded first for the
39
- # Blacklight.solr_config. If not loaded, it then looks for the RAILS_ROOT/config/solr.yaml file and loads
44
+ # Blacklight.solr_config. If not loaded, it then looks for the Rails.root.to_s/config/solr.yml file and loads
40
45
  # it to get the solr url. The configuration structure can take both the
41
46
  # { "development" => {"default" => { "url" => "http://localhost"}, "fulltext" => { "url" => "http://localhost"} }}
42
47
  # or { "development"=>{"url"=>"http://localhost" }}
43
48
  # Can also take Blacklight.solr_config["url"] and Blacklight.solr_config[:url]
44
49
  #
45
-
46
- def connect
47
-
48
- if ActiveFedora.fedora_config.empty?
49
- ActiveFedora.init
50
- end
51
-
52
- if defined?(Blacklight)
53
- solr_config = Blacklight.solr_config
54
- else
55
- if defined?(RAILS_ROOT)
56
- config_path = File.join(RAILS_ROOT, "config")
57
- yaml = YAML.load(File.open(File.join(config_path, "solr.yml")))
58
- puts RAILS_ENV + "*****"
59
- solr_config = yaml[RAILS_ENV]
60
- puts solr_config.inspect
61
- else
62
- config_path = File.join("config","solr.yml")
63
- unless File.exist?(config_path)
64
- config_path = File.join(File.dirname(__FILE__), "..", "..", "..", "config", "solr.yml")
65
- end
66
- logger.debug "SOLRIZER: reading config from " + config_path.inspect
67
- yaml = YAML.load(File.open(config_path))
68
-
69
- if ENV["environment"].nil?
70
- environment = "development"
71
- else
72
- environment = ENV["environment"]
73
- end #if
74
-
75
- solr_config = yaml[environment]
76
- logger.debug "SOLRIZER solr_config:" + solr_config.inspect
77
- end #if defined?(RAILS_ROOT)
78
-
79
- end #if defined?(Blacklight)
80
-
81
- if index_full_text == true && solr_config['fulltext'].has_key?('url')
82
- url = solr_config['fulltext']['url']
83
- elsif solr_config.has_key?("default") &&
84
- url = solr_config['default']['url']
85
- elsif solr_config.has_key?('url')
86
- url = solr_config['url']
87
- elsif solr_config.has_key?(:url)
88
- url = solr_config[:url]
89
- else
90
- raise
91
- end
92
50
 
93
- @solr = RSolr.connect :url => url
94
- # @connection = Solr::Connection.new(url, :autocommit => :on )
51
+ def connect
52
+
53
+ if ActiveFedora.fedora_config.empty?
54
+ ActiveFedora.init
55
+ end
56
+
57
+ if defined?(Blacklight)
58
+ solr_config = Blacklight.solr_config
59
+ else
60
+ if defined?(Rails.root.to_s)
61
+ config_path = File.join(Rails.root.to_s, "config", "solr.yml")
62
+ yaml = YAML.load(File.open(File.join(config_path, "solr.yml")))
63
+ puts RAILS_ENV + "*****"
64
+ solr_config = yaml[RAILS_ENV]
65
+ puts solr_config.inspect
66
+ else
67
+ config_path = File.join("config","solr.yml")
68
+ unless File.exist?(config_path)
69
+ config_path = File.join(File.dirname(__FILE__), "..", "..", "..", "config", "solr.yml")
70
+ end
71
+ logger.debug "SOLRIZER: reading config from " + config_path.inspect
72
+ yaml = YAML.load(File.open(config_path))
95
73
 
96
- rescue RuntimeError => e
97
- logger.debug "Unable to establish SOLR Connection with #{solr_config.inspect}. Failed with #{e.message}"
98
- raise URI::InvalidURIError
74
+ if ENV["environment"].nil?
75
+ environment = "development"
76
+ else
77
+ environment = ENV["environment"]
78
+ end #if
79
+
80
+ solr_config = yaml[environment]
81
+ logger.debug "SOLRIZER solr_config:" + solr_config.inspect
82
+ end #if defined?(Rails.root)
83
+ end #if defined?(Blacklight)
84
+
85
+ if index_full_text == true && solr_config.has_key?('fulltext') && solr_config['fulltext'].has_key?('url')
86
+ url = solr_config['fulltext']['url']
87
+ elsif solr_config.has_key?("default") && solr_config['default'].has_key?('url')
88
+ url = solr_config['default']['url']
89
+ elsif solr_config.has_key?('url')
90
+ url = solr_config['url']
91
+ elsif solr_config.has_key?(:url)
92
+ url = solr_config[:url]
93
+ else
94
+ raise
99
95
  end
100
96
 
97
+ @solr = RSolr.connect :url => url
98
+ # @connection = Solr::Connection.new(url, :autocommit => :on )
99
+
100
+ rescue RuntimeError => e
101
+ logger.debug "Unable to establish SOLR Connection with #{solr_config.inspect}. Failed with #{e.message}"
102
+ raise URI::InvalidURIError
103
+ end
104
+
101
105
  #
102
106
  # This method extracts the facet categories from the given Fedora object's external tag datastream
103
107
  #
@@ -153,7 +157,6 @@ class Indexer
153
157
  end
154
158
 
155
159
  return solr_doc
156
- # end
157
160
 
158
161
  end
159
162
 
@@ -8,7 +8,7 @@ require 'solrizer/html'
8
8
  # Let people explicitly require xml support if they want it ...
9
9
  # require 'solrizer/xml.rb'
10
10
 
11
- # require 'fastercsv'
11
+ require 'fastercsv' # this is used by solrize_objects when you pass it a csv file of pids
12
12
  require "ruby-debug"
13
13
 
14
14
 
@@ -0,0 +1,5 @@
1
+ module Solrizer
2
+ module Fedora
3
+ VERSION = "1.1.0"
4
+ end
5
+ end
@@ -1,3 +1,44 @@
1
+ desc "Task to execute builds on a Hudson Continuous Integration Server."
2
+ task :hudson do
3
+ if (ENV['RAILS_ENV'] == "test")
4
+ require "jettywrapper"
5
+ jetty_params = {
6
+ :jetty_home => File.expand_path(File.dirname(__FILE__) + '/../../jetty'),
7
+ :quiet => false,
8
+ :jetty_port => 8983,
9
+ :solr_home => File.expand_path(File.dirname(__FILE__) + '/../../jetty/solr/test-core'),
10
+ :fedora_home => File.expand_path(File.dirname(__FILE__) + '/../../jetty/fedora/default'),
11
+ :startup_wait => 25
12
+ }
13
+ error = Jettywrapper.wrap(jetty_params) do
14
+ Rake::Task["doc"].invoke
15
+ Rake::Task["solrizer:fedora:rspec"].invoke
16
+ end
17
+ raise "test failures: #{error}" if error
18
+ else
19
+ system("rake hudson RAILS_ENV=test")
20
+ end
21
+ end
22
+
23
+ # Use yard to build docs
24
+ begin
25
+ require 'yard'
26
+ require 'yard/rake/yardoc_task'
27
+ project_root = File.expand_path("#{File.dirname(__FILE__)}/../../")
28
+ doc_destination = File.join(project_root, 'doc')
29
+
30
+ YARD::Rake::YardocTask.new(:doc) do |yt|
31
+ yt.files = Dir.glob(File.join(project_root, 'lib', '**', '*.rb')) +
32
+ [ File.join(project_root, 'README.textile') ]
33
+ yt.options = ['--output-dir', doc_destination, '--readme', 'README.textile']
34
+ end
35
+ rescue LoadError
36
+ desc "Generate YARD Documentation"
37
+ task :doc do
38
+ abort "Please install the YARD gem to generate rdoc."
39
+ end
40
+ end
41
+
1
42
  namespace :solrizer do
2
43
 
3
44
  namespace :fedora do
@@ -30,6 +71,15 @@ namespace :solrizer do
30
71
  solrizer.solrize_objects
31
72
  puts "Solrizer task complete."
32
73
  end
74
+
75
+ Spec::Rake::SpecTask.new(:rspec) do |t|
76
+ t.spec_files = FileList['spec/**/*_spec.rb']
77
+ t.rcov = true
78
+ t.rcov_opts = lambda do
79
+ IO.readlines("spec/rcov.opts").map {|l| l.chomp.split " "}.flatten
80
+ end
81
+ end
82
+
33
83
  end
34
84
 
35
85
  end
@@ -1,93 +1,34 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
1
  # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "solrizer/fedora/version"
5
4
 
6
5
  Gem::Specification.new do |s|
7
- s.name = %q{solrizer-fedora}
8
- s.version = "1.0.3"
9
-
10
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
6
+ s.name = "solrizer-fedora"
7
+ s.version = Solrizer::Fedora::VERSION
11
8
  s.authors = ["Matt Zumwalt"]
12
- s.date = %q{2011-05-03}
9
+ s.date = %q{2011-05-20}
13
10
  s.description = %q{An extension to projecthydra/solrizer that provides utilities for loading objects from Fedora Repositories and creating solr documents from them.}
14
11
  s.email = %q{matt.zumwalt@yourmediashelf.com}
15
12
  s.extra_rdoc_files = [
16
13
  "LICENSE",
17
14
  "README.textile"
18
15
  ]
19
- s.files = [
20
- ".document",
21
- "Gemfile",
22
- "Gemfile.lock",
23
- "History.textile",
24
- "LICENSE",
25
- "README.textile",
26
- "Rakefile",
27
- "VERSION",
28
- "config/fedora.yml",
29
- "config/hydra_types.yml",
30
- "config/solr.yml",
31
- "lib/solrizer-fedora.rb",
32
- "lib/solrizer/fedora.rb",
33
- "lib/solrizer/fedora/extractor.rb",
34
- "lib/solrizer/fedora/indexer.rb",
35
- "lib/solrizer/fedora/repository.rb",
36
- "lib/solrizer/fedora/solrizer.rb",
37
- "lib/tasks/solrizer-fedora.rake",
38
- "solrizer-fedora.gemspec",
39
- "spec/fixtures/rels_ext_cmodel.xml",
40
- "spec/integration/fedora_indexer_spec.rb",
41
- "spec/lib/solrizer/indexer_spec.rb",
42
- "spec/rcov.opts",
43
- "spec/spec.opts",
44
- "spec/spec_helper.rb",
45
- "spec/units/fedora_extractor_spec.rb",
46
- "spec/units/fedora_indexer_spec.rb",
47
- "spec/units/fedora_solrizer_spec.rb"
48
- ]
16
+ s.platform = Gem::Platform::RUBY
49
17
  s.homepage = %q{http://github.com/projecthydra/solrizer-fedora}
50
18
  s.require_paths = ["lib"]
51
- s.rubygems_version = %q{1.3.7}
19
+ s.rubygems_version = %q{1.7.2}
52
20
  s.summary = %q{An extension to solrizer that deals with Fedora objects & Repositories}
53
- s.test_files = [
54
- "spec/integration/fedora_indexer_spec.rb",
55
- "spec/lib/solrizer/indexer_spec.rb",
56
- "spec/spec_helper.rb",
57
- "spec/units/fedora_extractor_spec.rb",
58
- "spec/units/fedora_indexer_spec.rb",
59
- "spec/units/fedora_solrizer_spec.rb"
60
- ]
61
21
 
62
- if s.respond_to? :specification_version then
63
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
64
- s.specification_version = 3
22
+ s.rubyforge_project = "solrizer-fedora"
65
23
 
66
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
67
- s.add_runtime_dependency(%q<active-fedora>, [">= 2.0.0"])
68
- s.add_runtime_dependency(%q<rsolr>, [">= 0"])
69
- s.add_runtime_dependency(%q<solrizer>, [">= 1.0.0"])
70
- s.add_development_dependency(%q<ruby-debug>, [">= 0"])
71
- s.add_development_dependency(%q<ruby-debug-base>, [">= 0"])
72
- s.add_development_dependency(%q<rspec>, ["< 2.0.0"])
73
- s.add_development_dependency(%q<mocha>, [">= 0"])
74
- else
75
- s.add_dependency(%q<active-fedora>, [">= 2.0.0"])
76
- s.add_dependency(%q<rsolr>, [">= 0"])
77
- s.add_dependency(%q<solrizer>, [">= 1.0.0"])
78
- s.add_dependency(%q<ruby-debug>, [">= 0"])
79
- s.add_dependency(%q<ruby-debug-base>, [">= 0"])
80
- s.add_dependency(%q<rspec>, ["< 2.0.0"])
81
- s.add_dependency(%q<mocha>, [">= 0"])
82
- end
83
- else
84
- s.add_dependency(%q<active-fedora>, [">= 2.0.0"])
85
- s.add_dependency(%q<rsolr>, [">= 0"])
86
- s.add_dependency(%q<solrizer>, [">= 1.0.0"])
87
- s.add_dependency(%q<ruby-debug>, [">= 0"])
88
- s.add_dependency(%q<ruby-debug-base>, [">= 0"])
89
- s.add_dependency(%q<rspec>, ["< 2.0.0"])
90
- s.add_dependency(%q<mocha>, [">= 0"])
91
- end
92
- end
24
+ s.add_dependency('solr-ruby', '>= 0.0.6')
25
+ s.add_dependency('active-fedora', '>= 2.3.0')
26
+ s.add_dependency('rsolr')
27
+ s.add_dependency('solrizer', '>=1.0.0')
28
+ s.add_dependency('fastercsv') # this is used by solrize_objects when you pass it a csv file of pids
93
29
 
30
+ s.files = `git ls-files`.split("\n")
31
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
32
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
33
+ s.require_paths = ["lib"]
34
+ end
@@ -7,8 +7,9 @@ describe Solrizer::Fedora::Indexer do
7
7
 
8
8
  before(:all) do
9
9
 
10
- unless defined?(RAILS_ROOT) and defined?(RAILS_ENV)
11
- RAILS_ROOT = "."
10
+ unless defined?(Rails) and defined?(RAILS_ENV)
11
+ Object.const_set("Rails", String)
12
+ Rails.stubs(:root).returns(".") #RAILS_ROOT = "."
12
13
  RAILS_ENV = "test"
13
14
  end
14
15
 
@@ -34,45 +35,59 @@ describe Solrizer::Fedora::Indexer do
34
35
 
35
36
  describe "#new" do
36
37
  it "should return a URL from solr_config if the config has a :url" do
37
- Blacklight.stubs(:solr_config).returns({:url => "http://foo.com:8080/solr"})
38
- @indexer = Solrizer::Fedora::Indexer.new
38
+ Blacklight.stubs(:solr_config).returns({:url => "http://foo.com:8080/solr"})
39
+ @indexer = Solrizer::Fedora::Indexer.new
40
+ @indexer.solr.uri.to_s.should == "http://foo.com:8080/solr/"
39
41
  end
40
42
 
41
43
  it "should return a URL from solr_config if the config has a 'url' " do
42
- Blacklight.stubs(:solr_config).returns({'url' => "http://foo.com:8080/solr"})
43
- @indexer = Solrizer::Fedora::Indexer.new
44
+ Blacklight.stubs(:solr_config).returns({'url' => "http://foo.com:8080/solr"})
45
+ @indexer = Solrizer::Fedora::Indexer.new
46
+ @indexer.solr.uri.to_s.should == "http://foo.com:8080/solr/"
44
47
  end
45
48
 
46
49
  it "should raise and error if there is not a :url or 'url' in the config hash" do
47
- Blacklight.stubs(:solr_config).returns({'boosh' => "http://foo.com:8080/solr"})
48
- lambda { Solrizer::Fedora::Indexer.new }.should raise_error(URI::InvalidURIError)
50
+ Blacklight.stubs(:solr_config).returns({'boosh' => "http://foo.com:8080/solr"})
51
+ lambda { Solrizer::Fedora::Indexer.new }.should raise_error(URI::InvalidURIError)
49
52
  end
50
53
 
51
54
  it "should return a fulltext URL if solr_config has a fulltext url defined" do
52
- Blacklight.stubs(:solr_config).returns({'fulltext' =>{ 'url' => "http://foo.com:8080/solr"}})
53
- @indexer = Solrizer::Fedora::Indexer.new(:index_full_text => true)
55
+ Blacklight.stubs(:solr_config).returns({'fulltext' =>{ 'url' => "http://fulltext.com:8080/solr"}, 'default' =>{ 'url' => "http://default.com:8080/solr"}})
56
+ @indexer = Solrizer::Fedora::Indexer.new(:index_full_text => true)
57
+ @indexer.solr.uri.to_s.should == "http://fulltext.com:8080/solr/"
54
58
  end
55
59
 
60
+ it "should gracefully handle when index_full_text is true but there is no fulltext in the configuration" do
61
+ Blacklight.stubs(:solr_config).returns({'default' =>{ 'url' => "http://foo.com:8080/solr"}})
62
+ @indexer = Solrizer::Fedora::Indexer.new(:index_full_text => true)
63
+ @indexer.solr.uri.to_s.should == "http://foo.com:8080/solr/"
64
+ end
65
+
56
66
  it "should return a fulltext URL if solr_config has a default url defined" do
57
- Blacklight.stubs(:solr_config).returns({'default' =>{ 'url' => "http://foo.com:8080/solr"}})
58
- @indexer = Solrizer::Fedora::Indexer.new(:index_full_text => false)
67
+ Blacklight.stubs(:solr_config).returns({'default' =>{ 'url' => "http://foo.com:8080/solr"}})
68
+ @indexer = Solrizer::Fedora::Indexer.new(:index_full_text => false)
69
+ @indexer.solr.uri.to_s.should == "http://foo.com:8080/solr/"
59
70
  end
60
71
 
61
72
  it "should find the solr.yml even if Blacklight is not loaded" do
62
73
  Object.const_set("Blacklight_temp", Blacklight )
63
74
  Object.send(:remove_const, :Blacklight)
64
75
  YAML.stubs(:load).returns({'test' => {'url' => "http://thereisnoblacklightrunning.edu:8080/solr"}})
76
+ ENV["environment"]="test"
65
77
  @indexer = Solrizer::Fedora::Indexer.new
66
78
  Object.const_set("Blacklight", Blacklight_temp )
79
+ ENV["environment"]=nil
67
80
  end
68
81
 
69
82
  it "should find the solr.yml even if Blacklight is not loaded and RAILS is not loaded" do
70
83
  Object.const_set("Blacklight_temp", Blacklight )
71
84
  Object.send(:remove_const, :Blacklight)
72
- Object.send(:remove_const, :RAILS_ROOT)
85
+ Object.const_set("Rails_temp", Rails)
86
+ Object.send(:remove_const, :Rails)
73
87
  YAML.stubs(:load).returns({'development' => {'url' => "http://noblacklight.norails.edu:8080/solr"}})
74
88
  @indexer = Solrizer::Fedora::Indexer.new
75
- Object.const_set("Blacklight", Blacklight_temp )
89
+ Object.const_set("Blacklight", Blacklight_temp )
90
+ Object.const_set("Rails", Rails_temp)
76
91
  end
77
92
  end
78
93
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: solrizer-fedora
3
3
  version: !ruby/object:Gem::Version
4
- hash: 17
5
- prerelease: false
4
+ hash: 19
5
+ prerelease:
6
6
  segments:
7
7
  - 1
8
+ - 1
8
9
  - 0
9
- - 3
10
- version: 1.0.3
10
+ version: 1.1.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Matt Zumwalt
@@ -15,60 +15,44 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-05-03 00:00:00 -05:00
19
- default_executable:
18
+ date: 2011-05-20 00:00:00 Z
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
22
- type: :runtime
21
+ name: solr-ruby
23
22
  prerelease: false
24
- name: active-fedora
25
- version_requirements: &id001 !ruby/object:Gem::Requirement
23
+ requirement: &id001 !ruby/object:Gem::Requirement
26
24
  none: false
27
25
  requirements:
28
26
  - - ">="
29
27
  - !ruby/object:Gem::Version
30
- hash: 15
28
+ hash: 19
31
29
  segments:
32
- - 2
33
30
  - 0
34
31
  - 0
35
- version: 2.0.0
36
- requirement: *id001
37
- - !ruby/object:Gem::Dependency
32
+ - 6
33
+ version: 0.0.6
38
34
  type: :runtime
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ name: active-fedora
39
38
  prerelease: false
40
- name: rsolr
41
- version_requirements: &id002 !ruby/object:Gem::Requirement
39
+ requirement: &id002 !ruby/object:Gem::Requirement
42
40
  none: false
43
41
  requirements:
44
42
  - - ">="
45
43
  - !ruby/object:Gem::Version
46
44
  hash: 3
47
45
  segments:
46
+ - 2
47
+ - 3
48
48
  - 0
49
- version: "0"
50
- requirement: *id002
51
- - !ruby/object:Gem::Dependency
49
+ version: 2.3.0
52
50
  type: :runtime
53
- prerelease: false
54
- name: solrizer
55
- version_requirements: &id003 !ruby/object:Gem::Requirement
56
- none: false
57
- requirements:
58
- - - ">="
59
- - !ruby/object:Gem::Version
60
- hash: 23
61
- segments:
62
- - 1
63
- - 0
64
- - 0
65
- version: 1.0.0
66
- requirement: *id003
51
+ version_requirements: *id002
67
52
  - !ruby/object:Gem::Dependency
68
- type: :development
53
+ name: rsolr
69
54
  prerelease: false
70
- name: ruby-debug
71
- version_requirements: &id004 !ruby/object:Gem::Requirement
55
+ requirement: &id003 !ruby/object:Gem::Requirement
72
56
  none: false
73
57
  requirements:
74
58
  - - ">="
@@ -77,42 +61,28 @@ dependencies:
77
61
  segments:
78
62
  - 0
79
63
  version: "0"
80
- requirement: *id004
64
+ type: :runtime
65
+ version_requirements: *id003
81
66
  - !ruby/object:Gem::Dependency
82
- type: :development
67
+ name: solrizer
83
68
  prerelease: false
84
- name: ruby-debug-base
85
- version_requirements: &id005 !ruby/object:Gem::Requirement
69
+ requirement: &id004 !ruby/object:Gem::Requirement
86
70
  none: false
87
71
  requirements:
88
72
  - - ">="
89
73
  - !ruby/object:Gem::Version
90
- hash: 3
91
- segments:
92
- - 0
93
- version: "0"
94
- requirement: *id005
95
- - !ruby/object:Gem::Dependency
96
- type: :development
97
- prerelease: false
98
- name: rspec
99
- version_requirements: &id006 !ruby/object:Gem::Requirement
100
- none: false
101
- requirements:
102
- - - <
103
- - !ruby/object:Gem::Version
104
- hash: 15
74
+ hash: 23
105
75
  segments:
106
- - 2
76
+ - 1
107
77
  - 0
108
78
  - 0
109
- version: 2.0.0
110
- requirement: *id006
79
+ version: 1.0.0
80
+ type: :runtime
81
+ version_requirements: *id004
111
82
  - !ruby/object:Gem::Dependency
112
- type: :development
83
+ name: fastercsv
113
84
  prerelease: false
114
- name: mocha
115
- version_requirements: &id007 !ruby/object:Gem::Requirement
85
+ requirement: &id005 !ruby/object:Gem::Requirement
116
86
  none: false
117
87
  requirements:
118
88
  - - ">="
@@ -121,7 +91,8 @@ dependencies:
121
91
  segments:
122
92
  - 0
123
93
  version: "0"
124
- requirement: *id007
94
+ type: :runtime
95
+ version_requirements: *id005
125
96
  description: An extension to projecthydra/solrizer that provides utilities for loading objects from Fedora Repositories and creating solr documents from them.
126
97
  email: matt.zumwalt@yourmediashelf.com
127
98
  executables: []
@@ -133,13 +104,15 @@ extra_rdoc_files:
133
104
  - README.textile
134
105
  files:
135
106
  - .document
107
+ - .gitignore
108
+ - .gitmodules
109
+ - .rvmrc
136
110
  - Gemfile
137
111
  - Gemfile.lock
138
112
  - History.textile
139
113
  - LICENSE
140
114
  - README.textile
141
115
  - Rakefile
142
- - VERSION
143
116
  - config/fedora.yml
144
117
  - config/hydra_types.yml
145
118
  - config/solr.yml
@@ -149,6 +122,7 @@ files:
149
122
  - lib/solrizer/fedora/indexer.rb
150
123
  - lib/solrizer/fedora/repository.rb
151
124
  - lib/solrizer/fedora/solrizer.rb
125
+ - lib/solrizer/fedora/version.rb
152
126
  - lib/tasks/solrizer-fedora.rake
153
127
  - solrizer-fedora.gemspec
154
128
  - spec/fixtures/rels_ext_cmodel.xml
@@ -160,7 +134,6 @@ files:
160
134
  - spec/units/fedora_extractor_spec.rb
161
135
  - spec/units/fedora_indexer_spec.rb
162
136
  - spec/units/fedora_solrizer_spec.rb
163
- has_rdoc: true
164
137
  homepage: http://github.com/projecthydra/solrizer-fedora
165
138
  licenses: []
166
139
 
@@ -189,15 +162,19 @@ required_rubygems_version: !ruby/object:Gem::Requirement
189
162
  version: "0"
190
163
  requirements: []
191
164
 
192
- rubyforge_project:
193
- rubygems_version: 1.3.7
165
+ rubyforge_project: solrizer-fedora
166
+ rubygems_version: 1.7.2
194
167
  signing_key:
195
168
  specification_version: 3
196
169
  summary: An extension to solrizer that deals with Fedora objects & Repositories
197
170
  test_files:
171
+ - spec/fixtures/rels_ext_cmodel.xml
198
172
  - spec/integration/fedora_indexer_spec.rb
199
173
  - spec/lib/solrizer/indexer_spec.rb
174
+ - spec/rcov.opts
175
+ - spec/spec.opts
200
176
  - spec/spec_helper.rb
201
177
  - spec/units/fedora_extractor_spec.rb
202
178
  - spec/units/fedora_indexer_spec.rb
203
179
  - spec/units/fedora_solrizer_spec.rb
180
+ has_rdoc:
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 1.0.3