solrizer-fedora 1.0.3 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,25 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+ /.bundle
21
+ /.yardoc
22
+ /doc
23
+ jetty/*
24
+
25
+ ## PROJECT::SPECIFIC
data/.gitmodules ADDED
@@ -0,0 +1,3 @@
1
+ [submodule "jetty"]
2
+ path = jetty
3
+ url = git://github.com/projecthydra/hydra-jetty.git
data/.rvmrc ADDED
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env bash
2
+
3
+ # This is an RVM Project .rvmrc file, used to automatically load the ruby
4
+ # development environment upon cd'ing into the directory
5
+
6
+ ruby_string="ree-1.8.7"
7
+ gemset_name="solrizer-fedora"
8
+
9
+ #
10
+ rvm_install_on_use_flag=1
11
+
12
+ # Specify our desired <ruby>[@<gemset>], the @gemset name is optional.
13
+ environment_id="${ruby_string}@${gemset_name}"
14
+
15
+ # First, attempt to load the desired environment directly from the environment
16
+ # file. This is very fast and efficient compared to running through the entire
17
+ # CLI and selector. If you want feedback on which environment was used then
18
+ # insert the word 'use' after --create as this triggers verbose mode.
19
+ #
20
+ if [[ -d "${rvm_path:-$HOME/.rvm}/environments" \
21
+ && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]] ; then
22
+ \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
23
+ else
24
+ # If the environment file has not yet been created, use the RVM CLI to select.
25
+ rvm --create "$environment_id"
26
+ fi
27
+
28
+ #(
29
+ # Ensure that Bundler is installed, install it if it is not.
30
+ if ! command -v bundle ; then
31
+ printf "The rubygem 'bundler' is not installed, installing it now.\n"
32
+ gem install bundler
33
+ fi
34
+ #)&
35
+
data/Gemfile CHANGED
@@ -1,11 +1,16 @@
1
1
  source "http://rubygems.org"
2
- gem 'active-fedora', '>=2.0.0'
3
- gem 'rsolr'
4
- gem 'solrizer', '>=1.0.0'
5
2
 
6
3
  group :development, :test do
4
+ gem 'jeweler'
5
+ gem 'jettywrapper'
6
+ gem 'rcov'
7
7
  gem 'ruby-debug'
8
8
  gem 'ruby-debug-base'
9
9
  gem 'rspec', '<2.0.0'
10
10
  gem 'mocha'
11
- end
11
+ gem 'yard'
12
+ gem 'RedCloth'
13
+ end
14
+
15
+ # Specify your gem's dependencies in solrizer-fedora.gemspec
16
+ gemspec
data/Gemfile.lock CHANGED
@@ -1,32 +1,58 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ solrizer-fedora (1.0.3)
5
+ active-fedora (>= 2.2.0)
6
+ fastercsv
7
+ rsolr
8
+ solr-ruby (>= 0.0.6)
9
+ solrizer (>= 1.0.0)
10
+ solrizer-fedora
11
+
1
12
  GEM
2
13
  remote: http://rubygems.org/
3
14
  specs:
4
- active-fedora (1.2.7)
5
- activeresource (< 3.0.0)
15
+ RedCloth (4.2.7)
16
+ active-fedora (2.2.0)
17
+ active-fedora
18
+ activeresource
19
+ equivalent-xml
20
+ facets
21
+ mediashelf-loggable
6
22
  mime-types (>= 1.16)
7
23
  multipart-post
8
24
  nokogiri
9
25
  om (>= 1.0)
10
26
  solr-ruby (>= 0.0.6)
11
- solrizer (>= 0.3.0)
27
+ solrizer (> 1.0.0)
12
28
  xml-simple (>= 1.0.12)
13
29
  activeresource (2.3.11)
14
30
  activesupport (= 2.3.11)
15
31
  activesupport (2.3.11)
16
32
  builder (3.0.0)
17
33
  columnize (0.3.2)
34
+ daemons (1.1.3)
35
+ equivalent-xml (0.2.6)
36
+ nokogiri (>= 1.4.3)
18
37
  facets (2.9.1)
38
+ fastercsv (1.5.4)
39
+ git (1.2.5)
40
+ jettywrapper (0.0.3)
41
+ jeweler (1.6.0)
42
+ bundler (~> 1.0.0)
43
+ git (>= 1.2.5)
44
+ rake
19
45
  linecache (0.43)
20
- mediashelf-loggable (0.4.0)
46
+ mediashelf-loggable (0.4.2)
21
47
  mime-types (1.16)
22
48
  mocha (0.9.12)
23
49
  multipart-post (1.1.0)
24
50
  nokogiri (1.4.4)
25
- om (1.0.2)
26
- facets
27
- facets
28
- nokogiri
51
+ om (1.2.4)
29
52
  nokogiri (>= 1.4.2)
53
+ om
54
+ rake (0.8.7)
55
+ rcov (0.9.9)
30
56
  rsolr (1.0.0)
31
57
  builder (>= 2.1.2)
32
58
  rspec (1.3.1)
@@ -36,26 +62,32 @@ GEM
36
62
  ruby-debug-base (0.10.4)
37
63
  linecache (>= 0.3)
38
64
  solr-ruby (0.0.8)
39
- solrizer (1.0.0)
65
+ solrizer (1.0.4)
66
+ daemons
40
67
  mediashelf-loggable
41
68
  mediashelf-loggable
42
69
  nokogiri
43
70
  nokogiri
44
71
  nokogiri
45
- om (>= 1.0.0)
46
72
  om
47
- solr-ruby
73
+ om (>= 1.0.0)
74
+ stomp
48
75
  xml-simple
49
- xml-simple (1.0.14)
76
+ stomp (1.1.8)
77
+ xml-simple (1.0.15)
78
+ yard (0.6.8)
50
79
 
51
80
  PLATFORMS
52
81
  ruby
53
82
 
54
83
  DEPENDENCIES
55
- active-fedora (= 1.2.7)
84
+ RedCloth
85
+ jettywrapper
86
+ jeweler
56
87
  mocha
57
- rsolr
88
+ rcov
58
89
  rspec (< 2.0.0)
59
90
  ruby-debug
60
91
  ruby-debug-base
61
- solrizer (>= 1.0.0)
92
+ solrizer-fedora!
93
+ yard
data/README.textile CHANGED
@@ -2,29 +2,224 @@ h1. solrizer-fedora
2
2
 
3
3
  An extension to projecthydra/solrizer that provides utilities for loading objects from Fedora Repositories and creating solr documents from them.
4
4
 
5
- h2. Usage
5
+ h2. Installation
6
+
7
+ The gem is hosted on rubygems.org. The best way to manage the gems for your project is to use bundler. Create a Gemfile in the root of your application and include the following:
8
+
9
+ <pre>
10
+ source "http://rubygems.org"
11
+
12
+ gem 'solrizer-fedora'
13
+ </pre>
14
+
15
+ Then:
6
16
 
7
- <pre>gem install solrizer-fedora</pre>
17
+ <pre>bundle install</pre>
8
18
 
9
- You must tell the app where to find fedora and solr. Put that information into config/fedora.yml and config/solr.yml
10
19
 
11
- Then...
20
+ h2. Testing
21
+
22
+ In order to run the RSpec tests, it is necessary to have a hydra-jetty instance running. This can be accomplished in one of two ways:
23
+
24
+ h3. Using the bundled jetty instance:
25
+
26
+ Configure the bundled hydra-jetty instance.
27
+
28
+ <pre>
29
+ git submodule init
30
+ git submodule update
31
+ </pre>
32
+
33
+ Once you have updated the jetty submodule, you can easily run the rspec tests with the following rake task:
34
+
35
+ <pre>
36
+ rake hudson
37
+ </pre>
38
+
39
+ While the primary intention of this task is to provide test coverage and documentation out on projecthydra's "continuous integration server":http://hudson.projecthydra.org, it can also be used locally to run tests without having to install and configure an instance of "hydra-jetty":https://github.com/projecthydra/hydra-jetty.
40
+
41
+ Note: if you have another instance of hydra-jetty running, you should shut it down prior to running the rake hudson task.
42
+
43
+
44
+ h3. Using a different instance of hydra-jetty:
45
+
46
+ If you prefer, you can run the specs against a different hydra-jetty instance. Follow the instructions included with that project to start the jetty instance.
47
+
48
+
49
+
50
+ h2. Pre-requisite
51
+
52
+ h3. Setup local hydra-jetty
53
+
54
+ In order to use solrizer-fedora, you must first set up an instance of "hydra-jetty":https://github.com/projecthydra/hydra-jetty.
55
+
56
+ Once you have set this up, cd into the directory and type:
57
+
58
+ <pre>java -jar start.jar</pre>
59
+
60
+ You must tell the app where to find fedora and solr. Put that information into config/fedora.yml and config/solr.yml:
61
+
62
+ Sample config files:
63
+
64
+ config/fedora.yml
65
+ <pre>
66
+ development:
67
+ fedora:
68
+ url: http://fedoraAdmin:fedoraAdmin@127.0.0.1:8983/fedora
69
+ solr:
70
+ url: http://127.0.0.1:8983/solr/development
71
+ test:
72
+ fedora:
73
+ url: http://fedoraAdmin:fedoraAdmin@127.0.0.1:8983/fedora
74
+ solr:
75
+ url: http://127.0.0.1:8983/solr/test
76
+ production:
77
+ fedora:
78
+ url: http://fedoraAdmin:fedoraAdmin@127.0.0.1:8080/fedora
79
+ solr:
80
+ url: http://127.0.0.1:8080/solr
81
+ </pre>
82
+
83
+ config/solr.yml
84
+ <pre>
85
+ development:
86
+ default:
87
+ url: http://localhost:8983/solr
88
+ fulltext:
89
+ url: http://localhost:8983/solr
90
+ test: &TEST
91
+ default:
92
+ url: http://localhost:8983/solr
93
+ fulltext:
94
+ url: http://localhost:8983/solr
95
+ production:
96
+ default:
97
+ url: http://localhost:8080/solr/production
98
+ fulltext:
99
+ url: http://localhost:8080/solr/production
100
+ </pre>
101
+
102
+ h2. Usage
103
+
104
+ h3. Fire up the console:
105
+
106
+ Start up a console and load solrizer-fedora:
12
107
 
13
108
  <pre>
14
109
  irb
15
110
  require "rubygems"
16
111
  require "solrizer-fedora"
112
+ </pre>
113
+
114
+ Initialize ActiveFedora:
115
+
116
+ <pre>
117
+ ActiveFedora.init
118
+ </pre>
119
+
120
+
121
+ h3. Create an instance of Solrizer::Fedora::Solrizer:
122
+
123
+ <pre>
17
124
  solrizer = Solrizer::Fedora::Solrizer.new
18
- solrizer.solrize("demo:5")
19
125
  </pre>
20
126
 
127
+ Or, if you want to index full text rather than just fields (and you have provided a full text solr index in your solr.yml):
128
+
129
+ <pre>
130
+ full_text_solrizer = Solrizer::Fedora::Solrizer.new(:index_full_text=>true)
131
+ </pre>
132
+
133
+
134
+ h3. Solrizing a single object in your repository:
135
+
136
+ If you have an existing object in your repository, you can solrize it by passing its pid:
137
+
138
+ <pre>
139
+ solrizer.solrize "demo:5"
140
+ </pre>
141
+
142
+ If you have either an instance of ActiveFedora::Base or Fedora::Object, you can solrize it by passing the object itself:
143
+
144
+ <pre>
145
+ my_object = ActiveFedora::Base.new
146
+
147
+ solrizer.solrize my_object
148
+ </pre>
149
+
150
+ To view the resulting document, open a web browser and go to the jetty's solr admin page (most likely http://localhost:8983/solr/admin) and query your solr instance for the pid (e.g. id:demo\:5).
151
+
152
+
153
+ h3. Solrizing all the objects in your repository:
154
+
155
+ In order to solrize all the objects in your repository, run:
156
+
157
+ <pre>
158
+ solrizer.solrize_objects
159
+ </pre>
160
+
161
+
162
+ h3. Advanced usage
163
+
164
+ Solrizer-fedora inspects your repository objects and attempts to match your objects to your ruby models based on the hasModel declarations in the RELS-EXT datastream.
165
+
166
+ Go to the fedora admin interface (http://FEDORA_BASE_URL/fedora/admin) and create a new object with a PID of "changeme:123" and whatever Label you want.
167
+
168
+ Once you have created that object you will need to add the RELS-EXT datastream to it. Make the datastream ID "RELS-EXT", MIME-Type of "application/rdf+xml", and put the XML below in the XML from text field.
169
+
170
+ <pre>
171
+ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
172
+ <rdf:Description rdf:about="info:fedora/changeme:123">
173
+ <hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:MyObject"></hasModel>
174
+ </rdf:Description>
175
+ </rdf:RDF>
176
+ </pre>
177
+
178
+ Solrizer-fedora will check to see if you have a matching ruby model called MyObject. If you do, it will load it and add any specific solr fields specified. So, assuming the following model:
179
+
180
+ <pre>
181
+ class MyObject < ActiveFedora::Base
182
+
183
+ has_metadata :name => "properties", :type=> ActiveFedora::MetadataDatastream do |m|
184
+ m.field 'foo', :string
185
+ end
186
+
187
+ end
188
+ </pre>
189
+
190
+ Now add another datastream to the changeme:123 object and give it a datastream ID of "properties", a MIME-Type of "text/xml", and add the following to the XML from text field:
191
+
192
+ <pre>
193
+ <fields>
194
+ <foo>bar</foo>
195
+ </fields>
196
+ </pre>
197
+
198
+ If you still have your solrizer instance from above you can now solrize the object by ID:
199
+
200
+ <pre>
201
+ solrizer.solrize "changeme:123"
202
+ </pre>
203
+
204
+ Now your solr doc with the "changeme:123" ID will include the following:
205
+
206
+ <pre>
207
+ <arr name="foo_t">
208
+ <str>bar</str>
209
+ </arr>
210
+ </pre>
211
+
212
+
213
+
214
+
215
+
21
216
 
22
217
  h2. Note on Patches/Pull Requests
23
218
 
24
219
  * Fork the project.
25
220
  * Make your feature addition or bug fix.
26
- * Add tests for it. This is important so I don't break it in a
27
- future version unintentionally.
221
+ * Add tests for it. This is important so it doesn't get broken unintentionally
222
+ in a future version.
28
223
  * Commit, do not mess with rakefile, version, or history.
29
224
  (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
30
225
  * Send me a pull request. Bonus points for topic branches.
data/Rakefile CHANGED
@@ -1,21 +1,11 @@
1
1
  require 'rubygems'
2
2
  require 'rake'
3
3
 
4
- begin
5
- require 'jeweler'
6
- Jeweler::Tasks.new do |gem|
7
- gem.name = "solrizer-fedora"
8
- gem.summary = %Q{An extension to solrizer that deals with Fedora objects & Repositories}
9
- gem.description = %Q{An extension to projecthydra/solrizer that provides utilities for loading objects from Fedora Repositories and creating solr documents from them.}
10
- gem.email = "matt.zumwalt@yourmediashelf.com"
11
- gem.homepage = "http://github.com/projecthydra/solrizer-fedora"
12
- gem.authors = ["Matt Zumwalt"]
13
- # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
14
- end
15
- Jeweler::GemcutterTasks.new
16
- rescue LoadError
17
- puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
18
- end
4
+ # load rake tasks in lib/tasks
5
+ Dir.glob('lib/tasks/*.rake').each { |r| import r }
6
+
7
+ require 'bundler'
8
+ Bundler::GemHelper.install_tasks
19
9
 
20
10
  require 'spec/rake/spectask'
21
11
  Spec::Rake::SpecTask.new(:spec) do |spec|
@@ -1,3 +1,5 @@
1
+ # This supports an experimental feature that allows Solrizer::Fedora::Extractor.extract_rels_ext to
2
+ # map between RDF URIs and compact model names like mods_document or jp2_document
1
3
  info:fedora/afmodel:SaltDocument : salt_document
2
4
  info:fedora/afmodel:JP2Document : jp2_document
3
5
  info:fedora/afmodel:ModsDocument : mods_document
@@ -1,7 +1,14 @@
1
1
  require "rubygems"
2
2
  require "solrizer"
3
+
4
+ # Solrizer::Fedora is an implementation of Solrizer that reads content from Fedora repositories and indexes it into solr.
5
+ #
6
+ # Note: This module automatically extends Solrizer::Extractor with additional Fedora-specific extractor behaviors from Solrizer::Fedora::Extractor.
3
7
  module Solrizer::Fedora
8
+ def self.version
9
+ Solrizer::Fedora::VERSION
10
+ end
4
11
  end
5
12
  Dir[File.join(File.dirname(__FILE__),"fedora","*.rb")].each {|file| require file }
6
13
 
7
- Solrizer::Extractor.send(:include, Solrizer::Fedora::Extractor)
14
+ Solrizer::Extractor.send(:include, Solrizer::Fedora::Extractor)
@@ -2,6 +2,8 @@ require 'rexml/document'
2
2
  require "nokogiri"
3
3
  require 'yaml'
4
4
 
5
+ # Solrizer::Fedora::Extractor provides Fedora-specific extractor behaviors
6
+ # This module is automatically mixed into Solrizer::Extractor when you load the solrizer-fedora gem into an application. This is carried out in solrizer/fedora.rb
5
7
  module Solrizer::Fedora::Extractor
6
8
 
7
9
  #
@@ -10,12 +12,16 @@ module Solrizer::Fedora::Extractor
10
12
  def extract_rels_ext( text, solr_doc=Hash.new )
11
13
  # TODO: only read in this file once
12
14
 
13
- if defined?(RAILS_ROOT)
14
- config_path = File.join(RAILS_ROOT, "config")
15
- else
16
- config_path = File.join(File.dirname(__FILE__), "..", "..", "..", "config")
17
- end
18
- map = YAML.load(File.open(File.join(config_path, "hydra_types.yml")))
15
+ if defined?(Rails.root.to_s)
16
+ config_path = File.join(Rails.root.to_s, "config","hydra_types.yml")
17
+ config_path = nil unless File.exist?(config_path)
18
+ end
19
+ unless config_path
20
+ config_path = File.join(File.dirname(__FILE__), "..", "..", "..", "config","hydra_types.yml")
21
+ end
22
+
23
+
24
+ map = YAML.load(File.open(config_path))
19
25
 
20
26
  doc = Nokogiri::XML(text)
21
27
  doc.xpath( '//foo:hasModel', 'foo' => 'info:fedora/fedora-system:def/model#' ).each do |element|
@@ -13,10 +13,15 @@ class Indexer
13
13
  @@unique_id
14
14
  end
15
15
 
16
- #
17
- # Member variables
18
- #
19
- attr_accessor :solr, :extractor, :index_full_text
16
+
17
+ # The instance of solr that updates will be written to
18
+ attr_accessor :solr
19
+
20
+ # The extractor to use. This is usually Solrizer::Extractor
21
+ attr_accessor :extractor
22
+
23
+ # [Boolean or "true" or "false"] tells the indexer whether to index full text or just field values
24
+ attr_accessor :index_full_text
20
25
 
21
26
  #
22
27
  # This method performs initialization tasks
@@ -36,68 +41,67 @@ class Indexer
36
41
 
37
42
  #
38
43
  # This method connects to the Solr instance. It looks to see if Blacklight is loaded first for the
39
- # Blacklight.solr_config. If not loaded, it then looks for the RAILS_ROOT/config/solr.yaml file and loads
44
+ # Blacklight.solr_config. If not loaded, it then looks for the Rails.root/config/solr.yml file and loads
40
45
  # it to get the solr url. The configuration structure can take both the
41
46
  # { "development" => {"default" => { "url" => "http://localhost"}, "fulltext" => { "url" => "http://localhost"} }}
42
47
  # or { "development"=>{"url"=>"http://localhost" }}
43
48
  # Can also take Blacklight.solr_config["url"] and Blacklight.solr_config[:url]
44
49
  #
45
-
46
- def connect
47
-
48
- if ActiveFedora.fedora_config.empty?
49
- ActiveFedora.init
50
- end
51
-
52
- if defined?(Blacklight)
53
- solr_config = Blacklight.solr_config
54
- else
55
- if defined?(RAILS_ROOT)
56
- config_path = File.join(RAILS_ROOT, "config")
57
- yaml = YAML.load(File.open(File.join(config_path, "solr.yml")))
58
- puts RAILS_ENV + "*****"
59
- solr_config = yaml[RAILS_ENV]
60
- puts solr_config.inspect
61
- else
62
- config_path = File.join("config","solr.yml")
63
- unless File.exist?(config_path)
64
- config_path = File.join(File.dirname(__FILE__), "..", "..", "..", "config", "solr.yml")
65
- end
66
- logger.debug "SOLRIZER: reading config from " + config_path.inspect
67
- yaml = YAML.load(File.open(config_path))
68
-
69
- if ENV["environment"].nil?
70
- environment = "development"
71
- else
72
- environment = ENV["environment"]
73
- end #if
74
-
75
- solr_config = yaml[environment]
76
- logger.debug "SOLRIZER solr_config:" + solr_config.inspect
77
- end #if defined?(RAILS_ROOT)
78
-
79
- end #if defined?(Blacklight)
80
-
81
- if index_full_text == true && solr_config['fulltext'].has_key?('url')
82
- url = solr_config['fulltext']['url']
83
- elsif solr_config.has_key?("default") &&
84
- url = solr_config['default']['url']
85
- elsif solr_config.has_key?('url')
86
- url = solr_config['url']
87
- elsif solr_config.has_key?(:url)
88
- url = solr_config[:url]
89
- else
90
- raise
91
- end
92
50
 
93
- @solr = RSolr.connect :url => url
94
- # @connection = Solr::Connection.new(url, :autocommit => :on )
51
+ def connect
52
+
53
+ if ActiveFedora.fedora_config.empty?
54
+ ActiveFedora.init
55
+ end
56
+
57
+ if defined?(Blacklight)
58
+ solr_config = Blacklight.solr_config
59
+ else
60
+ if defined?(Rails.root.to_s)
61
+ config_path = File.join(Rails.root.to_s, "config", "solr.yml")
62
+ yaml = YAML.load(File.open(File.join(config_path, "solr.yml")))
63
+ puts RAILS_ENV + "*****"
64
+ solr_config = yaml[RAILS_ENV]
65
+ puts solr_config.inspect
66
+ else
67
+ config_path = File.join("config","solr.yml")
68
+ unless File.exist?(config_path)
69
+ config_path = File.join(File.dirname(__FILE__), "..", "..", "..", "config", "solr.yml")
70
+ end
71
+ logger.debug "SOLRIZER: reading config from " + config_path.inspect
72
+ yaml = YAML.load(File.open(config_path))
95
73
 
96
- rescue RuntimeError => e
97
- logger.debug "Unable to establish SOLR Connection with #{solr_config.inspect}. Failed with #{e.message}"
98
- raise URI::InvalidURIError
74
+ if ENV["environment"].nil?
75
+ environment = "development"
76
+ else
77
+ environment = ENV["environment"]
78
+ end #if
79
+
80
+ solr_config = yaml[environment]
81
+ logger.debug "SOLRIZER solr_config:" + solr_config.inspect
82
+ end #if defined?(Rails.root)
83
+ end #if defined?(Blacklight)
84
+
85
+ if index_full_text == true && solr_config.has_key?('fulltext') && solr_config['fulltext'].has_key?('url')
86
+ url = solr_config['fulltext']['url']
87
+ elsif solr_config.has_key?("default") && solr_config['default'].has_key?('url')
88
+ url = solr_config['default']['url']
89
+ elsif solr_config.has_key?('url')
90
+ url = solr_config['url']
91
+ elsif solr_config.has_key?(:url)
92
+ url = solr_config[:url]
93
+ else
94
+ raise
99
95
  end
100
96
 
97
+ @solr = RSolr.connect :url => url
98
+ # @connection = Solr::Connection.new(url, :autocommit => :on )
99
+
100
+ rescue RuntimeError => e
101
+ logger.debug "Unable to establish SOLR Connection with #{solr_config.inspect}. Failed with #{e.message}"
102
+ raise URI::InvalidURIError
103
+ end
104
+
101
105
  #
102
106
  # This method extracts the facet categories from the given Fedora object's external tag datastream
103
107
  #
@@ -153,7 +157,6 @@ class Indexer
153
157
  end
154
158
 
155
159
  return solr_doc
156
- # end
157
160
 
158
161
  end
159
162
 
@@ -8,7 +8,7 @@ require 'solrizer/html'
8
8
  # Let people explicitly require xml support if they want it ...
9
9
  # require 'solrizer/xml.rb'
10
10
 
11
- # require 'fastercsv'
11
+ require 'fastercsv' # this is used by solrize_objects when you pass it a csv file of pids
12
12
  require "ruby-debug"
13
13
 
14
14
 
@@ -0,0 +1,5 @@
1
+ module Solrizer
2
+ module Fedora
3
+ VERSION = "1.1.0"
4
+ end
5
+ end
@@ -1,3 +1,44 @@
1
+ desc "Task to execute builds on a Hudson Continuous Integration Server."
2
+ task :hudson do
3
+ if (ENV['RAILS_ENV'] == "test")
4
+ require "jettywrapper"
5
+ jetty_params = {
6
+ :jetty_home => File.expand_path(File.dirname(__FILE__) + '/../../jetty'),
7
+ :quiet => false,
8
+ :jetty_port => 8983,
9
+ :solr_home => File.expand_path(File.dirname(__FILE__) + '/../../jetty/solr/test-core'),
10
+ :fedora_home => File.expand_path(File.dirname(__FILE__) + '/../../jetty/fedora/default'),
11
+ :startup_wait => 25
12
+ }
13
+ error = Jettywrapper.wrap(jetty_params) do
14
+ Rake::Task["doc"].invoke
15
+ Rake::Task["solrizer:fedora:rspec"].invoke
16
+ end
17
+ raise "test failures: #{error}" if error
18
+ else
19
+ system("rake hudson RAILS_ENV=test")
20
+ end
21
+ end
22
+
23
+ # Use yard to build docs
24
+ begin
25
+ require 'yard'
26
+ require 'yard/rake/yardoc_task'
27
+ project_root = File.expand_path("#{File.dirname(__FILE__)}/../../")
28
+ doc_destination = File.join(project_root, 'doc')
29
+
30
+ YARD::Rake::YardocTask.new(:doc) do |yt|
31
+ yt.files = Dir.glob(File.join(project_root, 'lib', '**', '*.rb')) +
32
+ [ File.join(project_root, 'README.textile') ]
33
+ yt.options = ['--output-dir', doc_destination, '--readme', 'README.textile']
34
+ end
35
+ rescue LoadError
36
+ desc "Generate YARD Documentation"
37
+ task :doc do
38
+ abort "Please install the YARD gem to generate rdoc."
39
+ end
40
+ end
41
+
1
42
  namespace :solrizer do
2
43
 
3
44
  namespace :fedora do
@@ -30,6 +71,15 @@ namespace :solrizer do
30
71
  solrizer.solrize_objects
31
72
  puts "Solrizer task complete."
32
73
  end
74
+
75
+ Spec::Rake::SpecTask.new(:rspec) do |t|
76
+ t.spec_files = FileList['spec/**/*_spec.rb']
77
+ t.rcov = true
78
+ t.rcov_opts = lambda do
79
+ IO.readlines("spec/rcov.opts").map {|l| l.chomp.split " "}.flatten
80
+ end
81
+ end
82
+
33
83
  end
34
84
 
35
85
  end
@@ -1,93 +1,34 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
1
  # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "solrizer/fedora/version"
5
4
 
6
5
  Gem::Specification.new do |s|
7
- s.name = %q{solrizer-fedora}
8
- s.version = "1.0.3"
9
-
10
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
6
+ s.name = "solrizer-fedora"
7
+ s.version = Solrizer::Fedora::VERSION
11
8
  s.authors = ["Matt Zumwalt"]
12
- s.date = %q{2011-05-03}
9
+ s.date = %q{2011-05-20}
13
10
  s.description = %q{An extension to projecthydra/solrizer that provides utilities for loading objects from Fedora Repositories and creating solr documents from them.}
14
11
  s.email = %q{matt.zumwalt@yourmediashelf.com}
15
12
  s.extra_rdoc_files = [
16
13
  "LICENSE",
17
14
  "README.textile"
18
15
  ]
19
- s.files = [
20
- ".document",
21
- "Gemfile",
22
- "Gemfile.lock",
23
- "History.textile",
24
- "LICENSE",
25
- "README.textile",
26
- "Rakefile",
27
- "VERSION",
28
- "config/fedora.yml",
29
- "config/hydra_types.yml",
30
- "config/solr.yml",
31
- "lib/solrizer-fedora.rb",
32
- "lib/solrizer/fedora.rb",
33
- "lib/solrizer/fedora/extractor.rb",
34
- "lib/solrizer/fedora/indexer.rb",
35
- "lib/solrizer/fedora/repository.rb",
36
- "lib/solrizer/fedora/solrizer.rb",
37
- "lib/tasks/solrizer-fedora.rake",
38
- "solrizer-fedora.gemspec",
39
- "spec/fixtures/rels_ext_cmodel.xml",
40
- "spec/integration/fedora_indexer_spec.rb",
41
- "spec/lib/solrizer/indexer_spec.rb",
42
- "spec/rcov.opts",
43
- "spec/spec.opts",
44
- "spec/spec_helper.rb",
45
- "spec/units/fedora_extractor_spec.rb",
46
- "spec/units/fedora_indexer_spec.rb",
47
- "spec/units/fedora_solrizer_spec.rb"
48
- ]
16
+ s.platform = Gem::Platform::RUBY
49
17
  s.homepage = %q{http://github.com/projecthydra/solrizer-fedora}
50
18
  s.require_paths = ["lib"]
51
- s.rubygems_version = %q{1.3.7}
19
+ s.rubygems_version = %q{1.7.2}
52
20
  s.summary = %q{An extension to solrizer that deals with Fedora objects & Repositories}
53
- s.test_files = [
54
- "spec/integration/fedora_indexer_spec.rb",
55
- "spec/lib/solrizer/indexer_spec.rb",
56
- "spec/spec_helper.rb",
57
- "spec/units/fedora_extractor_spec.rb",
58
- "spec/units/fedora_indexer_spec.rb",
59
- "spec/units/fedora_solrizer_spec.rb"
60
- ]
61
21
 
62
- if s.respond_to? :specification_version then
63
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
64
- s.specification_version = 3
22
+ s.rubyforge_project = "solrizer-fedora"
65
23
 
66
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
67
- s.add_runtime_dependency(%q<active-fedora>, [">= 2.0.0"])
68
- s.add_runtime_dependency(%q<rsolr>, [">= 0"])
69
- s.add_runtime_dependency(%q<solrizer>, [">= 1.0.0"])
70
- s.add_development_dependency(%q<ruby-debug>, [">= 0"])
71
- s.add_development_dependency(%q<ruby-debug-base>, [">= 0"])
72
- s.add_development_dependency(%q<rspec>, ["< 2.0.0"])
73
- s.add_development_dependency(%q<mocha>, [">= 0"])
74
- else
75
- s.add_dependency(%q<active-fedora>, [">= 2.0.0"])
76
- s.add_dependency(%q<rsolr>, [">= 0"])
77
- s.add_dependency(%q<solrizer>, [">= 1.0.0"])
78
- s.add_dependency(%q<ruby-debug>, [">= 0"])
79
- s.add_dependency(%q<ruby-debug-base>, [">= 0"])
80
- s.add_dependency(%q<rspec>, ["< 2.0.0"])
81
- s.add_dependency(%q<mocha>, [">= 0"])
82
- end
83
- else
84
- s.add_dependency(%q<active-fedora>, [">= 2.0.0"])
85
- s.add_dependency(%q<rsolr>, [">= 0"])
86
- s.add_dependency(%q<solrizer>, [">= 1.0.0"])
87
- s.add_dependency(%q<ruby-debug>, [">= 0"])
88
- s.add_dependency(%q<ruby-debug-base>, [">= 0"])
89
- s.add_dependency(%q<rspec>, ["< 2.0.0"])
90
- s.add_dependency(%q<mocha>, [">= 0"])
91
- end
92
- end
24
+ s.add_dependency('solr-ruby', '>= 0.0.6')
25
+ s.add_dependency('active-fedora', '>= 2.3.0')
26
+ s.add_dependency('rsolr')
27
+ s.add_dependency('solrizer', '>=1.0.0')
28
+ s.add_dependency('fastercsv') # this is used by solrize_objects when you pass it a csv file of pids
93
29
 
30
+ s.files = `git ls-files`.split("\n")
31
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
32
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
33
+ s.require_paths = ["lib"]
34
+ end
@@ -7,8 +7,9 @@ describe Solrizer::Fedora::Indexer do
7
7
 
8
8
  before(:all) do
9
9
 
10
- unless defined?(RAILS_ROOT) and defined?(RAILS_ENV)
11
- RAILS_ROOT = "."
10
+ unless defined?(Rails) and defined?(RAILS_ENV)
11
+ Object.const_set("Rails", String)
12
+ Rails.stubs(:root).returns(".") #RAILS_ROOT = "."
12
13
  RAILS_ENV = "test"
13
14
  end
14
15
 
@@ -34,45 +35,59 @@ describe Solrizer::Fedora::Indexer do
34
35
 
35
36
  describe "#new" do
36
37
  it "should return a URL from solr_config if the config has a :url" do
37
- Blacklight.stubs(:solr_config).returns({:url => "http://foo.com:8080/solr"})
38
- @indexer = Solrizer::Fedora::Indexer.new
38
+ Blacklight.stubs(:solr_config).returns({:url => "http://foo.com:8080/solr"})
39
+ @indexer = Solrizer::Fedora::Indexer.new
40
+ @indexer.solr.uri.to_s.should == "http://foo.com:8080/solr/"
39
41
  end
40
42
 
41
43
  it "should return a URL from solr_config if the config has a 'url' " do
42
- Blacklight.stubs(:solr_config).returns({'url' => "http://foo.com:8080/solr"})
43
- @indexer = Solrizer::Fedora::Indexer.new
44
+ Blacklight.stubs(:solr_config).returns({'url' => "http://foo.com:8080/solr"})
45
+ @indexer = Solrizer::Fedora::Indexer.new
46
+ @indexer.solr.uri.to_s.should == "http://foo.com:8080/solr/"
44
47
  end
45
48
 
46
49
  it "should raise and error if there is not a :url or 'url' in the config hash" do
47
- Blacklight.stubs(:solr_config).returns({'boosh' => "http://foo.com:8080/solr"})
48
- lambda { Solrizer::Fedora::Indexer.new }.should raise_error(URI::InvalidURIError)
50
+ Blacklight.stubs(:solr_config).returns({'boosh' => "http://foo.com:8080/solr"})
51
+ lambda { Solrizer::Fedora::Indexer.new }.should raise_error(URI::InvalidURIError)
49
52
  end
50
53
 
51
54
  it "should return a fulltext URL if solr_config has a fulltext url defined" do
52
- Blacklight.stubs(:solr_config).returns({'fulltext' =>{ 'url' => "http://foo.com:8080/solr"}})
53
- @indexer = Solrizer::Fedora::Indexer.new(:index_full_text => true)
55
+ Blacklight.stubs(:solr_config).returns({'fulltext' =>{ 'url' => "http://fulltext.com:8080/solr"}, 'default' =>{ 'url' => "http://default.com:8080/solr"}})
56
+ @indexer = Solrizer::Fedora::Indexer.new(:index_full_text => true)
57
+ @indexer.solr.uri.to_s.should == "http://fulltext.com:8080/solr/"
54
58
  end
55
59
 
60
+ it "should gracefully handle when index_full_text is true but there is no fulltext in the configuration" do
61
+ Blacklight.stubs(:solr_config).returns({'default' =>{ 'url' => "http://foo.com:8080/solr"}})
62
+ @indexer = Solrizer::Fedora::Indexer.new(:index_full_text => true)
63
+ @indexer.solr.uri.to_s.should == "http://foo.com:8080/solr/"
64
+ end
65
+
56
66
  it "should return a fulltext URL if solr_config has a default url defined" do
57
- Blacklight.stubs(:solr_config).returns({'default' =>{ 'url' => "http://foo.com:8080/solr"}})
58
- @indexer = Solrizer::Fedora::Indexer.new(:index_full_text => false)
67
+ Blacklight.stubs(:solr_config).returns({'default' =>{ 'url' => "http://foo.com:8080/solr"}})
68
+ @indexer = Solrizer::Fedora::Indexer.new(:index_full_text => false)
69
+ @indexer.solr.uri.to_s.should == "http://foo.com:8080/solr/"
59
70
  end
60
71
 
61
72
  it "should find the solr.yml even if Blacklight is not loaded" do
62
73
  Object.const_set("Blacklight_temp", Blacklight )
63
74
  Object.send(:remove_const, :Blacklight)
64
75
  YAML.stubs(:load).returns({'test' => {'url' => "http://thereisnoblacklightrunning.edu:8080/solr"}})
76
+ ENV["environment"]="test"
65
77
  @indexer = Solrizer::Fedora::Indexer.new
66
78
  Object.const_set("Blacklight", Blacklight_temp )
79
+ ENV["environment"]=nil
67
80
  end
68
81
 
69
82
  it "should find the solr.yml even if Blacklight is not loaded and RAILS is not loaded" do
70
83
  Object.const_set("Blacklight_temp", Blacklight )
71
84
  Object.send(:remove_const, :Blacklight)
72
- Object.send(:remove_const, :RAILS_ROOT)
85
+ Object.const_set("Rails_temp", Rails)
86
+ Object.send(:remove_const, :Rails)
73
87
  YAML.stubs(:load).returns({'development' => {'url' => "http://noblacklight.norails.edu:8080/solr"}})
74
88
  @indexer = Solrizer::Fedora::Indexer.new
75
- Object.const_set("Blacklight", Blacklight_temp )
89
+ Object.const_set("Blacklight", Blacklight_temp )
90
+ Object.const_set("Rails", Rails_temp)
76
91
  end
77
92
  end
78
93
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: solrizer-fedora
3
3
  version: !ruby/object:Gem::Version
4
- hash: 17
5
- prerelease: false
4
+ hash: 19
5
+ prerelease:
6
6
  segments:
7
7
  - 1
8
+ - 1
8
9
  - 0
9
- - 3
10
- version: 1.0.3
10
+ version: 1.1.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Matt Zumwalt
@@ -15,60 +15,44 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-05-03 00:00:00 -05:00
19
- default_executable:
18
+ date: 2011-05-20 00:00:00 Z
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
22
- type: :runtime
21
+ name: solr-ruby
23
22
  prerelease: false
24
- name: active-fedora
25
- version_requirements: &id001 !ruby/object:Gem::Requirement
23
+ requirement: &id001 !ruby/object:Gem::Requirement
26
24
  none: false
27
25
  requirements:
28
26
  - - ">="
29
27
  - !ruby/object:Gem::Version
30
- hash: 15
28
+ hash: 19
31
29
  segments:
32
- - 2
33
30
  - 0
34
31
  - 0
35
- version: 2.0.0
36
- requirement: *id001
37
- - !ruby/object:Gem::Dependency
32
+ - 6
33
+ version: 0.0.6
38
34
  type: :runtime
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ name: active-fedora
39
38
  prerelease: false
40
- name: rsolr
41
- version_requirements: &id002 !ruby/object:Gem::Requirement
39
+ requirement: &id002 !ruby/object:Gem::Requirement
42
40
  none: false
43
41
  requirements:
44
42
  - - ">="
45
43
  - !ruby/object:Gem::Version
46
44
  hash: 3
47
45
  segments:
46
+ - 2
47
+ - 3
48
48
  - 0
49
- version: "0"
50
- requirement: *id002
51
- - !ruby/object:Gem::Dependency
49
+ version: 2.3.0
52
50
  type: :runtime
53
- prerelease: false
54
- name: solrizer
55
- version_requirements: &id003 !ruby/object:Gem::Requirement
56
- none: false
57
- requirements:
58
- - - ">="
59
- - !ruby/object:Gem::Version
60
- hash: 23
61
- segments:
62
- - 1
63
- - 0
64
- - 0
65
- version: 1.0.0
66
- requirement: *id003
51
+ version_requirements: *id002
67
52
  - !ruby/object:Gem::Dependency
68
- type: :development
53
+ name: rsolr
69
54
  prerelease: false
70
- name: ruby-debug
71
- version_requirements: &id004 !ruby/object:Gem::Requirement
55
+ requirement: &id003 !ruby/object:Gem::Requirement
72
56
  none: false
73
57
  requirements:
74
58
  - - ">="
@@ -77,42 +61,28 @@ dependencies:
77
61
  segments:
78
62
  - 0
79
63
  version: "0"
80
- requirement: *id004
64
+ type: :runtime
65
+ version_requirements: *id003
81
66
  - !ruby/object:Gem::Dependency
82
- type: :development
67
+ name: solrizer
83
68
  prerelease: false
84
- name: ruby-debug-base
85
- version_requirements: &id005 !ruby/object:Gem::Requirement
69
+ requirement: &id004 !ruby/object:Gem::Requirement
86
70
  none: false
87
71
  requirements:
88
72
  - - ">="
89
73
  - !ruby/object:Gem::Version
90
- hash: 3
91
- segments:
92
- - 0
93
- version: "0"
94
- requirement: *id005
95
- - !ruby/object:Gem::Dependency
96
- type: :development
97
- prerelease: false
98
- name: rspec
99
- version_requirements: &id006 !ruby/object:Gem::Requirement
100
- none: false
101
- requirements:
102
- - - <
103
- - !ruby/object:Gem::Version
104
- hash: 15
74
+ hash: 23
105
75
  segments:
106
- - 2
76
+ - 1
107
77
  - 0
108
78
  - 0
109
- version: 2.0.0
110
- requirement: *id006
79
+ version: 1.0.0
80
+ type: :runtime
81
+ version_requirements: *id004
111
82
  - !ruby/object:Gem::Dependency
112
- type: :development
83
+ name: fastercsv
113
84
  prerelease: false
114
- name: mocha
115
- version_requirements: &id007 !ruby/object:Gem::Requirement
85
+ requirement: &id005 !ruby/object:Gem::Requirement
116
86
  none: false
117
87
  requirements:
118
88
  - - ">="
@@ -121,7 +91,8 @@ dependencies:
121
91
  segments:
122
92
  - 0
123
93
  version: "0"
124
- requirement: *id007
94
+ type: :runtime
95
+ version_requirements: *id005
125
96
  description: An extension to projecthydra/solrizer that provides utilities for loading objects from Fedora Repositories and creating solr documents from them.
126
97
  email: matt.zumwalt@yourmediashelf.com
127
98
  executables: []
@@ -133,13 +104,15 @@ extra_rdoc_files:
133
104
  - README.textile
134
105
  files:
135
106
  - .document
107
+ - .gitignore
108
+ - .gitmodules
109
+ - .rvmrc
136
110
  - Gemfile
137
111
  - Gemfile.lock
138
112
  - History.textile
139
113
  - LICENSE
140
114
  - README.textile
141
115
  - Rakefile
142
- - VERSION
143
116
  - config/fedora.yml
144
117
  - config/hydra_types.yml
145
118
  - config/solr.yml
@@ -149,6 +122,7 @@ files:
149
122
  - lib/solrizer/fedora/indexer.rb
150
123
  - lib/solrizer/fedora/repository.rb
151
124
  - lib/solrizer/fedora/solrizer.rb
125
+ - lib/solrizer/fedora/version.rb
152
126
  - lib/tasks/solrizer-fedora.rake
153
127
  - solrizer-fedora.gemspec
154
128
  - spec/fixtures/rels_ext_cmodel.xml
@@ -160,7 +134,6 @@ files:
160
134
  - spec/units/fedora_extractor_spec.rb
161
135
  - spec/units/fedora_indexer_spec.rb
162
136
  - spec/units/fedora_solrizer_spec.rb
163
- has_rdoc: true
164
137
  homepage: http://github.com/projecthydra/solrizer-fedora
165
138
  licenses: []
166
139
 
@@ -189,15 +162,19 @@ required_rubygems_version: !ruby/object:Gem::Requirement
189
162
  version: "0"
190
163
  requirements: []
191
164
 
192
- rubyforge_project:
193
- rubygems_version: 1.3.7
165
+ rubyforge_project: solrizer-fedora
166
+ rubygems_version: 1.7.2
194
167
  signing_key:
195
168
  specification_version: 3
196
169
  summary: An extension to solrizer that deals with Fedora objects & Repositories
197
170
  test_files:
171
+ - spec/fixtures/rels_ext_cmodel.xml
198
172
  - spec/integration/fedora_indexer_spec.rb
199
173
  - spec/lib/solrizer/indexer_spec.rb
174
+ - spec/rcov.opts
175
+ - spec/spec.opts
200
176
  - spec/spec_helper.rb
201
177
  - spec/units/fedora_extractor_spec.rb
202
178
  - spec/units/fedora_indexer_spec.rb
203
179
  - spec/units/fedora_solrizer_spec.rb
180
+ has_rdoc:
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 1.0.3