solr_ead 0.4.4 → 0.4.5

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- solr_ead (0.4.4)
4
+ solr_ead (0.4.5)
5
5
  om
6
6
  rsolr
7
7
  sanitize
data/History.txt ADDED
@@ -0,0 +1,6 @@
1
+ 0.4.5
2
+ New Features:
3
+ - Ability to pass in a custom component and the simple param to the rake task
4
+
5
+ Fixes:
6
+ - fixed greedy regex to allow for "messier" ead xml files
data/README.md CHANGED
@@ -39,6 +39,8 @@ Or install it yourself:
39
39
  ## Usage
40
40
 
41
41
  $ rake solr_ead:index FILE=/path/to/your/ead.xml
42
+ $ rake solr_ead:index_dir DIR=/path/to/your/eads SIMPLE=true
43
+ $ rake solr_ead:index_dir DIR=/path/to/your/eads SOLR_URL=http://127.0.0.1:8983
42
44
 
43
45
  You can also do this via the command line:
44
46
 
@@ -121,9 +123,29 @@ following content:
121
123
  From the console, index your ead document using your new definition.
122
124
 
123
125
  > file = "path/to/ead.xml"
124
- > indexer = SolrEad::Indexer.new(:document=>"CustomDocument")
126
+ > indexer = SolrEad::Indexer.new(:document=>CustomDocument)
125
127
  > indexer.create(file)
126
128
 
129
+ Or index from the rake task
130
+
131
+ $ rake solr_ead:index FILE=path/to/file.xml CUSTOM_DOCUMENT=path/to/custom_document.rb
132
+
133
+ ### Writing a custom component definition
134
+
135
+ Similar to the custom document definition, you can create a custom component definition for component indexing:
136
+
137
+ class CustomComponent < SolrEad::Component
138
+ ...
139
+ end
140
+
141
+ Call this from the console
142
+
143
+ > indexer = SolrEad::Indexer.new(:document=>CustomDocument, :component=>CustomComponent)
144
+
145
+ Or from the rake task
146
+
147
+ $ rake solr_ead:index FILE=path/to/file.xml CUSTOM_DOCUMENT=path/to/custom_document.rb CUSTOM_COMPONENT=path/to/custom_component.rb
148
+
127
149
  ### Adding custom methods
128
150
 
129
151
  Suppose you want to add some custom methods that perform additional manipulations of
@@ -7,7 +7,7 @@ module SolrEad::Behaviors
7
7
  # It'll make an attempt at substituting numbered component levels for non-numbered
8
8
  # ones.
9
9
  def components(file)
10
- raw = File.read(file).gsub!(/xmlns=".*"/, '')
10
+ raw = File.read(file).gsub!(/xmlns="(.*?)"/, '')
11
11
  raw.gsub!(/c[0-9]{2,2}/,"c")
12
12
  xml = Nokogiri::XML(raw)
13
13
  return xml.xpath("//c")
@@ -1,3 +1,3 @@
1
1
  module SolrEad
2
- VERSION = "0.4.4"
2
+ VERSION = "0.4.5"
3
3
  end
@@ -29,16 +29,20 @@ end
29
29
 
30
30
  # Set up a new indexer object
31
31
  #
32
- # If CUSTOM_DOCUMENT is present, require the file and instantiate the indexer with it
33
- # Otherwise instantiate a default indexer
32
+ # Instantiate a new indexer object with a custom document, component and/or simple switch if present
34
33
  def load_indexer
34
+ options = {}
35
35
  if ENV['CUSTOM_DOCUMENT']
36
36
  raise "Please specify a valid file for your custom document." unless File.exists? ENV['CUSTOM_DOCUMENT']
37
37
  require File.join(Rails.root, ENV['CUSTOM_DOCUMENT'])
38
- custom_document = File.basename(ENV['CUSTOM_DOCUMENT']).split(".").first.classify.constantize
39
- indexer = SolrEad::Indexer.new(:document=>custom_document)
40
- else
41
- indexer = SolrEad::Indexer.new
38
+ options[:document] = File.basename(ENV['CUSTOM_DOCUMENT']).split(".").first.classify.constantize
39
+ end
40
+ if ENV['CUSTOM_COMPONENT']
41
+ raise "Please specify a valid file for your custom component." unless File.exists? ENV['CUSTOM_COMPONENT']
42
+ require File.join(Rails.root, ENV['CUSTOM_COMPONENT'])
43
+ options[:component] = File.basename(ENV['CUSTOM_COMPONENT']).split(".").first.classify.constantize
42
44
  end
45
+ options[:simple] = (ENV['SIMPLE'] or ENV['SIMPLE']=="true") ? true : false
46
+ indexer = SolrEad::Indexer.new(options)
43
47
  return indexer
44
48
  end
@@ -5,6 +5,7 @@ describe SolrEad::Behaviors do
5
5
  before :all do
6
6
  @not_numbered = fixture "ARC-0005.xml"
7
7
  @numbered = fixture "pp002010.xml"
8
+ @messy = fixture "ead_messy_format.xml"
8
9
  class TestClass
9
10
  include SolrEad::Behaviors
10
11
  end
@@ -16,6 +17,7 @@ describe SolrEad::Behaviors do
16
17
  before :all do
17
18
  @non_numbered_nodeset = @test.components(@not_numbered)
18
19
  @numbered_nodeset = @test.components(@numbered)
20
+ @messy_nodeset = @test.components(@messy)
19
21
  end
20
22
 
21
23
  it "should return a nodeset" do
@@ -27,6 +29,11 @@ describe SolrEad::Behaviors do
27
29
  @non_numbered_nodeset.count.should == 135
28
30
  @numbered_nodeset.count.should == 83
29
31
  end
32
+
33
+ it "should find some components even if ead is messily formatted" do
34
+ @messy_nodeset.count.should > 0
35
+ end
36
+
30
37
  end
31
38
 
32
39
  describe "#prep" do
@@ -0,0 +1,27 @@
1
+ <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
2
+ <ead xsi:schemaLocation="urn:isbn:1-931666-22-9 http://www.loc.gov/ead/ead.xsd" xmlns:ns2="http://www.w3.org/1999/xlink" xmlns="urn:isbn:1-931666-22-9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> <eadheader findaidstatus="Complete" repositoryencoding="iso15511" countryencoding="iso3166-1" dateencoding="iso8601" langencoding="iso639-2b">
3
+ <eadid>sample_ead2</eadid>
4
+ <filedesc></filedesc>
5
+ <profiledesc></profiledesc>
6
+ <revisiondesc></revisiondesc>
7
+ </eadheader>
8
+ <archdesc>
9
+ <dsc>
10
+ <c id="series1" level="series">
11
+ <did>
12
+ <unittitle>sample series</unittitle>
13
+ <unitdate>sample series date</unitdate>
14
+ </did>
15
+ <scopecontent>
16
+ <head>scopecontent heading</head>
17
+ <p>Sample scopecontent text</p>
18
+ </scopecontent>
19
+ </c>
20
+ </dsc>
21
+ </archdesc>
22
+ <!-- Not found in AT EAD -->
23
+ <frontmatter></frontmatter>
24
+ </ead>
25
+
26
+
27
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: solr_ead
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.4
4
+ version: 0.4.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-04-22 00:00:00.000000000 Z
12
+ date: 2013-05-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: om
@@ -184,6 +184,7 @@ files:
184
184
  - .rvmrc
185
185
  - Gemfile
186
186
  - Gemfile.lock
187
+ - History.txt
187
188
  - LICENSE
188
189
  - README.md
189
190
  - Rakefile
@@ -205,6 +206,7 @@ files:
205
206
  - spec/fixtures/ARC-0005.xml
206
207
  - spec/fixtures/ARC-0161.xml
207
208
  - spec/fixtures/component_template.xml
209
+ - spec/fixtures/ead_messy_format.xml
208
210
  - spec/fixtures/ead_sample.xml
209
211
  - spec/fixtures/ead_template.xml
210
212
  - spec/fixtures/pp002010.xml
@@ -224,7 +226,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
224
226
  version: '0'
225
227
  segments:
226
228
  - 0
227
- hash: 2881959855902156324
229
+ hash: 3066311826305966167
228
230
  required_rubygems_version: !ruby/object:Gem::Requirement
229
231
  none: false
230
232
  requirements:
@@ -233,7 +235,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
233
235
  version: '0'
234
236
  segments:
235
237
  - 0
236
- hash: 2881959855902156324
238
+ hash: 3066311826305966167
237
239
  requirements: []
238
240
  rubyforge_project:
239
241
  rubygems_version: 1.8.23
@@ -247,6 +249,7 @@ test_files:
247
249
  - spec/fixtures/ARC-0005.xml
248
250
  - spec/fixtures/ARC-0161.xml
249
251
  - spec/fixtures/component_template.xml
252
+ - spec/fixtures/ead_messy_format.xml
250
253
  - spec/fixtures/ead_sample.xml
251
254
  - spec/fixtures/ead_template.xml
252
255
  - spec/fixtures/pp002010.xml