solr_ead 0.4.4 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- solr_ead (0.4.4)
4
+ solr_ead (0.4.5)
5
5
  om
6
6
  rsolr
7
7
  sanitize
data/History.txt ADDED
@@ -0,0 +1,6 @@
1
+ 0.4.5
2
+ New Features:
3
+ - Ability to pass in a custom component and the simple param to the rake task
4
+
5
+ Fixes:
6
+ - fixed greedy regex to allow for "messier" ead xml files
data/README.md CHANGED
@@ -39,6 +39,8 @@ Or install it yourself:
39
39
  ## Usage
40
40
 
41
41
  $ rake solr_ead:index FILE=/path/to/your/ead.xml
42
+ $ rake solr_ead:index_dir DIR=/path/to/your/eads SIMPLE=true
43
+ $ rake solr_ead:index_dir DIR=/path/to/your/eads SOLR_URL=http://127.0.0.1:8983
42
44
 
43
45
  You can also do this via the command line:
44
46
 
@@ -121,9 +123,29 @@ following content:
121
123
  From the console, index your ead document using your new definition.
122
124
 
123
125
  > file = "path/to/ead.xml"
124
- > indexer = SolrEad::Indexer.new(:document=>"CustomDocument")
126
+ > indexer = SolrEad::Indexer.new(:document=>CustomDocument)
125
127
  > indexer.create(file)
126
128
 
129
+ Or index from the rake task
130
+
131
+ $ rake solr_ead:index FILE=path/to/file.xml CUSTOM_DOCUMENT=path/to/custom_document.rb
132
+
133
+ ### Writing a custom component definition
134
+
135
+ Similar to the custom document definition, you can create a custom component definition for component indexing:
136
+
137
+ class CustomComponent < SolrEad::Component
138
+ ...
139
+ end
140
+
141
+ Call this from the console
142
+
143
+ > indexer = SolrEad::Indexer.new(:document=>CustomDocument, :component=>CustomComponent)
144
+
145
+ Or from the rake task
146
+
147
+ $ rake solr_ead:index FILE=path/to/file.xml CUSTOM_DOCUMENT=path/to/custom_document.rb CUSTOM_COMPONENT=path/to/custom_component.rb
148
+
127
149
  ### Adding custom methods
128
150
 
129
151
  Suppose you want to add some custom methods that perform additional manipulations of
@@ -7,7 +7,7 @@ module SolrEad::Behaviors
7
7
  # It'll make an attempt at substituting numbered component levels for non-numbered
8
8
  # ones.
9
9
  def components(file)
10
- raw = File.read(file).gsub!(/xmlns=".*"/, '')
10
+ raw = File.read(file).gsub!(/xmlns="(.*?)"/, '')
11
11
  raw.gsub!(/c[0-9]{2,2}/,"c")
12
12
  xml = Nokogiri::XML(raw)
13
13
  return xml.xpath("//c")
@@ -1,3 +1,3 @@
1
1
  module SolrEad
2
- VERSION = "0.4.4"
2
+ VERSION = "0.4.5"
3
3
  end
@@ -29,16 +29,20 @@ end
29
29
 
30
30
  # Set up a new indexer object
31
31
  #
32
- # If CUSTOM_DOCUMENT is present, require the file and instantiate the indexer with it
33
- # Otherwise instantiate a default indexer
32
+ # Instantiate a new indexer object with a custom document, component and/or simple switch if present
34
33
  def load_indexer
34
+ options = {}
35
35
  if ENV['CUSTOM_DOCUMENT']
36
36
  raise "Please specify a valid file for your custom document." unless File.exists? ENV['CUSTOM_DOCUMENT']
37
37
  require File.join(Rails.root, ENV['CUSTOM_DOCUMENT'])
38
- custom_document = File.basename(ENV['CUSTOM_DOCUMENT']).split(".").first.classify.constantize
39
- indexer = SolrEad::Indexer.new(:document=>custom_document)
40
- else
41
- indexer = SolrEad::Indexer.new
38
+ options[:document] = File.basename(ENV['CUSTOM_DOCUMENT']).split(".").first.classify.constantize
39
+ end
40
+ if ENV['CUSTOM_COMPONENT']
41
+ raise "Please specify a valid file for your custom component." unless File.exists? ENV['CUSTOM_COMPONENT']
42
+ require File.join(Rails.root, ENV['CUSTOM_COMPONENT'])
43
+ options[:component] = File.basename(ENV['CUSTOM_COMPONENT']).split(".").first.classify.constantize
42
44
  end
45
+ options[:simple] = (ENV['SIMPLE'] or ENV['SIMPLE']=="true") ? true : false
46
+ indexer = SolrEad::Indexer.new(options)
43
47
  return indexer
44
48
  end
@@ -5,6 +5,7 @@ describe SolrEad::Behaviors do
5
5
  before :all do
6
6
  @not_numbered = fixture "ARC-0005.xml"
7
7
  @numbered = fixture "pp002010.xml"
8
+ @messy = fixture "ead_messy_format.xml"
8
9
  class TestClass
9
10
  include SolrEad::Behaviors
10
11
  end
@@ -16,6 +17,7 @@ describe SolrEad::Behaviors do
16
17
  before :all do
17
18
  @non_numbered_nodeset = @test.components(@not_numbered)
18
19
  @numbered_nodeset = @test.components(@numbered)
20
+ @messy_nodeset = @test.components(@messy)
19
21
  end
20
22
 
21
23
  it "should return a nodeset" do
@@ -27,6 +29,11 @@ describe SolrEad::Behaviors do
27
29
  @non_numbered_nodeset.count.should == 135
28
30
  @numbered_nodeset.count.should == 83
29
31
  end
32
+
33
+ it "should find some components even if ead is messily formatted" do
34
+ @messy_nodeset.count.should > 0
35
+ end
36
+
30
37
  end
31
38
 
32
39
  describe "#prep" do
@@ -0,0 +1,27 @@
1
+ <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
2
+ <ead xsi:schemaLocation="urn:isbn:1-931666-22-9 http://www.loc.gov/ead/ead.xsd" xmlns:ns2="http://www.w3.org/1999/xlink" xmlns="urn:isbn:1-931666-22-9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> <eadheader findaidstatus="Complete" repositoryencoding="iso15511" countryencoding="iso3166-1" dateencoding="iso8601" langencoding="iso639-2b">
3
+ <eadid>sample_ead2</eadid>
4
+ <filedesc></filedesc>
5
+ <profiledesc></profiledesc>
6
+ <revisiondesc></revisiondesc>
7
+ </eadheader>
8
+ <archdesc>
9
+ <dsc>
10
+ <c id="series1" level="series">
11
+ <did>
12
+ <unittitle>sample series</unittitle>
13
+ <unitdate>sample series date</unitdate>
14
+ </did>
15
+ <scopecontent>
16
+ <head>scopecontent heading</head>
17
+ <p>Sample scopecontent text</p>
18
+ </scopecontent>
19
+ </c>
20
+ </dsc>
21
+ </archdesc>
22
+ <!-- Not found in AT EAD -->
23
+ <frontmatter></frontmatter>
24
+ </ead>
25
+
26
+
27
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: solr_ead
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.4
4
+ version: 0.4.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-04-22 00:00:00.000000000 Z
12
+ date: 2013-05-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: om
@@ -184,6 +184,7 @@ files:
184
184
  - .rvmrc
185
185
  - Gemfile
186
186
  - Gemfile.lock
187
+ - History.txt
187
188
  - LICENSE
188
189
  - README.md
189
190
  - Rakefile
@@ -205,6 +206,7 @@ files:
205
206
  - spec/fixtures/ARC-0005.xml
206
207
  - spec/fixtures/ARC-0161.xml
207
208
  - spec/fixtures/component_template.xml
209
+ - spec/fixtures/ead_messy_format.xml
208
210
  - spec/fixtures/ead_sample.xml
209
211
  - spec/fixtures/ead_template.xml
210
212
  - spec/fixtures/pp002010.xml
@@ -224,7 +226,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
224
226
  version: '0'
225
227
  segments:
226
228
  - 0
227
- hash: 2881959855902156324
229
+ hash: 3066311826305966167
228
230
  required_rubygems_version: !ruby/object:Gem::Requirement
229
231
  none: false
230
232
  requirements:
@@ -233,7 +235,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
233
235
  version: '0'
234
236
  segments:
235
237
  - 0
236
- hash: 2881959855902156324
238
+ hash: 3066311826305966167
237
239
  requirements: []
238
240
  rubyforge_project:
239
241
  rubygems_version: 1.8.23
@@ -247,6 +249,7 @@ test_files:
247
249
  - spec/fixtures/ARC-0005.xml
248
250
  - spec/fixtures/ARC-0161.xml
249
251
  - spec/fixtures/component_template.xml
252
+ - spec/fixtures/ead_messy_format.xml
250
253
  - spec/fixtures/ead_sample.xml
251
254
  - spec/fixtures/ead_template.xml
252
255
  - spec/fixtures/pp002010.xml