stead 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +21 -0
- data/README.rdoc +75 -0
- data/Rakefile +72 -0
- data/VERSION +1 -0
- data/bin/csv2ead +70 -0
- data/examples/ncsu.rb +74 -0
- data/lib/stead/ead.rb +270 -0
- data/lib/stead/error.rb +6 -0
- data/lib/stead/stead.rb +80 -0
- data/lib/stead/templates/ead.xml +44 -0
- data/lib/stead/templates/ead.xsd +2728 -0
- data/lib/stead/templates/ncsu_ead.xml +69 -0
- data/lib/stead.rb +56 -0
- data/test/helper.rb +25 -0
- data/test/test_ead_bad_container_type.rb +42 -0
- data/test/test_ead_no_series.rb +89 -0
- data/test/test_ead_series.rb +42 -0
- data/test/test_stead.rb +43 -0
- metadata +167 -0
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Copyright (c) 2009 North Carolina State University
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
21
|
+
|
data/README.rdoc
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
= stead
|
2
|
+
|
3
|
+
Spreadsheets To Encoded Archival Description. Turns CSV files of container lists
|
4
|
+
into a stub EAD XML record.
|
5
|
+
|
6
|
+
== Story
|
7
|
+
|
8
|
+
Sometimes donors have spreadsheets which list the contents of their collections.
|
9
|
+
Rather than retype all of these into Archivists' Toolkit or an XML editor,
|
10
|
+
wouldn't it be nice to automatically generate a stub EAD XML document from the
|
11
|
+
spreadsheet?
|
12
|
+
|
13
|
+
With Stead you can. Just edit the headers (first row of the spreadsheet) to
|
14
|
+
conform to the Stead schema. This may involve splitting some columns to conform
|
15
|
+
to the schema, adding columns, and other editing. All of this is likely easier,
|
16
|
+
faster and more accurate to do in a spreadsheet than trying to do it elsewhere or
|
17
|
+
retyping the whole thing.
|
18
|
+
|
19
|
+
Once the spreadsheet is ready just save it as a CSV and use the commandline tool
|
20
|
+
csv2ead to output an EAD XML document. Import into Archivists' Toolkit.
|
21
|
+
|
22
|
+
== Requirements
|
23
|
+
|
24
|
+
Ruby
|
25
|
+
|
26
|
+
== Examples that follow the schema
|
27
|
+
|
28
|
+
Look in test/container_lists/ at the following good examples of the CSV schema:
|
29
|
+
mc00000_container_list.csv
|
30
|
+
mc00000_container_list_no_series.csv
|
31
|
+
The order of the columns does not matter, but the headings must be exactly the
|
32
|
+
same case and spaces as those found in these files.
|
33
|
+
|
34
|
+
== Instructions
|
35
|
+
|
36
|
+
Once you have your spreadsheet in the correct schema, do the following:
|
37
|
+
- Save the spreadsheet as a CSV file.
|
38
|
+
- csv2ead --help for current commandline options.
|
39
|
+
|
40
|
+
== Stead::Extra
|
41
|
+
|
42
|
+
From the commandline you can specify a Stead::Extra class which will be required.
|
43
|
+
This class must define a Stead::Extra.run method which accepts an ead and eadid,
|
44
|
+
creates a new Stead::Extra object and then does any further processing you'd
|
45
|
+
like. See examples/ncsu.rb.
|
46
|
+
|
47
|
+
== Support
|
48
|
+
|
49
|
+
Please let me know what else you need in such a tool and I'll try to work it in.
|
50
|
+
|
51
|
+
== Limitations
|
52
|
+
|
53
|
+
- Some of this is still NCSU and Archivists' Toolkit specific.
|
54
|
+
- This tool has only been used a handful of times so far.
|
55
|
+
- Only works with this specific schema.
|
56
|
+
- Only known to work with series at the c01 level and files at the c02 level.
|
57
|
+
Other deeper levels of nesting will not currently work. (May work with subseries.)
|
58
|
+
- Column values like series must be duplicated for each row.
|
59
|
+
|
60
|
+
== TODO
|
61
|
+
|
62
|
+
- More tests (though there are already lots of tests).
|
63
|
+
- Better documentation on the CSV file schema.
|
64
|
+
- Rdoc.
|
65
|
+
- Automate tests of csv2ead tool.
|
66
|
+
- Expand the schema to other parts of the EAD?
|
67
|
+
|
68
|
+
== Author
|
69
|
+
|
70
|
+
Jason Ronallo
|
71
|
+
|
72
|
+
== Copyright
|
73
|
+
|
74
|
+
Copyright (c) 2010 North Carolina State University. See LICENSE for details.
|
75
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
# Rake tasks for the stead gem: gem packaging (Jeweler), the test suite,
# coverage (RCov), code-smell checks (Reek) and RDoc generation.
# Every optional tool is loaded inside begin/rescue LoadError so the
# remaining tasks stay usable when that tool is not installed.
require 'rubygems'
require 'rake'

begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "stead"
    gem.summary = %Q{Spreadsheets To Encoded Archival Description}
    gem.description = %Q{Converts CSV files of a specific schema into EAD XML.}
    gem.email = "jronallo@gmail.com"
    gem.homepage = "http://github.com/jronallo/stead"
    gem.authors = ["Jason Ronallo"]
    gem.add_dependency "nokogiri", ">= 1.4.1"
    gem.add_dependency "fastercsv", ">= 1.5.0"
    gem.add_dependency "activesupport", ">= 2.3.5"
    gem.add_dependency "trollop", ">= 1.16.2"
    gem.add_development_dependency "shoulda", ">= 0"
    gem.files = FileList["[A-Z]*", "{bin,examples,lib}/**/*"]
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  # Placeholder task that explains how to get the real one.
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined in this file (presumably it comes from
# Jeweler::Tasks above). Only make :test depend on it when it exists, so that
# `rake test` still works after the Jeweler LoadError has been rescued.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

begin
  require 'reek/adapters/rake_task'
  Reek::RakeTask.new do |t|
    t.fail_on_error = true
    t.verbose = false
    t.source_files = 'lib/**/*.rb'
  end
rescue LoadError
  # Placeholder task that explains how to get the real one.
  task :reek do
    abort "Reek is not available. In order to run reek, you must: sudo gem install reek"
  end
end

task :default => :test

require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # strip: the VERSION file ends with a newline that should not appear in the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "stead #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
72
|
+
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.2
|
data/bin/csv2ead
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
#!/usr/bin/env ruby
# csv2ead: command-line front end for Stead.
#
# Reads a container-list CSV, derives the eadid from the file name
# (<eadid>_container_list.csv), builds a stub EAD XML document with
# Stead::EadGenerator and writes it to a file and/or the terminal.
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')
require 'pp'
require 'stead'
require 'trollop'

opts = Trollop::options do
  banner <<-EOS
This script takes a csv file with a name in the format <eadid>_container_list.csv
and creates a stub EAD XML document.

Usage:
csv2ead --csv /path/to/<eadid>_container_list.csv [options]

where options are:
EOS

  opt :csv, "A CSV file", :required => true, :type => String
  opt :baseurl, 'Base URL for adding on the eadid', :type => String
  opt :url, 'Full URL for this collection guide', :type => String
  opt :template, 'Specify using a different EAD XML template', :type => String
  opt :ncsu, 'Use NCSU specific template'
  opt :extra, 'Full path to a Stead::Extra file to add in other data', :type => String
  opt :output, 'Save the file by specifying the filename', :type => String
  opt :pretty, 'If --output is specified this will pretty indent the container list.'
  opt :stdout, 'Output full EAD to terminal'
end

# Without a destination there is nothing useful to do. Report the problem on
# stderr and exit non-zero so calling scripts can detect the failure.
unless opts[:output] || opts[:stdout]
  warn "You must specify either --output <file> and/or --stdout to direct output to the terminal."
  exit 1
end

# --ncsu is a shortcut that selects the bundled NCSU template, base URL and
# Stead::Extra file (it replaces any --template/--baseurl/--extra given).
if opts[:ncsu]
  opts[:template] = File.join(File.dirname(__FILE__), '..', 'lib', 'stead', 'templates', 'ncsu_ead.xml')
  opts[:baseurl] = 'http://www.lib.ncsu.edu/findingaids'
  opts[:extra] = File.join(File.dirname(__FILE__), '..', 'examples', 'ncsu.rb')
end

ead_options = {}
# add eadid from filename
# basename will include _container_list so we need to remove that
basename = File.basename(opts[:csv], '.csv')
ead_options[:eadid] = basename.sub(/_container_list.*$/, '')
ead_options[:base_url] = opts[:baseurl] if opts[:baseurl]
[:template, :url].each do |key|
  ead_options[key] = opts[key] if opts[key]
end

ead_generator = Stead::EadGenerator.from_csv(File.read(opts[:csv]), ead_options)
ead = ead_generator.to_ead

# add any extra content or elements to the EAD before outputting
if opts[:extra]
  # Expand to an absolute path: a relative path (as produced above when the
  # script itself is invoked via a relative path) is otherwise looked up on
  # $LOAD_PATH rather than relative to the working directory.
  require File.expand_path(opts[:extra])
  Stead::Extra.run(ead, ead_options[:eadid])
end

if opts[:output]
  File.open(opts[:output], 'w') do |fh|
    if opts[:pretty]
      fh.puts Stead.pretty_write(ead)
    else
      fh.puts ead
    end
  end
end

# Terminal output is always pretty-printed.
puts Stead.pretty_write(ead) if opts[:stdout]
|
70
|
+
|
data/examples/ncsu.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
module Stead
  # Example Stead::Extra implementation with NCSU-specific post-processing.
  # csv2ead requires this file and calls Stead::Extra.run(ead, eadid) after
  # the EAD document has been generated.
  class Extra
    attr_accessor :ead, :eadid

    # ead::   the generated EAD document (queried with xpath and used as the
    #         owner document for new nodes)
    # eadid:: the collection identifier string
    def initialize(ead,eadid)
      @ead, @eadid = ead, eadid
    end

    # Entry point: applies the collection-specific changes in place and
    # returns the same ead object that was passed in.
    def self.run(ead, eadid)
      new(ead, eadid).add_collection_specific
      ead
    end

    # Chooses the customisation by eadid: ids containing 'ua' get the
    # confidentiality notice, a "Records" title suffix and level 'subgrp';
    # ids containing 'mc' get a "Papers" suffix and level 'collection'.
    # Any other eadid is left untouched.
    def add_collection_specific
      if eadid.include?('ua')
        # add additional conditions governing use note
        add_ua_userestrict(ead)
        append_to_titleproper(ead, eadid, 'Records')
        archdesc_level(ead, 'subgrp')
      elsif eadid.include?('mc')
        append_to_titleproper(ead, eadid, 'Papers')
        archdesc_level(ead, 'collection')
      end
    end

    # Sets the level attribute on the first archdesc element.
    def archdesc_level(ead, content)
      ead.xpath('//xmlns:archdesc').first['level'] = content
    end

    # Inserts a second userestrict element (head + p) directly after the
    # first userestrict already present in the document.
    def add_ua_userestrict(ead)
      anchor = ead.xpath('//xmlns:userestrict').first
      notice = Nokogiri::XML::Node.new('userestrict', ead)
      anchor.add_next_sibling(notice)

      heading = Nokogiri::XML::Node.new('head', ead)
      heading.content = 'Confidentiality Notice'

      paragraph = Nokogiri::XML::Node.new('p', ead)
      paragraph.content = <<EOF
This collection may contain materials with sensitive or confidential
information that is protected under federal or state right to privacy laws and
regulations. Researchers are advised that the disclosure of certain information
pertaining to identifiable living individuals represented in this collection
without the consent of those individuals may have legal ramifications (e.g.,
a cause of action under common law for invasion of privacy may arise if facts
concerning an individual's private life are published that would be deemed
highly offensive to a reasonable person) for which North Carolina State
University assumes no responsibility.
EOF

      notice.add_child(heading)
      notice.add_child(paragraph)
    end

    # Appends +text+ to the titleproper, adds a num child holding the
    # upper-cased eadid (underscores turned into dots), and copies the same
    # title and number into the archdesc did's unittitle and unitid.
    def append_to_titleproper(ead, eadid, text)
      title_node = ead.xpath('//xmlns:titleproper').first
      trimmed_title = title_node.content.strip.chomp
      full_title = trimmed_title + ' ' + text
      title_node.content = full_title

      collection_number = eadid.upcase.gsub('_', '.')
      num_node = Nokogiri::XML::Node.new('num', ead)
      num_node.content = collection_number
      title_node.add_child(num_node)

      # now also add to archdesc did
      did = ead.xpath('//xmlns:archdesc/xmlns:did').first
      did.xpath('xmlns:unittitle').first.content = full_title
      did.xpath('xmlns:unitid').first.content = collection_number
    end

  end
end
|
74
|
+
|
data/lib/stead/ead.rb
ADDED
@@ -0,0 +1,270 @@
|
|
1
|
+
module Stead
  # Turns a container-list CSV into a stub EAD XML document.
  #
  # Typical use:
  #   generator = Stead::EadGenerator.from_csv(csv_text, :eadid => 'mc00000')
  #   ead = generator.to_ead
  #
  # Each CSV row becomes one file-level component. When every row has a
  # 'series number' the files are nested (as c02) under series-level c01
  # components; otherwise they are added directly to the dsc as c01.
  class EadGenerator
    attr_accessor :csv, :ead, :template, :series, :component_parts

    # Options (all optional hash keys):
    # :csv::      CSV text, first row being the headers
    # :template:: path to an EAD XML template file (default: Stead.ead_template_xml)
    # :eadid::    text for the eadid element
    # :base_url:: joined with the eadid to form the eadid url attribute
    # :url::      full url attribute for the eadid, used when no :base_url is given
    def initialize(opts = {})
      @csv = opts[:csv] || nil

      @template = pick_template(opts)
      @eadid = opts[:eadid] if opts[:eadid]
      @base_url = opts[:base_url] if opts[:base_url]
      # add_eadid_url reads @url, so the option has to be stored here.
      @url = opts[:url] if opts[:url]
      # component_parts are the rows in the csv file
      @component_parts = csv_to_a
    end

    # Parses the template file named by opts[:template], or falls back to the
    # default template supplied by Stead.ead_template_xml.
    def pick_template(opts)
      if opts[:template]
        Nokogiri::XML(File.read(opts[:template]))
      else
        Stead.ead_template_xml
      end
    end

    # Builds a generator from raw CSV text. Empty header cells that sit
    # between two commas are renamed to 'nothing' and line endings are
    # normalised to "\n" before the text is handed to #initialize.
    def self.from_csv(csv, opts={})
      lines = csv.split(/\r\n|\n/)
      unless lines.empty?
        # The lookahead leaves the second comma in place, so a run of empty
        # cells (",,,") is filled in a single pass.
        lines[0] = lines.first.gsub(/,(?=,)/, ',nothing')
      end
      new(opts.merge(:csv => lines.join("\n")))
    end

    # First eadid element of the document being built.
    def eadid_node
      @ead.xpath('//xmlns:eadid').first
    end

    def add_eadid
      eadid_node.content = @eadid
    end

    # Sets the eadid url attribute; a base url (joined with the eadid) takes
    # precedence over a full url.
    def add_eadid_url
      if @base_url
        eadid_node['url'] = File.join(@base_url, @eadid)
      elsif @url
        eadid_node['url'] = @url
      end
    end

    # Builds the EAD document from a copy of the template and returns it.
    # A document that fails schema validation is still returned; a warning
    # "Invalid EAD" is emitted in that case.
    def to_ead
      @ead = template.dup
      add_eadid
      add_eadid_url
      @dsc = @ead.xpath('//xmlns:archdesc/xmlns:dsc')[0]
      add_series if series?
      @component_parts.each do |cp|
        c = node(file_component_part_name)
        c['level'] = 'file'
        c['audience'] = 'internal' unless cp['internal only'].blank?
        did = node('did')
        c.add_child(did)
        add_did_nodes(cp, did)
        add_containers(cp, did)
        add_scopecontent(cp, did)
        add_accessrestrict(cp, did)
        add_file_component_part(cp, c)
      end
      begin
        valid?
      rescue Stead::InvalidEad
        warn "Invalid EAD"
      end
      ead
    end

    # Adds the arrangement note plus one series-level c01 (with unitid,
    # unittitle and unitdate) for every distinct
    # [series number, series title, series dates] combination in the rows.
    def add_series
      add_arrangement
      series_rows = @component_parts.map do |cp|
        [cp['series number'], cp['series title'], cp['series dates']]
      end.uniq
      series_rows.each do |ser|
        add_arrangement_item(ser)
        # create series node and add to dsc
        series_node = node('c01')
        @dsc.add_child(series_node)
        series_node['level'] = 'series'
        # create series did and add to series node
        series_did = node('did')
        series_node.add_child(series_did)
        unitid = node('unitid')
        unitid.content = ser[0]
        unittitle = node('unittitle')
        unittitle.content = ser[1]
        unitdate = node('unitdate')
        unitdate.content = ser[2]
        series_did.add_child(unitid)
        series_did.add_child(unittitle)
        series_did.add_child(unitdate)
      end
    end

    # Inserts an arrangement element (head, p and an empty list) immediately
    # before the dsc.
    def add_arrangement
      arrangement = node('arrangement')
      head = node('head')
      head.content = 'Organization of the Collection'
      arrangement.add_child(head)
      p = node('p')
      p.content = 'This collection is organized into series:'
      arrangement.add_child(p)
      list = node('list')
      p.add_child(list)
      @dsc.add_previous_sibling(arrangement)
    end

    # Appends one list item to the arrangement note; the item text is the
    # non-blank parts of +ser+ joined with ', '.
    def add_arrangement_item(ser)
      list = @ead.xpath('//xmlns:arrangement/xmlns:p/xmlns:list').first
      item = node('item')
      item.content = ser.reject { |ser_part| ser_part.blank? }.join(', ')
      list.add_child(item)
    end

    # metadata is a hash from the @component_part and c is the actual node
    def add_file_component_part(metadata, c)
      if series?
        current_series = find_current_series(metadata)
        current_series.add_child(c)
      else
        @dsc.add_child(c)
      end
    end

    # Returns the c01 whose did/unittitle text equals the row's
    # 'series title', or nil when there is none.
    # NOTE(review): only the title is compared, so two series sharing a title
    # but differing in number or dates both resolve to the first c01.
    def find_current_series(cp)
      # to_s: a row without a title must match the empty unittitle text
      series_title = cp['series title'].to_s
      title_node = @ead.xpath("//xmlns:c01/xmlns:did/xmlns:unittitle").find do |unittitle|
        unittitle.content == series_title
      end
      title_node && title_node.parent.parent
    end

    # Element name for file-level components: nested below a series they are
    # c02, otherwise c01.
    def file_component_part_name
      series? ? 'c02' : 'c01'
    end

    # Adds the simple did children described by #field_map for every column
    # of the row that is not blank.
    def add_did_nodes(cp, did)
      field_map.each do |header, element|
        next if cp[header].blank?
        case element
        when String
          child = node(element)
          child.content = cp[header]
          did.add_child(child)
        when Array
          # two-level mapping: outer wrapper element, inner element with the text
          outer = node(element[0])
          did.add_child(outer)
          inner = node(element[1])
          outer.add_child(inner)
          inner.content = cp[header]
        end
      end
    end

    # Adds up to three container elements to the did. A container is only
    # written when both its type and its number are present.
    # Raises Stead::InvalidContainerType for a type not in Stead::CONTAINER_TYPES.
    def add_containers(cp, did)
      ['1', '2', '3'].each do |position|
        container_type = cp['container ' + position + ' type']
        container_number = cp['container ' + position + ' number']
        next if container_type.blank? || container_number.blank?
        unless valid_container_type?(container_type)
          raise Stead::InvalidContainerType, container_type
        end
        container = node('container')
        container['type'] = container_type
        container['label'] = cp['instance type'] if cp['instance type']
        container.content = container_number
        did.add_child(container)
      end
    end

    def valid_container_type?(container_type)
      Stead::CONTAINER_TYPES.include?(container_type)
    end

    # Adds scopecontent/p right after the did when the row has a
    # 'scopecontent' value.
    def add_scopecontent(cp, did)
      unless cp['scopecontent'].blank?
        scopecontent = node('scopecontent')
        p = node('p')
        p.content = cp['scopecontent']
        scopecontent.add_child(p)
        did.add_next_sibling(scopecontent)
      end
    end

    # Adds accessrestrict/p right after the did when the row has a
    # 'conditions governing access' value.
    def add_accessrestrict(cp, did)
      unless cp['conditions governing access'].blank?
        accessrestrict = node('accessrestrict')
        p = node('p')
        p.content = cp['conditions governing access']
        accessrestrict.add_child(p)
        did.add_next_sibling(accessrestrict)
      end
    end

    # New element owned by the document being built.
    def node(element)
      Nokogiri::XML::Node.new(element, @ead)
    end

    # CSV header => EAD element name. An Array value means
    # [wrapper element, inner element holding the text].
    def field_map
      {'file id' => 'unitid',
        'file title' => 'unittitle',
        'file dates' => 'unitdate',
        'extent' => ['physdesc', 'extent'],
        'note1' => ['note', 'p'],
        'note2' => ['note', 'p']
      }
    end

    # Parses the CSV text into an array of header => value hashes, one per
    # data row. Raises Stead::InvalidCsv when there are no data rows or when
    # a row carries a nil header.
    def csv_to_a
      a = []
      FasterCSV.parse(csv, :headers => :first_row) do |row|
        a << row.to_hash
      end
      if a.empty? || a.first.key?(nil)
        raise Stead::InvalidCsv
      end
      # TODO invalid if the last row is blank
      # a.sort_by do |row|
      #   [
      #     row['series number'] || 'z',
      #     row['subseries number'] || 'z',
      #     row['container 1 number'] || 'z',
      #     row['container 2 number'] || 'z',
      #     row['file title'] || 'z'
      #   ]
      # end
      a
    end

    # Raises Stead::InvalidEad unless the built document validates against
    # Stead.xsd. Returns nil when the document is valid.
    def valid?
      unless Stead.xsd.valid?(ead)
        raise Stead::InvalidEad
      end
    end

    # True when the rows describe series (see #series_found?).
    def series?
      series_found? ? true : false
    end

    # True only when every row has a non-blank 'series number'.
    def series_found?
      @component_parts.all? { |row| !row['series number'].blank? }
    end

  end
end
|
270
|
+
|
data/lib/stead/error.rb
ADDED
data/lib/stead/stead.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
# Module-level helpers: locations of the bundled EAD schema and template,
# parsed versions of both, and pretty-printing of XML through an XSLT
# stylesheet.
module Stead
  class << self

    # Absolute path of templates/ead.xsd next to this file.
    def ead_schema
      File.expand_path(File.join('templates', 'ead.xsd'), File.dirname(__FILE__))
    end

    # The EAD schema parsed into a Nokogiri::XML::Schema (re-read on each call).
    def xsd
      schema_source = File.read(ead_schema)
      Nokogiri::XML::Schema(schema_source)
    end

    # Absolute path of templates/ead.xml next to this file.
    def ead_template
      File.expand_path(File.join('templates', 'ead.xml'), File.dirname(__FILE__))
    end

    # The default EAD template parsed with Nokogiri::XML (re-read on each call).
    def ead_template_xml
      template_source = File.read(ead_template)
      Nokogiri::XML(template_source)
    end

    # Pretty-prints +xml+, which may be an XML string or a Nokogiri document
    # or node (serialised with to_xml first). Any other argument yields nil.
    def pretty_write(xml)
      case xml
      when String
        write(xml)
      when Nokogiri::XML::Document, Nokogiri::XML::Node
        write(xml.to_xml)
      end
    end

    # Parses +buffer+ as XML, runs it through the indenting stylesheet below
    # and returns the transformed document as an XML string.
    def write(buffer)
      stylesheet = Nokogiri::XSLT(<<XSL)
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" encoding="UTF-8"/>
<xsl:param name="indent-increment" select="' '"/>
<xsl:template name="newline">
<xsl:text disable-output-escaping="yes">
</xsl:text>
</xsl:template>
<xsl:template match="comment() | processing-instruction()">
<xsl:param name="indent" select="''"/>
<xsl:call-template name="newline"/>
<xsl:value-of select="$indent"/>
<xsl:copy />
</xsl:template>
<xsl:template match="text()">
<xsl:param name="indent" select="''"/>
<xsl:call-template name="newline"/>
<xsl:value-of select="$indent"/>
<xsl:value-of select="normalize-space(.)"/>
</xsl:template>
<xsl:template match="text()[normalize-space(.)='']"/>
<xsl:template match="*">
<xsl:param name="indent" select="''"/>
<xsl:call-template name="newline"/>
<xsl:value-of select="$indent"/>
<xsl:choose>
<xsl:when test="count(child::*) > 0">
<xsl:copy>
<xsl:copy-of select="@*"/>
<xsl:apply-templates select="*|text()">
<xsl:with-param name="indent" select="concat ($indent, $indent-increment)"/>
</xsl:apply-templates>
<xsl:call-template name="newline"/>
<xsl:value-of select="$indent"/>
</xsl:copy>
</xsl:when>
<xsl:otherwise>
<xsl:copy-of select="."/>
</xsl:otherwise>
</xsl:choose>
</xsl:template>
</xsl:stylesheet>
XSL

      stylesheet.transform(Nokogiri::XML(buffer)).to_xml
    end

  end
end
|
80
|
+
|