mods 0.0.2 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,41 @@
1
+ module Mods
2
+ class Reader
3
+
4
+ DEFAULT_NS_AWARE = false
5
+
6
+ # true if the XML parsing should be strict about using namespaces.
7
+ attr_accessor :namespace_aware
8
+ attr_reader :mods_ng_xml
9
+
10
# Create a reader and record whether namespaces must be honoured when parsing.
# @param ns_aware true if the XML parsing should be strict about using namespaces;
#   falls back to DEFAULT_NS_AWARE when omitted
def initialize(ns_aware = DEFAULT_NS_AWARE)
  @namespace_aware = ns_aware
end
14
+
15
# Parse a string of MODS XML, remember the parsed document, and run
# normalize_mods on it.
# @param str - a string containing mods xml
# @return a Nokogiri::XML::Document object (the value stored in @mods_ng_xml)
def from_str(str)
  parsed = Nokogiri::XML(str)
  @mods_ng_xml = parsed
  normalize_mods
  @mods_ng_xml
end
22
+
23
# Fetch MODS XML from a url, parse it, remember the parsed document, and run
# normalize_mods on it.
# @param url (String) - url that has mods xml as its content
# @param encoding encoding name handed to Nokogiri; nil (the default) lets the parser decide
# @param options Nokogiri parse options; defaults to Nokogiri::XML::ParseOptions::DEFAULT_XML
# @return a Nokogiri::XML::Document object (the value stored in @mods_ng_xml)
def from_url(url, encoding = nil, options = Nokogiri::XML::ParseOptions::DEFAULT_XML)
  require 'open-uri'
  # Rubies that ship URI.open no longer (3.0+) let a bare Kernel#open fetch
  # URLs; older Rubies only have the open-uri-patched Kernel.open.
  opener = URI.respond_to?(:open) ? URI : Kernel
  # NOTE(review): both openers also accept local paths (and Kernel.open accepts
  # "|command" pipes) -- do not hand untrusted input to this method.
  # The block form closes the handle as soon as the content has been read.
  content = opener.open(url) { |io| io.read }
  # encoding and options used to be accepted but silently ignored; pass them on.
  @mods_ng_xml = Nokogiri::XML(content, nil, encoding, options)
  normalize_mods
  @mods_ng_xml
end
31
+
32
# Post-process the parsed document held in @mods_ng_xml: unless the reader is
# namespace aware, remove all namespaces from the document in place.
def normalize_mods
  @mods_ng_xml.remove_namespaces! unless @namespace_aware
end
39
+
40
+ end # class
41
+ end # module
@@ -0,0 +1,145 @@
1
+ require 'iso-639'
2
+
3
+ module Mods
4
+
5
+ class Record
6
+
7
+ attr_reader :mods_ng_xml
8
+ # string to use when combining a title and subtitle, e.g.
9
+ # for title "MODS" and subtitle "Metadata Odious Delimited Stuff" and delimiter " : "
10
+ # we get "MODS : Metadata Odious Delimited Stuff"
11
+ attr_accessor :title_delimiter
12
+
13
+ NS_HASH = {'m' => MODS_NS_V3}
14
+
15
+ ATTRIBUTES = ['id', 'version']
16
+
17
# Create a record and remember the delimiter used to join a title and subtitle.
# @param title_delimiter (String) what to use when combining a title and subtitle, e.g.
#   for title "MODS" and subtitle "Metadata Odious Delimited Stuff" and delimiter " : "
#   we get "MODS : Metadata Odious Delimited Stuff".
#   Defaults to Mods::TitleInfo::DEFAULT_TITLE_DELIM.
def initialize(title_delimiter = Mods::TitleInfo::DEFAULT_TITLE_DELIM)
  @title_delimiter = title_delimiter
end
23
+
24
# Convenience method: parse the string with Mods::Reader#from_str, keep the
# result in @mods_ng_xml, then apply the terminology matching the namespace mode.
# @param str - a string containing mods xml
# @param ns_aware true if the XML parsing should be strict about using namespaces. Default is false
# @return whatever set_terminology_ns / set_terminology_no_ns returns
def from_str(str, ns_aware = false)
  reader = Mods::Reader.new(ns_aware)
  @mods_ng_xml = reader.from_str(str)
  ns_aware ? set_terminology_ns(@mods_ng_xml) : set_terminology_no_ns(@mods_ng_xml)
end
35
+
36
# Convenience method: fetch and parse the url with Mods::Reader#from_url, keep
# the result in @mods_ng_xml, then apply the terminology matching the namespace mode.
# @param url (String) - url that has mods xml as its content
# @param namespace_aware true if the XML parsing should be strict about using namespaces. Default is false
# @return whatever set_terminology_ns / set_terminology_no_ns returns
def from_url(url, namespace_aware = false)
  # The body previously read an undefined local (ns_aware) and raised NameError
  # on every call; it now uses the declared parameter.
  @mods_ng_xml = Mods::Reader.new(namespace_aware).from_url(url)
  if namespace_aware
    set_terminology_ns(@mods_ng_xml)
  else
    set_terminology_no_ns(@mods_ng_xml)
  end
end
47
+
48
# Collects every value of the title_info.short_title term into a plain Array.
# @return Array of Strings, each containing the text contents of <mods><titleInfo> <nonSort> + ' ' + <title> elements
#   but not including any titleInfo elements with type="alternative"
#   (as produced by the short_title terminology, which is defined outside this method)
def short_titles
  titles = @mods_ng_xml.title_info.short_title
  titles.map { |title| title }
end
53
+
54
# Collects every value of the title_info.full_title term into a plain Array.
# @return Array of Strings, each containing the text contents of <mods><titleInfo> <nonSort> + ' ' + <title> + (delim) + <subTitle> elements
#   (as produced by the full_title terminology, which is defined outside this method)
def full_titles
  titles = @mods_ng_xml.title_info.full_title
  titles.map { |title| title }
end
58
+
59
# Collects every value of the title_info.alternative_title term into a plain Array.
# @return Array of Strings, each containing the text contents of <mods><titleInfo @type="alternative"><title> elements
#   (as produced by the alternative_title terminology, which is defined outside this method)
def alternative_titles
  titles = @mods_ng_xml.title_info.alternative_title
  titles.map { |title| title }
end
63
+
64
# Picks the first non-nil value of the title_info.sort_title term.
# @return String containing sortable title for this mods record, or nil when
#   every candidate is nil (or there are none)
def sort_title
  candidates = @mods_ng_xml.title_info.sort_title
  candidates.find { |candidate| !candidate.nil? }
end
68
+
69
+
70
# Builds one display string per personal name in the record:
# * the displayForm text when a displayForm is present;
# * otherwise "family_name, given_name" when a family name is present
#   (just the family name when there is no given name);
# * otherwise the text of all nameParts concatenated together.
# @return Array of Strings, each containing the above described string
def personal_names
  @mods_ng_xml.personal_name.map do |name|
    next name.displayForm.text if name.displayForm.size > 0
    next name.namePart.text unless name.family_name.size > 0

    if name.given_name.size > 0
      name.family_name.text + ', ' + name.given_name.text
    else
      name.family_name.text
    end
  end
end
85
+
86
# Builds one display string per corporate name in the record: the displayForm
# text when a displayForm is present, otherwise the text of all nameParts
# concatenated together.
# @return Array of Strings, each containing the above described string
def corporate_names
  @mods_ng_xml.corporate_name.map do |name|
    name.displayForm.size > 0 ? name.displayForm.text : name.namePart.text
  end
end
98
+
99
# Translates iso-639 language codes, and leaves everything else alone.
#
# For every <language> element of the record:
# * each code_term whose authority starts with "iso639" is split on commas,
#   pipes and spaces, and every piece is looked up with ISO_639.find; if any
#   lookup fails, a message is printed and the raw term text is kept instead;
# * each code_term with another authority is passed through unchanged;
# * each non-blank text_term is added (stripped);
# * when the element has no languageTerm at all, its own text is added.
# @return Array of Strings, each a (hopefully English) name of a language, without duplicates
def languages
  result = []
  @mods_ng_xml.language.each do |lang|
    # get languageTerm codes and add their translations to the result
    lang.code_term.each do |ct|
      if ct.authority.match(/^iso639/)
        begin
          codes = ct.text.split(/[,|\ ]/).reject { |x| x.strip.length == 0 }
          codes.each do |v|
            # ISO_639.find returns nil for an unknown code, so english_name
            # raises and we land in the rescue below
            result << ISO_639.find(v.strip).english_name
          end
        rescue StandardError
          # previously interpolated an undefined local (code), which raised
          # NameError here and defeated the fallback
          p "Couldn't find english name for #{ct.text}"
          result << ct.text
        end
      else
        result << ct.text
      end
    end

    # add languageTerm text values
    lang.text_term.each do |tt|
      val = tt.text.strip
      result << val if val.length > 0
    end

    # add language values that aren't in languageTerm subelement
    result << lang.text if lang.languageTerm.size == 0
  end
  result.uniq
end
133
+
134
+
135
# Delegate any message this record does not understand to the parsed document
# (mods_ng_xml) when the document publicly responds to it; otherwise fall
# through to the default behaviour, which raises NoMethodError.
# @param method_name (Symbol) name of the missing method
# @param args arguments to forward; a block, if given, is forwarded as well
def method_missing(method_name, *args, &block)
  if mods_ng_xml.respond_to?(method_name)
    mods_ng_xml.send(method_name, *args, &block)
  else
    # bare super re-dispatches with the original arguments; chaining
    # .method_missing onto it (as before) was dead code after the raise
    super
  end
end

# Keep respond_to? consistent with the delegation done in method_missing.
def respond_to_missing?(method_name, include_private = false)
  mods_ng_xml.respond_to?(method_name) || super
end
142
+
143
+ end # class Record
144
+
145
+ end # module Mods
@@ -0,0 +1,24 @@
1
module Mods
  # Constants describing the MODS <titleInfo> element: its namespace mapping,
  # the subelement and attribute names, the values listed for its type
  # attribute, and the default delimiter used when joining a title and subtitle.
  class TitleInfo
    # attr_reader :ng_node

    NS_HASH = { 'm' => MODS_NS_V3 }
    SUBELEMENTS = %w[title subTitle partNumber partName nonSort]

    # attributes on titleInfo node
    ATTRIBUTES = %w[
      type authority authorityURI valueURI displayLabel
      supplied usage altRepGroup nameTitleGroup
    ]

    # valid values for type attribute on titleInfo node <titleInfo type="val">
    TYPES = %w[abbreviated translated alternative uniform]

    DEFAULT_TITLE_DELIM = ' '

    # @param (Nokogiri::XML::Node) mods:titleInfo node
    # def initialize(title_info_node)
    #   @ng_node = title_info_node
    # end
  end
end
@@ -1,3 +1,4 @@
1
1
  module Mods
2
- VERSION = "0.0.2"
2
+ # this is the Ruby Gem version
3
+ VERSION = "0.0.4"
3
4
  end
@@ -3,21 +3,27 @@ require File.expand_path('../lib/mods/version', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |gem|
5
5
  gem.name = "mods"
6
- gem.authors = ["Naomi Dushay", "Bess Sadler"]
7
6
  gem.version = Mods::VERSION
7
+ gem.authors = ["Naomi Dushay", "Bess Sadler"]
8
8
  gem.email = ["ndushay AT stanford.edu", "bess AT stanford.edu"]
9
- gem.description = %q{A Ruby gem to parse MODS (Metadata Object Description Schema) records}
10
- gem.summary = %q{A Ruby gem to parse MODS (Metadata Object Description Schema) records. More information about MODS can be found at http://www.loc.gov/standards/mods/registry.php.}
11
- gem.homepage = ""
9
+ gem.description = "Parse MODS (Metadata Object Description Schema) records. More information about MODS can be found at http://www.loc.gov/standards/mods/"
10
+ gem.summary = "Parse MODS (Metadata Object Description Schema) records."
11
+ gem.homepage = "https://github.com/sul-dlss/mods"
12
12
 
13
+ gem.extra_rdoc_files = ["LICENSE", "README.rdoc"]
13
14
  gem.files = `git ls-files`.split($\)
14
15
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
15
- gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
16
+ gem.test_files = gem.files.grep(%r{^spec/})
16
17
  gem.require_paths = ["lib"]
17
18
 
18
- # Runtime dependencies
19
19
  gem.add_dependency 'nokogiri'
20
+ gem.add_dependency 'nom-xml'
21
+ gem.add_dependency 'iso-639'
22
+
23
+ # Runtime dependencies
24
+ # gem.add_runtime_dependency 'nokogiri'
20
25
 
26
+ # Development dependencies
21
27
  # Bundler will install these gems too if you've checked out the mods source from git and run 'bundle install'
22
28
  # It will not add these as dependencies if you require mods for other projects
23
29
  gem.add_development_dependency "rake"
@@ -0,0 +1,69 @@
1
+ require 'spec_helper'
2
+
3
# Specs for the <language> element: the terminology pieces exposed on the
# parsed record and the Mods::Record#languages convenience method.
describe "Mods <language> Element" do
  # One shared record plus the XML fixtures used by the examples below.
  before(:all) do
    @mods_rec = Mods::Record.new
    @simple = '<mods><language>Greek</language></mods>'
    @iso639_2b_code = '<mods><language><languageTerm authority="iso639-2b" type="code">fre</languageTerm></language></mods>'
    @iso639_2b_text = '<mods><language><languageTerm authority="iso639-2b" type="text">English</languageTerm></language></mods>'
    @mult_codes = '<mods><language><languageTerm authority="iso639-2b" type="code">per ara, dut</languageTerm></language></mods>'
    @mult_code_terms = '<mods><language><languageTerm authority="iso639-2b" type="code">spa</languageTerm><languageTerm authority="iso639-2b" type="code">dut</languageTerm></language></mods>'
    @mult_text_terms = '<mods><language><languageTerm authority="iso639-2b" type="text">Chinese</languageTerm><languageTerm authority="iso639-2b" type="text">Spanish</languageTerm></language></mods>'
  end

  context "basic language terminology pieces" do
    # Reload the single-code fixture before EVERY example. Some examples below
    # re-parse other fixtures into the shared @mods_rec, so loading it only once
    # (before(:all)) made the first three examples depend on execution order.
    before(:each) do
      @mods_rec.from_str(@iso639_2b_code)
    end
    it "should understand languageTerm.type_at attribute" do
      @mods_rec.language.languageTerm.type_at.should == ["code"]
    end
    it "should understand languageTerm.authority attribute" do
      @mods_rec.language.languageTerm.authority.should == ["iso639-2b"]
    end
    it "should understand languageTerm value" do
      @mods_rec.language.languageTerm.text.should == "fre"
      @mods_rec.language.languageTerm.size.should == 1
    end
    it "should get one language.code_term for each languageTerm element with a type attribute of 'code'" do
      @mods_rec.language.code_term.size.should == 1
      @mods_rec.language.code_term.text.should == "fre"
      @mods_rec.from_str(@mult_code_terms)
      @mods_rec.language.code_term.size.should == 2
      @mods_rec.language.code_term.first.text.should include("spa")
      @mods_rec.language.code_term[1].text.should == "dut"
    end
    it "should get one language.text_term for each languageTerm element with a type attribute of 'text'" do
      @mods_rec.from_str(@mult_text_terms)
      @mods_rec.language.text_term.size.should == 2
      @mods_rec.language.text_term.first.text.should include("Chinese")
      @mods_rec.language.text_term[1].text.should == "Spanish"
    end
  end

  # Every example here parses its own fixture first, so no shared setup is needed.
  context "Mods::Record.languages convenience method" do

    it "should translate iso639-2b codes to English" do
      @mods_rec.from_str(@iso639_2b_code)
      @mods_rec.languages.should == ["French"]
    end

    it "should pass thru language values that are already text (not code)" do
      @mods_rec.from_str(@iso639_2b_text)
      @mods_rec.languages.should == ["English"]
    end

    it "should keep values that are not inside <languageTerm> elements" do
      @mods_rec.from_str(@simple)
      @mods_rec.languages.should == ["Greek"]
    end

    it "should create a separate value for each language in a comma, space, or | separated list " do
      @mods_rec.from_str(@mult_codes)
      @mods_rec.languages.should include("Arabic")
      @mods_rec.languages.should include("Persian")
      @mods_rec.languages.should include("Dutch; Flemish")
    end
  end

end
@@ -0,0 +1,148 @@
1
+ require 'spec_helper'
2
+
3
# Specs for the <location> element and its physicalLocation, shelfLocator,
# url, holdingSimple and holdingExternal children.
describe "Mods <location> Element" do
  # One shared record plus fixtures; each example parses the XML it needs.
  before(:all) do
    @mods_rec = Mods::Record.new
    @url_and_phys = '<mods><location>
      <url displayLabel="Digital collection of 46 images available online" usage="primary display">http://searchworks.stanford.edu/?f%5Bcollection%5D%5B%5D=The+Reid+W.+Dennis+Collection+of+California+Lithographs&amp;view=gallery</url>
      </location><location>
      <physicalLocation>Department of Special Collections, Stanford University Libraries, Stanford, CA 94305.</physicalLocation>
      </location></mods>'
    # from http://www.loc.gov/standards/mods/v3/mods-userguide-elements.html !!
    # sublocation is not allowed directly under location
    @incorrect = '<mods><location>
      <physicalLocation>Library of Congress </physicalLocation>
      <sublocation>Prints and Photographs Division Washington, D.C. 20540 USA</sublocation>
      <shelfLocator>DAG no. 1410</shelfLocator>
      </location></mods>'
  end

  context "basic location terminology pieces" do
    before(:all) do
      # This hook used to parse @form_and_extent, which is never defined in this
      # spec (so it parsed nil); start from a real location fixture instead.
      @mods_rec.from_str(@url_and_phys)
    end

    context "physicalLocation child element" do
      before(:all) do
        @phys_loc_only = '<mods><location><physicalLocation>here</physicalLocation></location></mods>'
        @phys_loc_authority = '<mods><location><physicalLocation authority="marcorg">MnRM</physicalLocation></location></mods>'
      end
      it "should have access to text value of element" do
        @mods_rec.from_str(@phys_loc_only)
        @mods_rec.location.physicalLocation.text.should == "here"
        @mods_rec.from_str(@phys_loc_authority)
        @mods_rec.location.physicalLocation.map { |n| n.text }.should == ["MnRM"]
      end
      it "should recognize authority attribute" do
        @mods_rec.from_str(@phys_loc_authority)
        @mods_rec.location.physicalLocation.authority.should == ["marcorg"]
      end
      it "should recognize displayLabel attribute" do
        @mods_rec.from_str('<mods><location><physicalLocation displayLabel="Correspondence">some address</physicalLocation></location></mods>')
        @mods_rec.location.physicalLocation.displayLabel.should == ["Correspondence"]
      end
    end

    it "shelfLocator child element" do
      shelf_loc = '<mods><location>
        <physicalLocation>Library of Congress </physicalLocation>
        <shelfLocator>DAG no. 1410</shelfLocator>
        </location></mods>'
      @mods_rec.from_str(shelf_loc)
      @mods_rec.location.shelfLocator.map { |n| n.text }.should == ["DAG no. 1410"]
    end

    context "url child element" do
      before(:all) do
        @empty_loc_url = '<mods><location><url/></location></mods>'
        @mult_flavor_loc_urls = '<mods><location>
          <url access="preview">http://preview.org</url>
          <url access="object in context">http://context.org</url>
          <url access="raw object">http://object.org</url>
          </location></mods>'
      end
      it "should have access to text value of element" do
        urls = @mods_rec.from_str(@mult_flavor_loc_urls).location.url.map { |e| e.text }
        urls.size.should == 3
        urls.should include("http://preview.org")
        urls.should include("http://context.org")
        urls.should include("http://object.org")
      end
      context "attributes" do
        before(:all) do
          @url_attribs = '<mods><location>
            <url displayLabel="Digital collection of 46 images available online" usage="primary display">http://searchworks.stanford.edu/?f%5Bcollection%5D%5B%5D=The+Reid+W.+Dennis+Collection+of+California+Lithographs&amp;view=gallery</url>
            </location></mods>'
        end
        it "should recognize displayLabel attribute" do
          @mods_rec.from_str(@url_attribs).location.url.displayLabel.should == ["Digital collection of 46 images available online"]
        end
        it "should recognize access attribute" do
          vals = @mods_rec.from_str(@mult_flavor_loc_urls).location.url.access
          vals.size.should == 3
          vals.should include("preview")
          vals.should include("object in context")
          vals.should include("raw object")
        end
        it "should recognize usage attribute" do
          @mods_rec.from_str(@url_attribs).location.url.usage.should == ["primary display"]
        end
        it "should recognize note attribute" do
          @mods_rec.from_str('<mods><location><url note="something">http://somewhere.org</url></location></mods>')
          @mods_rec.location.url.note.should == ["something"]
        end
        it "should recognize dateLastAccessed attribute" do
          @mods_rec.from_str('<mods><location><url dateLastAccessed="something">http://somewhere.org</url></location></mods>')
          @mods_rec.location.url.dateLastAccessed.should == ["something"]
        end
      end # attributes
      it "should have array with empty string for single empty url element" do
        @mods_rec.from_str(@empty_loc_url).location.url.map { |n| n.text }.should == [""]
      end
    end # url child element

    it "holdingSimple child element" do
      xml = '<mods><location>
        <physicalLocation authority="marcorg">MnRM</physicalLocation>
        <holdingSimple>
        <copyInformation>
        <sublocation>Patient reading room</sublocation>
        <shelfLocator>QH511.A1J68</shelfLocator>
        <enumerationAndChronology unitType="1"> v.1-v.8 1970-1976</enumerationAndChronology>
        </copyInformation>
        </holdingSimple></location></mods>'
      @mods_rec.from_str(xml).location.holdingSimple.should be_an_instance_of(Nokogiri::XML::NodeSet)
      @mods_rec.from_str(xml).location.holdingSimple.first.should be_an_instance_of(Nokogiri::XML::Element)
    end
    # Was labelled "holdingComplex", but the element under test is holdingExternal.
    it "holdingExternal child element" do
      # The fixture previously lacked the closing </location> tag.
      xml = '<mods>
        <location>
        <physicalLocation>Menlo Park Public Library</physicalLocation>
        <holdingExternal>
        <holding xmlns:iso20775="info:ofi/fmt:xml:xsd:iso20775" xsi:schemaLocation="info:ofi/fmt:xml:xsd:iso20775 http://www.loc.gov/standards/iso20775/N130_ISOholdings_v6_1.xsd">
        <institutionIdentifier>
        <value>JRF</value>
        <typeOrSource>
        <pointer>http://worldcat.org/registry/institutions/</pointer>
        </typeOrSource>
        </institutionIdentifier>
        <physicalLocation>Menlo Park Public Library</physicalLocation>
        <physicalAddress>
        <text>Menlo Park, CA 94025 United States </text>
        </physicalAddress>
        <electronicAddress>
        <text>http://www.worldcat.org/wcpa/oclc/15550774? page=frame&amp;url=%3D%3FUTF-8%3FB%FaHR0cDovL2NhdGFsb2cucGxzaW5mby5vcmcvc2VhcmNoL2kwMTk1MDM4NjMw%3F%3D&amp;title=Menlo+Park+Public+Library&amp;linktype=opac&amp;detail=JRF%3AMenlo+Park+Public+Library%3APublic&amp;app=wcapi&amp;id=OCL-OCLC+Staff+use</text>
        </electronicAddress>
        <holdingSimple>
        <copiesSummary>
        <copiesCount>1</copiesCount>
        </copiesSummary>
        </holdingSimple>
        </holding>
        </holdingExternal>
        </location>
        </mods>'
      @mods_rec.from_str(xml).location.holdingExternal.should be_an_instance_of(Nokogiri::XML::NodeSet)
      @mods_rec.from_str(xml).location.holdingExternal.first.should be_an_instance_of(Nokogiri::XML::Element)
    end
  end
end