mods 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,41 @@
1
+ module Mods
2
+ class Reader
3
+
4
+ DEFAULT_NS_AWARE = false
5
+
6
+ # true if the XML parsing should be strict about using namespaces.
7
+ attr_accessor :namespace_aware
8
+ attr_reader :mods_ng_xml
9
+
10
+ # @param ns_aware true if the XML parsing should be strict about using namespaces. Default is DEFAULT_NS_AWARE (false)
11
+ def initialize(ns_aware = DEFAULT_NS_AWARE)
12
+ @namespace_aware = ns_aware # read by normalize_mods to decide whether namespaces get removed
13
+ end
14
+
15
+ # @param str - a string containing mods xml; it is parsed with Nokogiri::XML and kept in @mods_ng_xml
16
+ # @return a Nokogiri::XML::Document object, after normalize_mods has been applied to it
17
+ def from_str(str)
18
+ @mods_ng_xml = Nokogiri::XML(str)
19
+ normalize_mods # removes namespaces unless @namespace_aware is set
20
+ @mods_ng_xml
21
+ end
22
+
23
+ # @param url (String) - url that has mods xml as its content
24
+ # @return a Nokogiri::XML::Document object
25
+ def from_url(url, encoding = nil, options = Nokogiri::XML::ParseOptions::DEFAULT_XML)
26
+ require 'open-uri'
27
+ @mods_ng_xml = Nokogiri::XML(open(url).read)
28
+ normalize_mods
29
+ @mods_ng_xml
30
+ end
31
+
32
+ # Whatever we get, normalize it into a Nokogiri::XML::Document,
33
+ # strip any elements enclosing the mods record
34
+ def normalize_mods
35
+ if !@namespace_aware
36
+ @mods_ng_xml.remove_namespaces!
37
+ end
38
+ end
39
+
40
+ end # class
41
+ end # module
@@ -0,0 +1,145 @@
1
+ require 'iso-639'
2
+
3
+ module Mods
4
+
5
+ class Record
6
+
7
+ attr_reader :mods_ng_xml
8
+ # string to use when combining a title and subtitle, e.g.
9
+ # for title "MODS" and subtitle "Metadata Odious Delimited Stuff" and delimiter " : "
10
+ # we get "MODS : Metadata Odious Delimited Stuff"
11
+ attr_accessor :title_delimiter
12
+
13
+ NS_HASH = {'m' => MODS_NS_V3}
14
+
15
+ ATTRIBUTES = ['id', 'version']
16
+
17
+ # @param title_delimiter (String) what to use when combining a title and subtitle, e.g.
18
+ # for title "MODS" and subtitle "Metadata Odious Delimited Stuff" and delimiter " : "
19
+ # we get "MODS : Metadata Odious Delimited Stuff". Defaults to Mods::TitleInfo::DEFAULT_TITLE_DELIM.
20
+ def initialize(title_delimiter = Mods::TitleInfo::DEFAULT_TITLE_DELIM)
21
+ @title_delimiter = title_delimiter
22
+ end
23
+
24
+ # convenience method: parses str via Mods::Reader.new(ns_aware).from_str, then passes the parsed document to set_terminology_ns or set_terminology_no_ns (defined elsewhere)
25
+ # @param str - a string containing mods xml
26
+ # @param ns_aware true if the XML parsing should be strict about using namespaces. Default is false
27
+ def from_str(str, ns_aware = false)
28
+ @mods_ng_xml = Mods::Reader.new(ns_aware).from_str(str)
29
+ if ns_aware
30
+ set_terminology_ns(@mods_ng_xml)
31
+ else
32
+ set_terminology_no_ns(@mods_ng_xml)
33
+ end
34
+ end
35
+
36
+ # convenience method to call Mods::Reader.new.from_url and to nom
37
+ # @param ns_aware true if the XML parsing should be strict about using namespaces. Default is false
38
+ # @param url (String) - url that has mods xml as its content
39
+ def from_url(url, namespace_aware = false)
40
+ @mods_ng_xml = Mods::Reader.new(ns_aware).from_url(url)
41
+ if ns_aware
42
+ set_terminology_ns(@mods_ng_xml)
43
+ else
44
+ set_terminology_no_ns(@mods_ng_xml)
45
+ end
46
+ end
47
+
48
+ # @return Array of Strings, each containing the text contents of <mods><titleInfo> <nonSort> + ' ' + <title> elements
49
+ # but not including any titleInfo elements with type="alternative"
50
+ def short_titles
51
+ @mods_ng_xml.title_info.short_title.map { |n| n }
52
+ end
53
+
54
+ # @return Array of Strings, each containing the text contents of <mods><titleInfo> <nonSort> + ' ' + <title> + (delim) + <subTitle> elements
55
+ def full_titles
56
+ @mods_ng_xml.title_info.full_title.map { |n| n }
57
+ end
58
+
59
+ # @return Array of Strings, each containing the text contents of <mods><titleInfo @type="alternative"><title> elements
60
+ def alternative_titles
61
+ @mods_ng_xml.title_info.alternative_title.map { |n| n }
62
+ end
63
+
64
+ # @return String containing sortable title for this mods record
65
+ def sort_title
66
+ @mods_ng_xml.title_info.sort_title.find { |n| !n.nil? }
67
+ end
68
+
69
+
70
+ # use the displayForm of a personal name if present
71
+ # if no displayForm, try to make a string from family name and given name "family_name, given_name"
72
+ # otherwise, return all nameParts concatenated together
73
+ # @return Array of Strings, each containing the above described string
74
+ def personal_names
75
+ @mods_ng_xml.personal_name.map { |n|
76
+ if n.displayForm.size > 0
77
+ n.displayForm.text
78
+ elsif n.family_name.size > 0
79
+ n.given_name.size > 0 ? n.family_name.text + ', ' + n.given_name.text : n.family_name.text
80
+ else
81
+ n.namePart.text
82
+ end
83
+ }
84
+ end
85
+
86
+ # use the displayForm of a corporate name if present
87
+ # otherwise, return all nameParts concatenated together
88
+ # @return Array of Strings, each containing the above described string
89
+ def corporate_names
90
+ @mods_ng_xml.corporate_name.map { |n|
91
+ if n.displayForm.size > 0
92
+ n.displayForm.text
93
+ else
94
+ n.namePart.text
95
+ end
96
+ }
97
+ end
98
+
99
+ # Translates iso-639 language codes, and leaves everything else alone.
100
+ # @return Array of Strings, each a (hopefully English) name of a language
101
+ def languages
102
+ result = []
103
+ @mods_ng_xml.language.each { |n|
104
+ # get languageTerm codes and add their translations to the result
105
+ n.code_term.each { |ct|
106
+ if ct.authority.match(/^iso639/)
107
+ begin
108
+ vals = ct.text.split(/[,|\ ]/).reject {|x| x.strip.length == 0 }
109
+ vals.each do |v|
110
+ result << ISO_639.find(v.strip).english_name
111
+ end
112
+ rescue => e
113
+ p "Couldn't find english name for #{code.text}"
114
+ result << ct.text
115
+ end
116
+ else
117
+ result << ct.text
118
+ end
119
+ }
120
+ # add languageTerm text values
121
+ n.text_term.each { |tt|
122
+ val = tt.text.strip
123
+ result << val if val.length > 0
124
+ }
125
+
126
+ # add language values that aren't in languageTerm subelement
127
+ if n.languageTerm.size == 0
128
+ result << n.text
129
+ end
130
+ }
131
+ result.uniq
132
+ end
133
+
134
+
135
+ def method_missing method_name, *args
136
+ if mods_ng_xml.respond_to?(method_name)
137
+ mods_ng_xml.send(method_name, *args)
138
+ else
139
+ super.method_missing(method_name, *args)
140
+ end
141
+ end
142
+
143
+ end # class Record
144
+
145
+ end # module Mods
@@ -0,0 +1,24 @@
1
+ module Mods
2
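+ # Constants describing the MODS <titleInfo> element: its subelements, its attributes, valid type values, and the default title delimiter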
3
+ class TitleInfo
4
+ # attr_reader :ng_node
5
+
6
+ NS_HASH = {'m' => MODS_NS_V3}
7
+ SUBELEMENTS = ['title', 'subTitle', 'partNumber', 'partName', 'nonSort']
8
+
9
+ # attributes on titleInfo node
10
+ ATTRIBUTES = ['type', 'authority', 'authorityURI', 'valueURI', 'displayLabel', 'supplied', 'usage', 'altRepGroup', 'nameTitleGroup']
11
+
12
+ # valid values for type attribute on titleInfo node <titleInfo type="val">
13
+ TYPES = ['abbreviated', 'translated', 'alternative', 'uniform']
14
+
15
+ DEFAULT_TITLE_DELIM = ' '
16
+
17
+ # @param (Nokogiri::XML::Node) mods:titleInfo node
18
+ # def initialize(title_info_node)
19
+ # @ng_node = title_info_node
20
+ # end
21
+
22
+ end
23
+
24
+ end
@@ -1,3 +1,4 @@
1
1
  module Mods
2
- VERSION = "0.0.2"
2
+ # this is the Ruby Gem version
3
+ VERSION = "0.0.4"
3
4
  end
@@ -3,21 +3,27 @@ require File.expand_path('../lib/mods/version', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |gem|
5
5
  gem.name = "mods"
6
- gem.authors = ["Naomi Dushay", "Bess Sadler"]
7
6
  gem.version = Mods::VERSION
7
+ gem.authors = ["Naomi Dushay", "Bess Sadler"]
8
8
  gem.email = ["ndushay AT stanford.edu", "bess AT stanford.edu"]
9
- gem.description = %q{A Ruby gem to parse MODS (Metadata Object Description Schema) records}
10
- gem.summary = %q{A Ruby gem to parse MODS (Metadata Object Description Schema) records. More information about MODS can be found at http://www.loc.gov/standards/mods/registry.php.}
11
- gem.homepage = ""
9
+ gem.description = "Parse MODS (Metadata Object Description Schema) records. More information about MODS can be found at http://www.loc.gov/standards/mods/"
10
+ gem.summary = "Parse MODS (Metadata Object Description Schema) records."
11
+ gem.homepage = "https://github.com/sul-dlss/mods"
12
12
 
13
+ gem.extra_rdoc_files = ["LICENSE", "README.rdoc"]
13
14
  gem.files = `git ls-files`.split($\)
14
15
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
15
- gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
16
+ gem.test_files = gem.files.grep(%r{^spec/})
16
17
  gem.require_paths = ["lib"]
17
18
 
18
- # Runtime dependencies
19
19
  gem.add_dependency 'nokogiri'
20
+ gem.add_dependency 'nom-xml'
21
+ gem.add_dependency 'iso-639'
22
+
23
+ # Runtime dependencies
24
+ # gem.add_runtime_dependency 'nokogiri'
20
25
 
26
+ # Development dependencies
21
27
  # Bundler will install these gems too if you've checked out mods source from git and run 'bundle install'
22
28
  # It will not add these as dependencies if you require mods for other projects
23
29
  gem.add_development_dependency "rake"
@@ -0,0 +1,69 @@
1
+ require 'spec_helper'
2
+
3
+ describe "Mods <language> Element" do
4
+ before(:all) do
5
+ @mods_rec = Mods::Record.new
6
+ @simple = '<mods><language>Greek</language></mods>'
7
+ @iso639_2b_code = '<mods><language><languageTerm authority="iso639-2b" type="code">fre</languageTerm></language></mods>'
8
+ @iso639_2b_text = '<mods><language><languageTerm authority="iso639-2b" type="text">English</languageTerm></language></mods>'
9
+ @mult_codes = '<mods><language><languageTerm authority="iso639-2b" type="code">per ara, dut</languageTerm></language></mods>'
10
+ @mult_code_terms = '<mods><language><languageTerm authority="iso639-2b" type="code">spa</languageTerm><languageTerm authority="iso639-2b" type="code">dut</languageTerm></language></mods>'
11
+ @mult_text_terms = '<mods><language><languageTerm authority="iso639-2b" type="text">Chinese</languageTerm><languageTerm authority="iso639-2b" type="text">Spanish</languageTerm></language></mods>'
12
+ end
13
+
14
+ context "basic language terminology pieces" do
15
+ before(:all) do
16
+ @mods_rec.from_str(@iso639_2b_code)
17
+ end
18
+ it "should understand languageTerm.type_at attribute" do
19
+ @mods_rec.language.languageTerm.type_at.should == ["code"]
20
+ end
21
+ it "should understand languageTerm.authority attribute" do
22
+ @mods_rec.language.languageTerm.authority.should == ["iso639-2b"]
23
+ end
24
+ it "should understand languageTerm value" do
25
+ @mods_rec.language.languageTerm.text.should == "fre"
26
+ @mods_rec.language.languageTerm.size.should == 1
27
+ end
28
+ it "should get one language.code_term for each languageTerm element with a type attribute of 'code'" do
29
+ @mods_rec.language.code_term.size.should == 1
30
+ @mods_rec.language.code_term.text.should == "fre"
31
+ @mods_rec.from_str(@mult_code_terms)
32
+ @mods_rec.language.code_term.size.should == 2
33
+ @mods_rec.language.code_term.first.text.should include("spa")
34
+ @mods_rec.language.code_term[1].text.should == "dut"
35
+ end
36
+ it "should get one language.text_term for each languageTerm element with a type attribute of 'text'" do
37
+ @mods_rec.from_str(@mult_text_terms)
38
+ @mods_rec.language.text_term.size.should == 2
39
+ @mods_rec.language.text_term.first.text.should include("Chinese")
40
+ @mods_rec.language.text_term[1].text.should == "Spanish"
41
+ end
42
+ end
43
+
44
+ context "Mods::Record.languages convenience method" do
45
+
46
+ it "should translate iso639-2b codes to English" do
47
+ @mods_rec.from_str(@iso639_2b_code)
48
+ @mods_rec.languages.should == ["French"]
49
+ end
50
+
51
+ it "should pass thru language values that are already text (not code)" do
52
+ @mods_rec.from_str(@iso639_2b_text)
53
+ @mods_rec.languages.should == ["English"]
54
+ end
55
+
56
+ it "should keep values that are not inside <languageTerm> elements" do
57
+ @mods_rec.from_str(@simple)
58
+ @mods_rec.languages.should == ["Greek"]
59
+ end
60
+
61
+ it "should create a separate value for each language in a comma, space, or | separated list " do
62
+ @mods_rec.from_str(@mult_codes)
63
+ @mods_rec.languages.should include("Arabic")
64
+ @mods_rec.languages.should include("Persian")
65
+ @mods_rec.languages.should include("Dutch; Flemish")
66
+ end
67
+ end
68
+
69
+ end
@@ -0,0 +1,148 @@
1
+ require 'spec_helper'
2
+
3
+ describe "Mods <location> Element" do
4
+ before(:all) do
5
+ @mods_rec = Mods::Record.new
6
+ @url_and_phys = '<mods><location>
7
+ <url displayLabel="Digital collection of 46 images available online" usage="primary display">http://searchworks.stanford.edu/?f%5Bcollection%5D%5B%5D=The+Reid+W.+Dennis+Collection+of+California+Lithographs&amp;view=gallery</url>
8
+ </location><location>
9
+ <physicalLocation>Department of Special Collections, Stanford University Libraries, Stanford, CA 94305.</physicalLocation>
10
+ </location></mods>'
11
+ # from http://www.loc.gov/standards/mods/v3/mods-userguide-elements.html !!
12
+ # sublocation is not allowed directly under location
13
+ @incorrect = '<mods><location>
14
+ <physicalLocation>Library of Congress </physicalLocation>
15
+ <sublocation>Prints and Photographs Division Washington, D.C. 20540 USA</sublocation>
16
+ <shelfLocator>DAG no. 1410</shelfLocator>
17
+ </location></mods>'
18
+ end
19
+
20
+ context "basic location terminology pieces" do
21
+ before(:all) do
22
+ @mods_rec.from_str(@form_and_extent)
23
+ end
24
+
25
+ context "physicalLocation child element" do
26
+ before(:all) do
27
+ @phys_loc_only = '<mods><location><physicalLocation>here</physicalLocation></location></mods>'
28
+ @phys_loc_authority = '<mods><location><physicalLocation authority="marcorg">MnRM</physicalLocation></location></mods>'
29
+ end
30
+ it "should have access to text value of element" do
31
+ @mods_rec.from_str(@phys_loc_only)
32
+ @mods_rec.location.physicalLocation.text.should == "here"
33
+ @mods_rec.from_str(@phys_loc_authority)
34
+ @mods_rec.location.physicalLocation.map { |n| n.text }.should == ["MnRM"]
35
+ end
36
+ it "should recognize authority attribute" do
37
+ @mods_rec.from_str(@phys_loc_authority)
38
+ @mods_rec.location.physicalLocation.authority.should == ["marcorg"]
39
+ end
40
+ it "should recognize displayLabel attribute" do
41
+ @mods_rec.from_str('<mods><location><physicalLocation displayLabel="Correspondence">some address</physicalLocation></location></mods>')
42
+ @mods_rec.location.physicalLocation.displayLabel.should == ["Correspondence"]
43
+ end
44
+ end
45
+
46
+ it "shelfLocator child element" do
47
+ shelf_loc = '<mods><location>
48
+ <physicalLocation>Library of Congress </physicalLocation>
49
+ <shelfLocator>DAG no. 1410</shelfLocator>
50
+ </location></mods>'
51
+ @mods_rec.from_str(shelf_loc)
52
+ @mods_rec.location.shelfLocator.map { |n| n.text }.should == ["DAG no. 1410"]
53
+ end
54
+
55
+ context "url child element" do
56
+ before(:all) do
57
+ @empty_loc_url = '<mods><location><url/></location></mods>'
58
+ @mult_flavor_loc_urls = '<mods><location>
59
+ <url access="preview">http://preview.org</url>
60
+ <url access="object in context">http://context.org</url>
61
+ <url access="raw object">http://object.org</url>
62
+ </location></mods>'
63
+ end
64
+ it "should have access to text value of element" do
65
+ urls = @mods_rec.from_str(@mult_flavor_loc_urls).location.url.map { |e| e.text }
66
+ urls.size.should == 3
67
+ urls.should include("http://preview.org")
68
+ urls.should include("http://context.org")
69
+ urls.should include("http://object.org")
70
+ end
71
+ context "attributes" do
72
+ before(:all) do
73
+ @url_attribs = '<mods><location>
74
+ <url displayLabel="Digital collection of 46 images available online" usage="primary display">http://searchworks.stanford.edu/?f%5Bcollection%5D%5B%5D=The+Reid+W.+Dennis+Collection+of+California+Lithographs&amp;view=gallery</url>
75
+ </location></mods>'
76
+ end
77
+ it "should recognize displayLabel attribute" do
78
+ @mods_rec.from_str(@url_attribs).location.url.displayLabel.should == ["Digital collection of 46 images available online"]
79
+ end
80
+ it "should recognize access attribute" do
81
+ vals = @mods_rec.from_str(@mult_flavor_loc_urls).location.url.access
82
+ vals.size.should == 3
83
+ vals.should include("preview")
84
+ vals.should include("object in context")
85
+ vals.should include("raw object")
86
+ end
87
+ it "should recognize usage attribute" do
88
+ @mods_rec.from_str(@url_attribs).location.url.usage.should == ["primary display"]
89
+ end
90
+ it "should recognize note attribute" do
91
+ @mods_rec.from_str('<mods><location><url note="something">http://somewhere.org</url></location></mods>')
92
+ @mods_rec.location.url.note.should == ["something"]
93
+ end
94
+ it "should recognize dateLastAccessed attribute" do
95
+ @mods_rec.from_str('<mods><location><url dateLastAccessed="something">http://somewhere.org</url></location></mods>')
96
+ @mods_rec.location.url.dateLastAccessed.should == ["something"]
97
+ end
98
+ end # attributes
99
+ it "should have array with empty string for single empty url element" do
100
+ @mods_rec.from_str(@empty_loc_url).location.url.map { |n| n.text }.should == [""]
101
+ end
102
+ end # url child element
103
+
104
+ it "holdingSimple child element" do
105
+ xml = '<mods><location>
106
+ <physicalLocation authority="marcorg">MnRM</physicalLocation>
107
+ <holdingSimple>
108
+ <copyInformation>
109
+ <sublocation>Patient reading room</sublocation>
110
+ <shelfLocator>QH511.A1J68</shelfLocator>
111
+ <enumerationAndChronology unitType="1"> v.1-v.8 1970-1976</enumerationAndChronology>
112
+ </copyInformation>
113
+ </holdingSimple></location></mods>'
114
+ @mods_rec.from_str(xml).location.holdingSimple.should be_an_instance_of(Nokogiri::XML::NodeSet)
115
+ @mods_rec.from_str(xml).location.holdingSimple.first.should be_an_instance_of(Nokogiri::XML::Element)
116
+ end
117
+ it "holdingComplex child element" do
118
+ xml = '<mods>
119
+ <location>
120
+ <physicalLocation>Menlo Park Public Library</physicalLocation>
121
+ <holdingExternal>
122
+ <holding xmlns:iso20775="info:ofi/fmt:xml:xsd:iso20775" xsi:schemaLocation="info:ofi/fmt:xml:xsd:iso20775 http://www.loc.gov/standards/iso20775/N130_ISOholdings_v6_1.xsd">
123
+ <institutionIdentifier>
124
+ <value>JRF</value>
125
+ <typeOrSource>
126
+ <pointer>http://worldcat.org/registry/institutions/</pointer>
127
+ </typeOrSource>
128
+ </institutionIdentifier>
129
+ <physicalLocation>Menlo Park Public Library</physicalLocation>
130
+ <physicalAddress>
131
+ <text>Menlo Park, CA 94025 United States </text>
132
+ </physicalAddress>
133
+ <electronicAddress>
134
+ <text>http://www.worldcat.org/wcpa/oclc/15550774? page=frame&amp;url=%3D%3FUTF-8%3FB%FaHR0cDovL2NhdGFsb2cucGxzaW5mby5vcmcvc2VhcmNoL2kwMTk1MDM4NjMw%3F%3D&amp;title=Menlo+Park+Public+Library&amp;linktype=opac&amp;detail=JRF%3AMenlo+Park+Public+Library%3APublic&amp;app=wcapi&amp;id=OCL-OCLC+Staff+use</text>
135
+ </electronicAddress>
136
+ <holdingSimple>
137
+ <copiesSummary>
138
+ <copiesCount>1</copiesCount>
139
+ </copiesSummary>
140
+ </holdingSimple>
141
+ </holding>
142
+ </holdingExternal>
143
+ </mods>'
144
+ @mods_rec.from_str(xml).location.holdingExternal.should be_an_instance_of(Nokogiri::XML::NodeSet)
145
+ @mods_rec.from_str(xml).location.holdingExternal.first.should be_an_instance_of(Nokogiri::XML::Element)
146
+ end
147
+ end
148
+ end