opinionated-xml 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{opinionated-xml}
8
- s.version = "0.0.1"
8
+ s.version = "0.1.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Matt Zumwalt"]
12
- s.date = %q{2010-05-16}
12
+ s.date = %q{2010-06-20}
13
13
  s.description = %q{A library to help you tame sprawling XML schemas like MODS. Wraps Nokogiri documents in objects with miscellaneous helper methods for doing things like retrieve generated xpath queries or look up properties based on a simplified DSL}
14
14
  s.email = %q{matt.zumwalt@yourmediashelf.com}
15
15
  s.extra_rdoc_files = [
@@ -24,36 +24,48 @@ Gem::Specification.new do |s|
24
24
  "README.rdoc",
25
25
  "Rakefile",
26
26
  "VERSION",
27
- "lib/opinionated-xml.rb",
28
- "lib/opinionated-xml/ox.rb",
29
- "lib/opinionated-xml/ox_property_values_helper.rb",
27
+ "lib/om.rb",
28
+ "lib/om/xml.rb",
29
+ "lib/om/xml/accessors.rb",
30
+ "lib/om/xml/container.rb",
31
+ "lib/om/xml/properties.rb",
32
+ "lib/om/xml/property_value_operators.rb",
33
+ "lib/om/xml/validation.rb",
30
34
  "opinionated-xml.gemspec",
31
35
  "spec/fixtures/CBF_MODS/ARS0025_016.xml",
36
+ "spec/fixtures/RUBRIC_mods_article_template.xml",
32
37
  "spec/fixtures/mods-3-2.xsd",
38
+ "spec/fixtures/mods_articles/hydrangea_article1.xml",
33
39
  "spec/fixtures/test_dummy_mods.xml",
34
- "spec/helpers/ox_property_values_helper_spec.rb",
35
40
  "spec/spec.opts",
36
41
  "spec/spec_helper.rb",
37
- "spec/unit/opinionated-xml_spec.rb",
38
- "spec/unit/ox_integration_spec.rb"
42
+ "spec/unit/accessors_spec.rb",
43
+ "spec/unit/container_spec.rb",
44
+ "spec/unit/properties_spec.rb",
45
+ "spec/unit/property_value_operators_spec.rb",
46
+ "spec/unit/validation_spec.rb",
47
+ "spec/unit/xml_spec.rb"
39
48
  ]
40
49
  s.homepage = %q{http://github.com/mediashelf/opinionated-xml}
41
50
  s.rdoc_options = ["--charset=UTF-8"]
42
51
  s.require_paths = ["lib"]
43
- s.rubygems_version = %q{1.3.6}
52
+ s.rubygems_version = %q{1.3.7}
44
53
  s.summary = %q{A library to help you tame sprawling XML schemas like MODS.}
45
54
  s.test_files = [
46
- "spec/helpers/ox_property_values_helper_spec.rb",
47
- "spec/spec_helper.rb",
48
- "spec/unit/opinionated-xml_spec.rb",
49
- "spec/unit/ox_integration_spec.rb"
55
+ "spec/spec_helper.rb",
56
+ "spec/unit/accessors_spec.rb",
57
+ "spec/unit/container_spec.rb",
58
+ "spec/unit/properties_spec.rb",
59
+ "spec/unit/property_value_operators_spec.rb",
60
+ "spec/unit/validation_spec.rb",
61
+ "spec/unit/xml_spec.rb"
50
62
  ]
51
63
 
52
64
  if s.respond_to? :specification_version then
53
65
  current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
54
66
  s.specification_version = 3
55
67
 
56
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
68
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
57
69
  s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
58
70
  s.add_runtime_dependency(%q<facets>, [">= 0"])
59
71
  s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
@@ -0,0 +1,89 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <!-- Based on MODS JOURNAL ARTICLE TEMPLATE edited with XMLSpy v2006 rel. 3 sp1
3
+ (http://www.altova.com) by Neil Godfrey (RUBRIC) -->
4
+ <!-- NOTE: For further details on more granular and other
5
+ applications of elements, see the MODS User Guidelines
6
+ at http://www.loc.gov/standards/mods/v3/mods-userguide-elements.html
7
+ Modifications will be necessary for local harvesting and mapping
8
+ requirements -->
9
+
10
+ <mods version="3.0" xsi:schemaLocation="http://www.loc.gov/mods/v3
11
+ http://www.loc.gov/standards/mods/v3/mods-3-0.xsd" xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
12
+
13
+ <titleInfo>
14
+ <nonSort>THE</nonSort>
15
+ <title>ARTICLE TITLE</title>
16
+ <subTitle>SUBTITLE</subTitle>
17
+ </titleInfo>
18
+ <titleInfo type="alternative">
19
+ <title>VARYING FORM OF TITLE</title>
20
+ </titleInfo>
21
+
22
+ <name type="personal">
23
+ <namePart type="family">FAMILY NAME</namePart>
24
+ <namePart type="given">GIVEN NAMES</namePart>
25
+ <namePart type="termsOfAddress">DR.</namePart>
26
+ <displayForm>NAME AS IT APPEARS</displayForm>
27
+ <affiliation>FACULTY, UNIVERSITY</affiliation>
28
+ <role>
29
+ <roleTerm authority="marcrelator" type="text">creator</roleTerm>
30
+ </role>
31
+ <role>
32
+ <roleTerm type="text">submitter</roleTerm>
33
+ </role>
34
+ </name>
35
+
36
+ <typeOfResource>text</typeOfResource>
37
+ <genre authority="local">journal article</genre>
38
+
39
+ <abstract>ABSTRACT</abstract>
40
+ <subject>
41
+ <topic>TOPIC 1</topic>
42
+ <topic>TOPIC 2</topic>
43
+ </subject>
44
+ <subject authority="AUTHORITY SOURCE (RFCD, LCSH)">
45
+ <topic>CONTROLLED TERM</topic>
46
+ </subject>
47
+
48
+ <language>
49
+ <languageTerm authority="iso639-2b" type="code">en-aus </languageTerm>
50
+ </language>
51
+
52
+ <physicalDescription>
53
+ <internetMediaType>application/pdf</internetMediaType>
54
+ <extent>36 p.</extent>
55
+ </physicalDescription>
56
+
57
+ <relatedItem type="host">
58
+ <titleInfo>
59
+ <title>TITLE OF HOST JOURNAL</title>
60
+ </titleInfo>
61
+ <originInfo>
62
+ <publisher>PUBLISHER</publisher>
63
+ <dateIssued>DATE</dateIssued>
64
+ </originInfo>
65
+ <identifier type="issn">0013-8908</identifier>
66
+ <part>
67
+ <detail type="volume">
68
+ <number>2</number>
69
+ </detail>
70
+ <detail type="level">
71
+ <number>2</number>
72
+ </detail>
73
+ <extent unit="pages">
74
+ <start>195</start>
75
+ <end>230</end>
76
+ </extent>
77
+ <date>FEB. 2007</date>
78
+ </part>
79
+ </relatedItem>
80
+
81
+ <identifier type="uri">http://URL.edu.au/</identifier>
82
+ <identifier type="doi">doi:10.1006/jmbi.1995.0238</identifier>
83
+ <location>
84
+ <url>http://URL.edu.au/</url>
85
+ </location>
86
+ <accessCondition type="restrictionOnAccess">EMBARGO NOTE</accessCondition>
87
+ <accessCondition type="use and reproduction">OPEN ACCESS</accessCondition>
88
+
89
+ </mods>
@@ -0,0 +1,90 @@
1
+ <mods version="3.0" xsi:schemaLocation="http://www.loc.gov/mods/v3
2
+ http://www.loc.gov/standards/mods/v3/mods-3-0.xsd" xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
+
4
+ <titleInfo>
5
+ <nonSort>THE</nonSort>
6
+ <title>ARTICLE TITLE HYDRANGEA ARTICLE 1</title>
7
+ <subTitle>SUBTITLE</subTitle>
8
+ </titleInfo>
9
+ <titleInfo lang="finnish">
10
+ <title>Artikkelin otsikko Hydrangea artiklan 1</title>
11
+ </titleInfo>
12
+
13
+ <name type="personal">
14
+ <namePart type="family">FAMILY NAME</namePart>
15
+ <namePart type="given">GIVEN NAMES</namePart>
16
+ <namePart type="termsOfAddress">DR.</namePart>
17
+ <displayForm>NAME AS IT APPEARS</displayForm>
18
+ <affiliation>FACULTY, UNIVERSITY</affiliation>
19
+ <role>
20
+ <roleTerm authority="marcrelator" type="text">creator</roleTerm>
21
+ </role>
22
+ <role>
23
+ <roleTerm type="text">submitter</roleTerm>
24
+ </role>
25
+ </name>
26
+
27
+ <name type="personal">
28
+ <namePart type="family">Gautama</namePart>
29
+ <namePart type="given">Siddartha</namePart>
30
+ <namePart type="termsOfAddress">Prince</namePart>
31
+ <affiliation>Nirvana</affiliation>
32
+ <role>
33
+ <roleTerm authority="marcrelator" type="text">teacher</roleTerm>
34
+ </role>
35
+ </name>
36
+
37
+ <typeOfResource>text</typeOfResource>
38
+ <genre authority="local">journal article</genre>
39
+
40
+ <abstract>ABSTRACT</abstract>
41
+ <subject>
42
+ <topic>TOPIC 1</topic>
43
+ <topic>TOPIC 2</topic>
44
+ </subject>
45
+ <subject authority="AUTHORITY SOURCE (RFCD, LCSH)">
46
+ <topic>CONTROLLED TERM</topic>
47
+ </subject>
48
+
49
+ <language>
50
+ <languageTerm authority="iso639-2b" type="code">en-aus </languageTerm>
51
+ </language>
52
+
53
+ <physicalDescription>
54
+ <internetMediaType>application/pdf</internetMediaType>
55
+ <extent>36 p.</extent>
56
+ </physicalDescription>
57
+
58
+ <relatedItem type="host">
59
+ <titleInfo>
60
+ <title>TITLE OF HOST JOURNAL</title>
61
+ </titleInfo>
62
+ <originInfo>
63
+ <publisher>PUBLISHER</publisher>
64
+ <dateIssued>DATE</dateIssued>
65
+ </originInfo>
66
+ <identifier type="issn">0013-8908</identifier>
67
+ <part>
68
+ <detail type="volume">
69
+ <number>2</number>
70
+ </detail>
71
+ <detail type="level">
72
+ <number>2</number>
73
+ </detail>
74
+ <extent unit="pages">
75
+ <start>195</start>
76
+ <end>230</end>
77
+ </extent>
78
+ <date>FEB. 2007</date>
79
+ </part>
80
+ </relatedItem>
81
+
82
+ <identifier type="uri">http://URL.edu.au/</identifier>
83
+ <identifier type="doi">doi:10.1006/jmbi.1995.0238</identifier>
84
+ <location>
85
+ <url>http://URL.edu.au/</url>
86
+ </location>
87
+ <accessCondition type="restrictionOnAccess">EMBARGO NOTE</accessCondition>
88
+ <accessCondition type="use and reproduction">OPEN ACCESS</accessCondition>
89
+
90
+ </mods>
@@ -3,10 +3,19 @@
3
3
  <ns3:name type="personal">
4
4
  <ns3:namePart type="family">Berners-Lee</ns3:namePart>
5
5
  <ns3:namePart type="given">Tim</ns3:namePart>
6
+ <ns3:role>
7
+ <ns3:roleTerm type="text" authority="marcrelator">creator</ns3:roleTerm>
8
+ <ns3:roleTerm type="code" authority="marcrelator">cre</ns3:roleTerm>
9
+ </ns3:role>
6
10
  </ns3:name>
7
11
  <ns3:name type="personal">
8
12
  <ns3:namePart type="family">Jobs</ns3:namePart>
9
13
  <ns3:namePart type="given">Steve</ns3:namePart>
14
+ <ns3:namePart type="date">2004</ns3:namePart>
15
+ <ns3:role>
16
+ <ns3:roleTerm type="text" authority="marcrelator">creator</ns3:roleTerm>
17
+ <ns3:roleTerm type="code" authority="marcrelator">cre</ns3:roleTerm>
18
+ </ns3:role>
10
19
  </ns3:name>
11
20
  <ns3:name type="personal">
12
21
  <ns3:namePart type="family">Wozniak</ns3:namePart>
@@ -15,5 +24,13 @@
15
24
  <ns3:name type="personal">
16
25
  <ns3:namePart type="family">Klimt</ns3:namePart>
17
26
  <ns3:namePart type="given">Gustav</ns3:namePart>
27
+ <ns3:role>
28
+ <ns3:roleTerm type="text" authority="marcrelator">creator</ns3:roleTerm>
29
+ <ns3:roleTerm type="code" authority="marcrelator">cre</ns3:roleTerm>
30
+ </ns3:role>
31
+ <ns3:role>
32
+ <ns3:roleTerm type="text" authority="marcrelator">visionary</ns3:roleTerm>
33
+ <ns3:roleTerm type="code" authority="marcrelator">vry</ns3:roleTerm>
34
+ </ns3:role>
18
35
  </ns3:name>
19
36
  </ns3:mods>
@@ -1,6 +1,6 @@
1
1
  $LOAD_PATH.unshift(File.dirname(__FILE__))
2
2
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
- require 'opinionated-xml'
3
+ require 'om'
4
4
  require 'spec'
5
5
  require 'spec/autorun'
6
6
  require 'ruby-debug'
@@ -0,0 +1,156 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require "nokogiri"
3
+ require "om"
4
+
5
+ describe "OM::XML::Accessors" do
6
+
7
+ before(:all) do
8
+ class AccessorTest
9
+
10
+ include OM::XML::Container
11
+ include OM::XML::Accessors
12
+ #accessor :title, :relative_xpath=>[:titleInfo, :title]
13
+
14
+ accessor :title_info, :relative_xpath=>'oxns:titleInfo', :children=>[
15
+ {:main_title=>{:relative_xpath=>'oxns:title'}},
16
+ {:language =>{:relative_xpath=>{:attribute=>"lang"} }}
17
+ ] # this allows you to access the language attribute as if it were a regular child accessor
18
+ accessor :abstract
19
+ accessor :topic_tag, :relative_xpath=>'oxns:subject/oxns:topic'
20
+ accessor :person, :relative_xpath=>'oxns:name[@type="personal"]', :children=>[
21
+ {:last_name=>{:relative_xpath=>'oxns:namePart[@type="family"]'}},
22
+ {:first_name=>{:relative_xpath=>'oxns:namePart[@type="given"]'}},
23
+ {:institution=>{:relative_xpath=>'oxns:affiliation'}},
24
+ {:role=>{:children=>[
25
+ {:text=>{:relative_xpath=>'oxns:roleTerm[@type="text"]'}},
26
+ {:code=>{:relative_xpath=>'oxns:roleTerm[@type="code"]'}}
27
+ ]}}
28
+ ]
29
+ accessor :organization, :relative_xpath=>'oxns:name[@type="institutional"]', :children=>[
30
+ {:role=>{:children=>[
31
+ {:text=>{:relative_xpath=>'oxns:roleTerm[@type="text"]'}},
32
+ {:code=>{:relative_xpath=>'oxns:roleTerm[@type="code"]'}}
33
+ ]}}
34
+ ]
35
+ accessor :conference, :relative_xpath=>'oxns:name[@type="conference"]', :children=>[
36
+ {:role=>{:children=>[
37
+ {:text=>{:relative_xpath=>'oxns:roleTerm[@type="text"]'}},
38
+ {:code=>{:relative_xpath=>'oxns:roleTerm[@type="code"]'}}
39
+ ]}}
40
+ ]
41
+ accessor :journal, :relative_xpath=>'oxns:relatedItem[@type="host"]', :children=>[
42
+ # allows for children that are hashes...
43
+ # this allows for more robust handling of nested values (in generating views and when generating solr field names)
44
+ {:title=>{:relative_xpath=>'oxns:titleInfo/oxns:title'}},
45
+ {:publisher=>{:relative_xpath=>'oxns:originInfo/oxns:publisher'}},
46
+ {:issn=>{:relative_xpath=>'oxns:identifier[@type="issn"]'}},
47
+ {:date_issued=>{:relative_xpath=>'oxns:originInfo/oxns:dateIssued'}},
48
+ {:issue => {:relative_xpath=>"oxns:part", :children=>[
49
+ {:volume=>{:relative_xpath=>'oxns:detail[@type="volume"]'}},
50
+ {:level=>{:relative_xpath=>'oxns:detail[@type="level"]'}},
51
+ {:start_page=>{:relative_xpath=>'oxns:extent[@unit="pages"]/oxns:start'}},
52
+ {:end_page=>{:relative_xpath=>'oxns:extent[@unit="pages"]/oxns:end'}},
53
+ {:publication_date=>{:relative_xpath=>'oxns:date'}}
54
+ ]}}
55
+ ]
56
+ end
57
+
58
+ end
59
+
60
+ before(:each) do
61
+ article_xml = fixture( File.join("mods_articles", "hydrangea_article1.xml") )
62
+ @sample = AccessorTest.from_xml(article_xml)
63
+ end
64
+
65
+ describe '#accessor' do
66
+ it "should populate the .accessors hash" do
67
+ AccessorTest.accessors[:abstract][:relative_xpath].should == "oxns:abstract"
68
+ AccessorTest.accessors[:journal][:relative_xpath].should == 'oxns:relatedItem[@type="host"]'
69
+ AccessorTest.accessors[:journal][:children][:issue][:relative_xpath].should == "oxns:part"
70
+ AccessorTest.accessors[:journal][:children][:issue][:children][:end_page][:relative_xpath].should == 'oxns:extent[@unit="pages"]/oxns:end'
71
+
72
+ AccessorTest.accessors[:person][:children][:role][:children][:text][:relative_xpath].should == 'oxns:roleTerm[@type="text"]'
73
+ end
74
+ end
75
+
76
+ describe ".retrieve" do
77
+ it "should use Nokogiri to retrieve a NodeSet corresponding to the combination of accessor keys and array/nodeset indexes" do
78
+ @sample.retrieve( :person ).length.should == 2
79
+
80
+ @sample.retrieve( :person, 1 ).first.should == @sample.ng_xml.xpath('//oxns:name[@type="personal" and position()=2]', "oxns"=>"http://www.loc.gov/mods/v3").first
81
+ @sample.retrieve( :person, 1, :first_name ).class.should == Nokogiri::XML::NodeSet
82
+ @sample.retrieve( :person, 1, :first_name ).first.text.should == "Siddartha"
83
+ end
84
+
85
+ it "should support accessors whose relative_xpath is a lookup array instead of an xpath string" do
86
+ # pending "this only impacts scenarios where we want to display & edit"
87
+ AccessorTest.accessors[:title_info][:children][:language][:relative_xpath].should == {:attribute=>"lang"}
88
+ # @sample.retrieve( :title, 1 ).first.text.should == "Artikkelin otsikko Hydrangea artiklan 1"
89
+ @sample.retrieve( :title_info, 1, :language ).first.text.should == "finnish"
90
+ end
91
+
92
+ end
93
+
94
+ describe ".retrieve_at" do
95
+ it "should return the first node in the resulting set (uses Nokogiri xpath_at)" do
96
+ pending "might be able to make this implicit in the last value of call to .retrieve"
97
+ @sample.retrieve_at(:person, 1, :first_name).text.should == "Siddartha"
98
+ @sample.retrieve_at(:person, 1, :first_name).should == @sample.retrieve( :person, 1, :first_name).first
99
+ end
100
+ end
101
+
102
+ describe "generated accessor methods" do
103
+ it "should mix accessor methods into nodesets so you can use regular array syntax to access stuff" do
104
+ pending "This is tempting, but somewhat difficult to implement and potentially slow at runtime. Might never be worth it?"
105
+ @sample.persons.length.should == 2
106
+ @sample.persons[1].first_name.text.should == "Siddartha"
107
+ @sample.persons.last.roles.length.should == 1
108
+ @sample.persons.last.roles[0].text.should == "teacher"
109
+ end
110
+ end
111
+
112
+ describe "#accessor_info" do
113
+ it "should return the xpath given in the call to #accessor" do
114
+ AccessorTest.accessor_info( :abstract ).should == AccessorTest.accessors[:abstract]
115
+ end
116
+ it "should return the xpath given in the call to #accessor" do
117
+ AccessorTest.accessor_info( :abstract ).should == AccessorTest.accessors[:abstract]
118
+ end
119
+ it "should dig into the accessors hash as far as you want, ignoring index values" do
120
+ AccessorTest.accessor_info( :conference, 0, :role, 1, :text ).should == AccessorTest.accessors[:conference][:children][:role][:children][:text]
121
+ AccessorTest.accessor_info( :conference, :role, :text ).should == AccessorTest.accessors[:conference][:children][:role][:children][:text]
122
+ end
123
+ end
124
+
125
+ describe "#accessor_xpath" do
126
+ it "should return the xpath given in the call to #accessor" do
127
+ AccessorTest.accessor_xpath( :abstract ).should == '//oxns:abstract'
128
+ end
129
+ # Note: Ruby array indexes start at 0, but XPath positions start at 1, so an index n is translated to position()=n+1 in the generated query.
130
+ it "should prepend the xpath for any parent nodes, inserting calls to xpath:position() function where necessary" do
131
+ AccessorTest.accessor_xpath( :conference, 0, :role, 1, :text ).should == '//oxns:name[@type="conference" and position()=1]/oxns:role[position()=2]/oxns:roleTerm[@type="text"]'
132
+ end
133
+ end
134
+ # describe ".accessor_xpath (instance method)" do
135
+ # it "should delegate to the class method" do
136
+ # AccessorTest.expects(:accessor_xpath).with( [:conference, conference_index, :text_role] )
137
+ # @sample.accessor_xpath( [:conference, conference_index, :role] )
138
+ # end
139
+ # end
140
+ #
141
+ # describe "generated catchall xpaths" do
142
+ # it "should return an xpath query that will catch all nodes corresponding to the specified accessor" do
143
+ # AccessorTest.journal_issue_end_page_xpath.should == 'oxns:relatedItem[@type="host"]/oxns:part/oxns:extent[@unit="pages"]/oxns:end'
144
+ # end
145
+ # it "should rely on #catchall_xpath" do
146
+ # AccessorTest.expects(:catchall_xpath).with(:journal, :issue, :end_page)
147
+ # AccessorTest.journal_issue_end_page_xpath
148
+ # end
149
+ # end
150
+ #
151
+ # describe ".catchall_xpath" do
152
+ # it "should return an xpath query that will catch all nodes corresponding to the specified accessor" do
153
+ # AccessorTest.catchall_xpath(:journal, :issue, :end_page).should == 'oxns:relatedItem[@type="host"]/oxns:part/oxns:extent[@unit="pages"]/oxns:end'
154
+ # end
155
+ # end
156
+ end