gutenberg_rdf 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5eb92b0007804d96f27544f3e4248ed20d9a6a33
4
- data.tar.gz: 4d58bb7b868157dfe461a7a0633ed75eb7933af2
3
+ metadata.gz: 26b9401e7a3223ad122100f096eebf5585a97e0e
4
+ data.tar.gz: 5fe2df876d2878fd9628edf6367c2d2d6c381803
5
5
  SHA512:
6
- metadata.gz: 74a4672ad2629c43853abcd4d15f6c033572ad0f71b0eb5177fa1c84dac697a35a775e0e1ba660fea2920b1edf69066aa2b11429fb31b36d5539c44f3d99fcfb
7
- data.tar.gz: bcf6c1bc87aa2e6692ae7db25ae69d2a137467dba227e0702eacc02d8345e901dda2de9f6f5e7fedf32bba98ee47fc639f8008d9dcd55d2757644dccbc6321e7
6
+ metadata.gz: 7fa836f7d515c77d2418e700b43ac00e1e4cef0809b967ed2a58ce929f7edb7b68c6d33a64c7f20920f775cf7b07a5e5a8d6d63e0df5a4f5f6efdb4b0a5e6cc0
7
+ data.tar.gz: 80db01f7fc89fc2bbfb4536454a8b37fa962328e3a93ac1a487c009a92963549fa241e938bedbabedfb85a997911a3b29ccde294ed07502ddb0907b02eeccbd7
data/README.md CHANGED
@@ -1,11 +1,18 @@
1
1
  # Gutenberg RDF
2
2
 
3
3
  Gutenberg RDF is a Ruby wrapper for the Project Gutenberg RDF catalog book files,
4
- providing a nice API to all the metadata contained within.
4
+ providing an API to all the metadata contained within.
5
+
6
+ The official RDF catalog can be found here: http://www.gutenberg.org/wiki/Gutenberg:Feeds
7
+
5
8
 
6
9
  ## Requirements
7
10
 
8
- * Ruby 2.0 - this is so we get UTF-8 by default
11
+ * Ruby 2.0 - this is so we get UTF-8 by default
12
+
13
+ NOTE: Around April 2014, Project Gutenberg (PG) changed the XML format in their RDF files
14
+ considerably, so this gem will no longer work on files from before that date.
15
+ Please make sure to download the latest catalog using the link above.
9
16
 
10
17
 
11
18
  ## Installation
data/bin/rdf ADDED
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $LOAD_PATH << File.expand_path(File.dirname(__FILE__) + '/../lib')
4
+
5
+ require "gutenberg_rdf"
6
+
7
+ unless ARGV.last && File.exist?(ARGV.last)
8
+ puts
9
+ puts "ERROR: Please provide the full path to an RDF file:\n\n$ bin/rdf /gutenberg/pg1.rdf"
10
+ puts
11
+ exit 1
12
+ end
13
+
14
+ rdf = GutenbergRdf.parse(ARGV.last)
15
+
16
+ puts " ID: #{rdf.id}"
17
+ puts " Type: #{rdf.type}"
18
+ puts " Title: #{rdf.title}"
19
+ puts " Subtitle: #{rdf.subtitle}"
20
+ puts " Authors: #{rdf.authors.join(', ')}"
21
+ puts "Published: #{rdf.published}"
22
+ puts " Language: #{rdf.language}"
23
+ puts " Rights: #{rdf.rights}"
24
+ puts "Bookcover: #{rdf.covers.first}"
25
+ puts " eBooks: \n#{rdf.ebooks.map{|b| " #{b.uri}" }.join("\n")}"
26
+ puts " Subjects: \n #{rdf.subjects.join("\n ")}"
@@ -48,7 +48,7 @@ module GutenbergRdf
48
48
  end
49
49
 
50
50
  def language
51
- xml.elements['pgterms:ebook/dcterms:language'].text
51
+ xml.elements['pgterms:ebook/dcterms:language/rdf:Description/rdf:value'].text
52
52
  end
53
53
 
54
54
  def rights
@@ -56,13 +56,14 @@ module GutenbergRdf
56
56
  end
57
57
 
58
58
  def covers
59
- official_cover_images.concat(other_cover_images).sort.uniq
59
+ official_cover_images.concat(other_cover_images).uniq
60
60
  end
61
61
 
62
62
  def ebooks
63
63
  files = Array.new
64
- xml.elements.each('pgterms:file') do |file|
65
- files << Media.new(file)
64
+ xml.elements.each('pgterms:ebook/dcterms:hasFormat') do |format|
65
+ file = format.elements['pgterms:file']
66
+ files << Media.new(file) if file.elements['dcterms:format/rdf:Description/rdf:value'].text.match(/\Atext|\Aapplication/)
66
67
  end
67
68
  files
68
69
  end
@@ -85,37 +86,28 @@ module GutenbergRdf
85
86
  title_array.map(&:strip)
86
87
  end
87
88
 
88
- def roles
89
- @roles ||= extract_roles
90
- end
91
-
92
- def extract_roles
93
- entries = Hash.new
94
- xml.elements.each('pgterms:ebook/dcterms:creator') do |entry|
95
- entries["#{entry.attributes['rdf:resource'].sub('2009/agents/', '')}"] = 'aut'
96
- end
97
- xml.elements.each('pgterms:ebook/marcrel:*') do |entry|
98
- entries["#{entry.attributes['rdf:resource'].sub('2009/agents/', '')}"] = entry.name
99
- end
100
- entries
101
- end
102
-
103
89
  def extract_authors
104
- entries = Array.new
105
- xml.elements.each('pgterms:agent') do |agent|
106
- entry = Agent.new(agent)
107
- entry.assign_role(roles)
108
- entries << entry
90
+ agents = Array.new
91
+ xml.elements.each('pgterms:ebook/dcterms:creator') do |contributor|
92
+ agent = Agent.new(contributor.elements['pgterms:agent'])
93
+ agent.role = 'aut'
94
+ agents << agent
109
95
  end
110
- entries
96
+ xml.elements.each('pgterms:ebook/marcrel:*') do |contributor|
97
+ agent = Agent.new(contributor.elements['pgterms:agent'])
98
+ agent.role = contributor.name
99
+ agents << agent
100
+ end
101
+ agents
111
102
  end
112
103
 
113
104
  def official_cover_images
114
105
  entries = Array.new
115
- xml.elements.each('pgterms:file') do |file|
106
+ xml.elements.each('pgterms:ebook/dcterms:hasFormat') do |format|
107
+ file = format.elements['pgterms:file']
116
108
  entries << file.attributes['about'] if file_is_image?(file)
117
109
  end
118
- entries
110
+ entries.sort
119
111
  end
120
112
 
121
113
  def file_is_image?(node)
@@ -132,7 +124,7 @@ module GutenbergRdf
132
124
  cover.sub!(/\Afile:\/\/\/public\/vhost\/g\/gutenberg\/html/, 'http://www.gutenberg.org')
133
125
  entries << cover
134
126
  end
135
- entries
127
+ entries.sort
136
128
  end
137
129
 
138
130
  end
@@ -20,10 +20,6 @@ module GutenbergRdf
20
20
  @role ||= 'oth'
21
21
  end
22
22
 
23
- def assign_role(roles)
24
- self.role = roles["#{id}"]
25
- end
26
-
27
23
  def fullname
28
24
  [firstname, lastname].reject(&:empty?).join(' ')
29
25
  end
@@ -1,3 +1,3 @@
1
1
  module GutenbergRdf
2
- VERSION = "0.3.1"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -3,18 +3,24 @@ require 'spec_helper'
3
3
  module GutenbergRdf
4
4
  class Rdf
5
5
  describe Agent do
6
+ let(:xml) do
7
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:marcrel="http://id.loc.gov/vocabulary/relators/">
8
+ <pgterms:ebook rdf:about="ebooks/99999999">
9
+ <dcterms:creator>
10
+ <pgterms:agent rdf:about="2009/agents/402">
11
+ <pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
12
+ <pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
13
+ <pgterms:name>Doe, Jon James</pgterms:name>
14
+ <pgterms:alias>Doe, Jon</pgterms:alias>
15
+ <pgterms:alias>Doe, J. J.</pgterms:alias>
16
+ <pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Jon_James_Doe"/>
17
+ </pgterms:agent>
18
+ </dcterms:creator>
19
+ </pgterms:ebook>
20
+ </rdf:RDF>'
21
+ end
6
22
  let(:agent) do
7
- xml = '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
8
- <pgterms:agent rdf:about="2009/agents/402">
9
- <pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
10
- <pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
11
- <pgterms:name>Doe, Jon James</pgterms:name>
12
- <pgterms:alias>Doe, Jon</pgterms:alias>
13
- <pgterms:alias>Doe, J. J.</pgterms:alias>
14
- <pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Jon_James_Doe"/>
15
- </pgterms:agent>
16
- </rdf:RDF>'
17
- Agent.new(REXML::Document.new(xml).root.elements['pgterms:agent'])
23
+ Agent.new(REXML::Document.new(xml).root.elements['pgterms:ebook/dcterms:creator/pgterms:agent'])
18
24
  end
19
25
 
20
26
  it "expects an agent ID" do
@@ -25,15 +31,6 @@ module GutenbergRdf
25
31
  expect(agent.role).to eq 'oth'
26
32
  end
27
33
 
28
- describe "Assigning Roles" do
29
- it "assigns the correct value to .role" do
30
- roles = {'402' => 'aut', '116' => 'ctb'}
31
- agent.assign_role(roles)
32
-
33
- expect(agent.role).to eq 'aut'
34
- end
35
- end
36
-
37
34
  it "expects the last name" do
38
35
  expect(agent.lastname).to eq 'Doe'
39
36
  end
@@ -3,23 +3,26 @@ require 'spec_helper'
3
3
  module GutenbergRdf
4
4
  class Rdf
5
5
  describe Media do
6
-
7
6
  let(:xml) do
8
- '<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
9
- <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
10
- <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
11
- <dcterms:format>
12
- <rdf:Description>
13
- <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
14
- <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=utf-8</rdf:value>
15
- </rdf:Description>
16
- </dcterms:format>
17
- <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
18
- <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
19
- </pgterms:file>
7
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
8
+ <pgterms:ebook rdf:about="ebooks/98765">
9
+ <dcterms:hasFormat>
10
+ <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
11
+ <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
12
+ <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
13
+ <dcterms:format>
14
+ <rdf:Description rdf:nodeID="N87dfy78yd78s6gsg6f8970d76g0f6d9b">
15
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=utf-8</rdf:value>
16
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
17
+ </rdf:Description>
18
+ </dcterms:format>
19
+ <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
20
+ </pgterms:file>
21
+ </dcterms:hasFormat>
22
+ </pgterms:ebook>
20
23
  </rdf:RDF>'
21
24
  end
22
- let(:media) { Media.new(REXML::Document.new(xml).elements['rdf:RDF/pgterms:file']) }
25
+ let(:media) { Media.new(REXML::Document.new(xml).elements['rdf:RDF/pgterms:ebook/dcterms:hasFormat/pgterms:file']) }
23
26
 
24
27
  it "expects the file URI" do
25
28
  expect(media.uri).to eql 'http://www.gutenberg.org/ebooks/98765.txt.utf-8'
@@ -36,34 +39,6 @@ module GutenbergRdf
36
39
  it "should return the modified datetime" do
37
40
  expect(media.modified.to_s).to eql '2010-02-16T08:29:52-07:00'
38
41
  end
39
-
40
- context "when there are two media types" do
41
- let(:xml) do
42
- '<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
43
- <pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
44
- <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
45
- <dcterms:format>
46
- <rdf:Description>
47
- <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
48
- <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
49
- <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
50
- </rdf:Description>
51
- </dcterms:format>
52
- <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
53
- <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
54
- </pgterms:file>
55
- </rdf:RDF>'
56
- end
57
- let(:media) { Media.new(REXML::Document.new(xml).elements['rdf:RDF/pgterms:file']) }
58
-
59
- it "expects the first entry to be used" do
60
- expect(media.media_type).to eql 'application/zip'
61
- end
62
- it "expects the encoding to be an empty string" do
63
- expect(media.encoding).to eql ''
64
- end
65
- end
66
-
67
42
  end
68
43
  end
69
44
  end
@@ -2,42 +2,49 @@ require 'spec_helper'
2
2
 
3
3
  module GutenbergRdf
4
4
  describe Rdf do
5
- let(:xml) do
6
- '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
5
+
6
+ describe "basic metadata" do
7
+ let(:xml) do
8
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
7
9
  <pgterms:ebook rdf:about="ebooks/98765">
8
10
  <dcterms:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2006-09-28</dcterms:issued>
9
- <dcterms:language rdf:datatype="http://purl.org/dc/terms/RFC4646">en</dcterms:language>
11
+ <dcterms:language>
12
+ <rdf:Description rdf:nodeID="N88989dfs7984987df987cvcsd876ew79">
13
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/RFC4646">en</rdf:value>
14
+ </rdf:Description>
15
+ </dcterms:language>
10
16
  <dcterms:publisher>Project Gutenberg</dcterms:publisher>
11
17
  <dcterms:rights>Public domain in the USA.</dcterms:rights>
12
18
  </pgterms:ebook>
13
- </rdf:RDF>'
14
- end
15
- let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
19
+ </rdf:RDF>'
20
+ end
21
+ let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
16
22
 
17
- it "expects an id" do
18
- expect(rdf.id).to eql "98765"
19
- end
20
- it "expects a published date" do
21
- expect(rdf.published).to eql "2006-09-28"
22
- end
23
- it "expects a publisher" do
24
- expect(rdf.publisher).to eql "Project Gutenberg"
25
- end
26
- it "expects a language" do
27
- expect(rdf.language).to eql "en"
28
- end
29
- it "expects the rights" do
30
- expect(rdf.rights).to eql "Public domain in the USA."
23
+ it "expects an id" do
24
+ expect(rdf.id).to eql "98765"
25
+ end
26
+ it "expects a published date" do
27
+ expect(rdf.published).to eql "2006-09-28"
28
+ end
29
+ it "expects a publisher" do
30
+ expect(rdf.publisher).to eql "Project Gutenberg"
31
+ end
32
+ it "expects a language" do
33
+ expect(rdf.language).to eql "en"
34
+ end
35
+ it "expects the rights" do
36
+ expect(rdf.rights).to eql "Public domain in the USA."
37
+ end
31
38
  end
32
39
 
33
40
  describe "#type" do
34
41
  let(:xml) do
35
- '<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
42
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
36
43
  <pgterms:ebook rdf:about="ebooks/98765">
37
44
  <dcterms:type>
38
- <rdf:Description>
39
- <dcam:memberOf rdf:resource="http://purl.org/dc/terms/DCMIType"/>
45
+ <rdf:Description rdf:nodeID="Nd89943yhljdsf93489ydfs897g7fd897">
40
46
  <rdf:value>Text</rdf:value>
47
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/DCMIType"/>
41
48
  </rdf:Description>
42
49
  </dcterms:type>
43
50
  </pgterms:ebook>
@@ -52,7 +59,7 @@ module GutenbergRdf
52
59
 
53
60
  describe "Titles" do
54
61
  let(:xml) do
55
- '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
62
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
56
63
  <pgterms:ebook rdf:about="ebooks/98765">
57
64
  <dcterms:title>A Great Title</dcterms:title>
58
65
  </pgterms:ebook>
@@ -69,7 +76,7 @@ module GutenbergRdf
69
76
 
70
77
  context "with a title and subtitle, on separate lines" do
71
78
  let(:xml) do
72
- '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
79
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
73
80
  <pgterms:ebook rdf:about="ebooks/98765">
74
81
  <dcterms:title>A Great Multi-Title
75
82
  Or, a Subtitle</dcterms:title>
@@ -88,7 +95,7 @@ module GutenbergRdf
88
95
 
89
96
  context "with; title, or, subtitle (we need to split on the 'or')" do
90
97
  let(:xml) do
91
- '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
98
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
92
99
  <pgterms:ebook rdf:about="ebooks/98765">
93
100
  <dcterms:title>A Great Multi-Title, or, a Subtitle</dcterms:title>
94
101
  </pgterms:ebook>
@@ -106,7 +113,7 @@ module GutenbergRdf
106
113
 
107
114
  context "when title:subtitle are separated by a colon" do
108
115
  let(:xml) do
109
- '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
116
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
110
117
  <pgterms:ebook rdf:about="ebooks/98765">
111
118
  <dcterms:title>A Great Multi-Title: And a Subtitle</dcterms:title>
112
119
  </pgterms:ebook>
@@ -124,7 +131,7 @@ module GutenbergRdf
124
131
 
125
132
  context "when title; and subtitle are separated by a semi-colon" do
126
133
  let(:xml) do
127
- '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
134
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
128
135
  <pgterms:ebook rdf:about="ebooks/98765">
129
136
  <dcterms:title>A Great Multi-Title; Or, a Subtitle</dcterms:title>
130
137
  </pgterms:ebook>
@@ -140,12 +147,12 @@ module GutenbergRdf
140
147
 
141
148
  context "...except when subtitles already exists" do
142
149
  let(:xml) do
143
- '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
144
- <pgterms:ebook rdf:about="ebooks/98765">
145
- <dcterms:title>A Great Multi-Title; and some other text
146
- Then a Subtitle on a newline</dcterms:title>
147
- </pgterms:ebook>
148
- </rdf:RDF>'
150
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
151
+ <pgterms:ebook rdf:about="ebooks/98765">
152
+ <dcterms:title>A Great Multi-Title; and some other text
153
+ Then a Subtitle on a newline</dcterms:title>
154
+ </pgterms:ebook>
155
+ </rdf:RDF>'
149
156
  end
150
157
  let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
151
158
  it "expects a title" do
@@ -160,22 +167,24 @@ module GutenbergRdf
160
167
 
161
168
  describe "#authors" do
162
169
  let(:xml) do
163
- '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:marcrel="http://id.loc.gov/vocabulary/relators">
170
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:marcrel="http://id.loc.gov/vocabulary/relators/">
164
171
  <pgterms:ebook rdf:about="ebooks/99999999">
165
- <marcrel:ctb rdf:resource="2009/agents/402"/>
166
- <dcterms:creator rdf:resource="2009/agents/116"/>
172
+ <dcterms:creator>
173
+ <pgterms:agent rdf:about="2009/agents/116">
174
+ <pgterms:alias>Verschillende</pgterms:alias>
175
+ <pgterms:name>Various</pgterms:name>
176
+ </pgterms:agent>
177
+ </dcterms:creator>
178
+ <marcrel:ctb>
179
+ <pgterms:agent rdf:about="2009/agents/402">
180
+ <pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:deathdate>
181
+ <pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:birthdate>
182
+ <pgterms:name>Dodge, Mary Mapes</pgterms:name>
183
+ <pgterms:alias>Dodge, Mary</pgterms:alias>
184
+ <pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Mary_Mapes_Dodge"/>
185
+ </pgterms:agent>
186
+ </marcrel:ctb>
167
187
  </pgterms:ebook>
168
- <pgterms:agent rdf:about="2009/agents/402">
169
- <pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
170
- <pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
171
- <pgterms:name>Dodge, Mary Mapes</pgterms:name>
172
- <pgterms:alias>Dodge, Mary</pgterms:alias>
173
- <pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Mary_Mapes_Dodge"/>
174
- </pgterms:agent>
175
- <pgterms:agent rdf:about="2009/agents/116">
176
- <pgterms:alias>Verschillende</pgterms:alias>
177
- <pgterms:name>Various</pgterms:name>
178
- </pgterms:agent>
179
188
  </rdf:RDF>'
180
189
  end
181
190
  let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
@@ -183,35 +192,35 @@ module GutenbergRdf
183
192
  it "returns the correct number of authors" do
184
193
  expect(rdf.authors.count).to be 2
185
194
  end
186
- it "expects an author object" do
187
- expect(rdf.authors.first.class).to be Rdf::Agent
188
- end
189
- it "has the correct author names" do
190
- expect(rdf.authors.first.fullname).to eq 'Mary Mapes Dodge'
195
+ it "expects an Agent object" do
196
+ expect(rdf.authors[0]).to be_an_instance_of Rdf::Agent
191
197
  end
192
198
  it "expects the author to have an aut role" do
193
- expect(rdf.authors.last.role).to eq 'aut'
199
+ expect(rdf.authors[0].role).to eq 'aut'
200
+ end
201
+ it "has the correct author names" do
202
+ expect(rdf.authors[1].fullname).to eq 'Mary Mapes Dodge'
194
203
  end
195
204
  it "expects other agents to have the correct role" do
196
- expect(rdf.authors.first.role).to eq 'ctb'
205
+ expect(rdf.authors[1].role).to eq 'ctb'
197
206
  end
198
207
  end
199
208
 
200
209
  describe "#subjects" do
201
210
  let(:xml) do
202
- %q{<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
211
+ %q{<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
203
212
  <pgterms:ebook rdf:about="ebooks/98765">
204
213
  <dcterms:subject>
205
- <rdf:Description>
206
- <dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCSH"/>
207
- <rdf:value>Children's literature -- Periodicals</rdf:value>
208
- <rdf:value>Children's periodicals, American</rdf:value>
214
+ <rdf:Description rdf:nodeID="Ndfsc8xdsfwar734897n7sdofyhod11b9">
215
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCC"/>
216
+ <rdf:value>PZ</rdf:value>
209
217
  </rdf:Description>
210
218
  </dcterms:subject>
211
219
  <dcterms:subject>
212
- <rdf:Description>
213
- <dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCC"/>
214
- <rdf:value>PZ</rdf:value>
220
+ <rdf:Description rdf:nodeID="Ndfcdh8934hsdljkfh98y89hlfhltyab8">
221
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCSH"/>
222
+ <rdf:value>Children's literature -- Periodicals</rdf:value>
223
+ <rdf:value>Children's periodicals, American</rdf:value>
215
224
  </rdf:Description>
216
225
  </dcterms:subject>
217
226
  </pgterms:ebook>
@@ -230,69 +239,63 @@ module GutenbergRdf
230
239
  describe "#covers" do
231
240
  describe "official PG covers" do
232
241
  let(:xml) do
233
- '<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
234
- <pgterms:ebook rdf:about="ebooks/12345">
235
- <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/12345.epub.noimages"/>
236
- <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/12345.cover.medium"/>
237
- <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/12345.cover.small"/>
238
- <pgterms:marc901>http://www.gutenberg.org/files/12345/12345-h/images/cover.jpg</pgterms:marc901>
242
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
243
+ <pgterms:ebook rdf:about="ebooks/98765">
244
+ <dcterms:hasFormat>
245
+ <pgterms:file rdf:about="http://www.gutenberg.org/cache/epub/98765/pg98765.cover.small.jpg">
246
+ <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">2699</dcterms:extent>
247
+ <dcterms:format>
248
+ <rdf:Description rdf:nodeID="N9u34589eyfdiuy8934y787d8f97sg786">
249
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
250
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
251
+ </rdf:Description>
252
+ </dcterms:format>
253
+ <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
254
+ <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-03-25T20:57:55.668737</dcterms:modified>
255
+ </pgterms:file>
256
+ </dcterms:hasFormat>
257
+ <pgterms:marc901>http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg</pgterms:marc901>
258
+ <pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/98765/98765-h/images/cover.jpg</pgterms:marc901>
259
+ <pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/98765/98765-rst/images/cover.jpg</pgterms:marc901>
260
+ <dcterms:hasFormat>
261
+ <pgterms:file rdf:about="http://www.gutenberg.org/cache/epub/98765/pg98765.cover.medium.jpg">
262
+ <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-03-25T20:57:55.889736</dcterms:modified>
263
+ <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
264
+ <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">10856</dcterms:extent>
265
+ <dcterms:format>
266
+ <rdf:Description rdf:nodeID="N8df89ys8993p4qu89uenf89dusp38a07">
267
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
268
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
269
+ </rdf:Description>
270
+ </dcterms:format>
271
+ </pgterms:file>
272
+ </dcterms:hasFormat>
239
273
  </pgterms:ebook>
240
- <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/12345.epub.noimages">
241
- <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">92652</dcterms:extent>
242
- <dcterms:format>
243
- <rdf:Description>
244
- <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
245
- <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/epub+zip</rdf:value>
246
- </rdf:Description>
247
- </dcterms:format>
248
- <dcterms:isFormatOf rdf:resource="ebooks/12345"/>
249
- <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-09-21T19:22:32.115259</dcterms:modified>
250
- </pgterms:file>
251
- <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/12345.cover.medium">
252
- <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">10856</dcterms:extent>
253
- <dcterms:format>
254
- <rdf:Description>
255
- <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
256
- <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
257
- </rdf:Description>
258
- </dcterms:format>
259
- <dcterms:isFormatOf rdf:resource="ebooks/12345"/>
260
- <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-09-21T19:22:34.484114</dcterms:modified>
261
- </pgterms:file>
262
- <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/12345.cover.small">
263
- <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1904</dcterms:extent>
264
- <dcterms:format>
265
- <rdf:Description>
266
- <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
267
- <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
268
- </rdf:Description>
269
- </dcterms:format>
270
- <dcterms:isFormatOf rdf:resource="ebooks/12345"/>
271
- <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-09-21T19:22:34.379124</dcterms:modified>
272
- </pgterms:file>
273
274
  </rdf:RDF>'
274
275
  end
275
276
  let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
276
277
 
277
278
  it "expects the correct number of entries returned" do
278
- expect(rdf.covers.count).to be 3
279
+ expect(rdf.covers.count).to be 4
280
+ end
281
+ it "expect medium cover url to be first in the list" do
282
+ expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/cache/epub/98765/pg98765.cover.medium.jpg'
279
283
  end
280
- it "expects those to be used" do
281
- expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/ebooks/12345.cover.medium'
282
- expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/ebooks/12345.cover.small'
284
+ it "expect the small cover url after the medium" do
285
+ expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/cache/epub/98765/pg98765.cover.small.jpg'
283
286
  end
284
287
  it "expects any other images to be listed after the official ones" do
285
- expect(rdf.covers[2]).to eql 'http://www.gutenberg.org/files/12345/12345-h/images/cover.jpg'
288
+ expect(rdf.covers[2]).to eql 'http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg'
286
289
  end
287
290
  end
288
291
 
289
292
  describe "HTML ebook cover image" do
290
293
  let(:xml) do
291
- '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
292
- <pgterms:ebook rdf:about="ebooks/12345">
293
- <pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/12345/12345-rst/images/cover.jpg</pgterms:marc901>
294
- <pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/12345/12345-h/images/cover.jpg</pgterms:marc901>
295
- <pgterms:marc901>http://www.gutenberg.org/files/12345/12345-h/images/cover.jpg</pgterms:marc901>
294
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
295
+ <pgterms:ebook rdf:about="ebooks/98765">
296
+ <pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/98765/98765-rst/images/cover.jpg</pgterms:marc901>
297
+ <pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/98765/98765-h/images/cover.jpg</pgterms:marc901>
298
+ <pgterms:marc901>http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg</pgterms:marc901>
296
299
  </pgterms:ebook>
297
300
  </rdf:RDF>'
298
301
  end
@@ -304,43 +307,52 @@ module GutenbergRdf
304
307
  it "should convert File URIs to the Gutenberg URL" do
305
308
  expect(rdf.covers.first).to match 'http://www.gutenberg.org'
306
309
  end
307
- it "expects the covers to be listed in the correct order" do
308
- expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/files/12345/12345-h/images/cover.jpg'
309
- expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/files/12345/12345-rst/images/cover.jpg'
310
+ it "expects the HTML cover to be listed first" do
311
+ expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg'
312
+ end
313
+ it "expects the RST cover to be listed after the HTML" do
314
+ expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/files/98765/98765-rst/images/cover.jpg'
310
315
  end
311
316
  end
312
317
  end
313
318
 
314
319
  describe "#ebook" do
315
320
  let(:xml) do
316
- '<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
321
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
317
322
  <pgterms:ebook rdf:about="ebooks/98765">
318
- <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/98765.txt.utf-8"/>
319
- <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/98765.zip"/>
323
+ <dcterms:hasFormat>
324
+ <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
325
+ <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
326
+ <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
327
+ <dcterms:format>
328
+ <rdf:Description rdf:nodeID="N87dfy78yd78s6gsg6f8970d76g0f6d9b">
329
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain</rdf:value>
330
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
331
+ </rdf:Description>
332
+ </dcterms:format>
333
+ <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
334
+ </pgterms:file>
335
+ </dcterms:hasFormat>
336
+ <dcterms:hasFormat>
337
+ <pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
338
+ <dcterms:format>
339
+ <rdf:Description rdf:nodeID="Ndfsd78tf34tukjehdsouyo4yrefh6dea">
340
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
341
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
342
+ </rdf:Description>
343
+ </dcterms:format>
344
+ <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
345
+ <dcterms:format>
346
+ <rdf:Description rdf:nodeID="Nfy7we43yhluwe9syrqyp2ewufy0f6d1e">
347
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
348
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
349
+ </rdf:Description>
350
+ </dcterms:format>
351
+ <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
352
+ <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
353
+ </pgterms:file>
354
+ </dcterms:hasFormat>
320
355
  </pgterms:ebook>
321
- <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
322
- <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
323
- <dcterms:format>
324
- <rdf:Description>
325
- <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
326
- <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=utf-8</rdf:value>
327
- </rdf:Description>
328
- </dcterms:format>
329
- <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
330
- <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
331
- </pgterms:file>
332
- <pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
333
- <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
334
- <dcterms:format>
335
- <rdf:Description>
336
- <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
337
- <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
338
- <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
339
- </rdf:Description>
340
- </dcterms:format>
341
- <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
342
- <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
343
- </pgterms:file>
344
356
  </rdf:RDF>'
345
357
  end
346
358
  let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
@@ -349,7 +361,50 @@ module GutenbergRdf
349
361
  expect(rdf.ebooks.count).to be 2
350
362
  end
351
363
  it "expects an entry to be a Media class" do
352
- expect(rdf.ebooks.first.class).to be Rdf::Media
364
+ expect(rdf.ebooks.first).to be_an_instance_of Rdf::Media
365
+ end
366
+
367
+ context "only collect ebook media files" do
368
+ let(:xml) do
369
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
370
+ <pgterms:ebook rdf:about="ebooks/98765">
371
+ <dcterms:hasFormat>
372
+ <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
373
+ <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
374
+ <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
375
+ <dcterms:format>
376
+ <rdf:Description rdf:nodeID="N87dfy78yd78s6gsg6f8970d76g0f6d9b">
377
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain</rdf:value>
378
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
379
+ </rdf:Description>
380
+ </dcterms:format>
381
+ <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
382
+ </pgterms:file>
383
+ </dcterms:hasFormat>
384
+ <dcterms:hasFormat>
385
+ <pgterms:file rdf:about="http://www.gutenberg.org/cache/epub/98765/pg98765.cover.small.jpg">
386
+ <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">2699</dcterms:extent>
387
+ <dcterms:format>
388
+ <rdf:Description rdf:nodeID="N9u34589eyfdiuy8934y787d8f97sg786">
389
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
390
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
391
+ </rdf:Description>
392
+ </dcterms:format>
393
+ <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
394
+ <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-03-25T20:57:55.668737</dcterms:modified>
395
+ </pgterms:file>
396
+ </dcterms:hasFormat>
397
+ </pgterms:ebook>
398
+ </rdf:RDF>'
399
+ end
400
+ let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
401
+
402
+ it "only extracts one media file" do
403
+ expect(rdf.ebooks.count).to be 1
404
+ end
405
+ it "expects the media type to be for an ebook" do
406
+ expect(rdf.ebooks[0].media_type).to eq 'text/plain'
407
+ end
353
408
  end
354
409
  end
355
410
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gutenberg_rdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Cook
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-07 00:00:00.000000000 Z
11
+ date: 2014-07-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -28,7 +28,8 @@ description: A Ruby wrapper for the Project Gutenberg RDF catalog files (require
28
28
  Ruby 2).
29
29
  email:
30
30
  - m@mikecook.co.uk
31
- executables: []
31
+ executables:
32
+ - rdf
32
33
  extensions: []
33
34
  extra_rdoc_files: []
34
35
  files:
@@ -38,6 +39,7 @@ files:
38
39
  - LICENSE.txt
39
40
  - README.md
40
41
  - Rakefile
42
+ - bin/rdf
41
43
  - gutenberg_rdf.gemspec
42
44
  - lib/gutenberg_rdf.rb
43
45
  - lib/gutenberg_rdf/rdf.rb
@@ -69,7 +71,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
69
71
  version: '0'
70
72
  requirements: []
71
73
  rubyforge_project:
72
- rubygems_version: 2.2.1
74
+ rubygems_version: 2.3.0
73
75
  signing_key:
74
76
  specification_version: 4
75
77
  summary: A Ruby wrapper for the Project Gutenberg RDF catalog files.
@@ -79,3 +81,4 @@ test_files:
79
81
  - spec/gutenberg_rdf/rdf_spec.rb
80
82
  - spec/gutenberg_rdf_spec.rb
81
83
  - spec/spec_helper.rb
84
+ has_rdoc: