gutenberg_rdf 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5eb92b0007804d96f27544f3e4248ed20d9a6a33
4
- data.tar.gz: 4d58bb7b868157dfe461a7a0633ed75eb7933af2
3
+ metadata.gz: 26b9401e7a3223ad122100f096eebf5585a97e0e
4
+ data.tar.gz: 5fe2df876d2878fd9628edf6367c2d2d6c381803
5
5
  SHA512:
6
- metadata.gz: 74a4672ad2629c43853abcd4d15f6c033572ad0f71b0eb5177fa1c84dac697a35a775e0e1ba660fea2920b1edf69066aa2b11429fb31b36d5539c44f3d99fcfb
7
- data.tar.gz: bcf6c1bc87aa2e6692ae7db25ae69d2a137467dba227e0702eacc02d8345e901dda2de9f6f5e7fedf32bba98ee47fc639f8008d9dcd55d2757644dccbc6321e7
6
+ metadata.gz: 7fa836f7d515c77d2418e700b43ac00e1e4cef0809b967ed2a58ce929f7edb7b68c6d33a64c7f20920f775cf7b07a5e5a8d6d63e0df5a4f5f6efdb4b0a5e6cc0
7
+ data.tar.gz: 80db01f7fc89fc2bbfb4536454a8b37fa962328e3a93ac1a487c009a92963549fa241e938bedbabedfb85a997911a3b29ccde294ed07502ddb0907b02eeccbd7
data/README.md CHANGED
@@ -1,11 +1,18 @@
1
1
  # Gutenberg RDF
2
2
 
3
3
  Gutenberg RDF is a Ruby wrapper for the Project Gutenberg RDF catalog book files,
4
- providing a nice API to all the metadata contained within.
4
+ providing an API to all the metadata contained within.
5
+
6
+ The official RDF catalog can be found here: http://www.gutenberg.org/wiki/Gutenberg:Feeds
7
+
5
8
 
6
9
  ## Requirements
7
10
 
8
- * Ruby 2.0 - this is so we get UTF-8 by default
11
+ * Ruby 2.0 - this is so we get UTF-8 by default
12
+
13
+ NOTE: Around April 2014, Project Gutenberg (PG) changed the XML format in their RDF files
14
+ considerably, so this gem no longer works with files from before that date.
15
+ Please make sure to download the latest catalog using the link above.
9
16
 
10
17
 
11
18
  ## Installation
data/bin/rdf ADDED
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $LOAD_PATH << File.expand_path(File.dirname(__FILE__) + '/../lib')
4
+
5
+ require "gutenberg_rdf"
6
+
7
+ unless ARGV.last && File.exist?(ARGV.last)
8
+ puts
9
+ puts "ERROR: Please provide the full path to an RDF file:\n\n$ bin/rdf /gutenberg/pg1.rdf"
10
+ puts
11
+ exit 1
12
+ end
13
+
14
+ rdf = GutenbergRdf.parse(ARGV.last)
15
+
16
+ puts " ID: #{rdf.id}"
17
+ puts " Type: #{rdf.type}"
18
+ puts " Title: #{rdf.title}"
19
+ puts " Subtitle: #{rdf.subtitle}"
20
+ puts " Authors: #{rdf.authors.join(', ')}"
21
+ puts "Published: #{rdf.published}"
22
+ puts " Language: #{rdf.language}"
23
+ puts " Rights: #{rdf.rights}"
24
+ puts "Bookcover: #{rdf.covers.first}"
25
+ puts " eBooks: \n#{rdf.ebooks.map{|b| " #{b.uri}" }.join("\n")}"
26
+ puts " Subjects: \n #{rdf.subjects.join("\n ")}"
@@ -48,7 +48,7 @@ module GutenbergRdf
48
48
  end
49
49
 
50
50
  def language
51
- xml.elements['pgterms:ebook/dcterms:language'].text
51
+ xml.elements['pgterms:ebook/dcterms:language/rdf:Description/rdf:value'].text
52
52
  end
53
53
 
54
54
  def rights
@@ -56,13 +56,14 @@ module GutenbergRdf
56
56
  end
57
57
 
58
58
  def covers
59
- official_cover_images.concat(other_cover_images).sort.uniq
59
+ official_cover_images.concat(other_cover_images).uniq
60
60
  end
61
61
 
62
62
  def ebooks
63
63
  files = Array.new
64
- xml.elements.each('pgterms:file') do |file|
65
- files << Media.new(file)
64
+ xml.elements.each('pgterms:ebook/dcterms:hasFormat') do |format|
65
+ file = format.elements['pgterms:file']
66
+ files << Media.new(file) if file.elements['dcterms:format/rdf:Description/rdf:value'].text.match(/\Atext|\Aapplication/)
66
67
  end
67
68
  files
68
69
  end
@@ -85,37 +86,28 @@ module GutenbergRdf
85
86
  title_array.map(&:strip)
86
87
  end
87
88
 
88
- def roles
89
- @roles ||= extract_roles
90
- end
91
-
92
- def extract_roles
93
- entries = Hash.new
94
- xml.elements.each('pgterms:ebook/dcterms:creator') do |entry|
95
- entries["#{entry.attributes['rdf:resource'].sub('2009/agents/', '')}"] = 'aut'
96
- end
97
- xml.elements.each('pgterms:ebook/marcrel:*') do |entry|
98
- entries["#{entry.attributes['rdf:resource'].sub('2009/agents/', '')}"] = entry.name
99
- end
100
- entries
101
- end
102
-
103
89
  def extract_authors
104
- entries = Array.new
105
- xml.elements.each('pgterms:agent') do |agent|
106
- entry = Agent.new(agent)
107
- entry.assign_role(roles)
108
- entries << entry
90
+ agents = Array.new
91
+ xml.elements.each('pgterms:ebook/dcterms:creator') do |contributor|
92
+ agent = Agent.new(contributor.elements['pgterms:agent'])
93
+ agent.role = 'aut'
94
+ agents << agent
109
95
  end
110
- entries
96
+ xml.elements.each('pgterms:ebook/marcrel:*') do |contributor|
97
+ agent = Agent.new(contributor.elements['pgterms:agent'])
98
+ agent.role = contributor.name
99
+ agents << agent
100
+ end
101
+ agents
111
102
  end
112
103
 
113
104
  def official_cover_images
114
105
  entries = Array.new
115
- xml.elements.each('pgterms:file') do |file|
106
+ xml.elements.each('pgterms:ebook/dcterms:hasFormat') do |format|
107
+ file = format.elements['pgterms:file']
116
108
  entries << file.attributes['about'] if file_is_image?(file)
117
109
  end
118
- entries
110
+ entries.sort
119
111
  end
120
112
 
121
113
  def file_is_image?(node)
@@ -132,7 +124,7 @@ module GutenbergRdf
132
124
  cover.sub!(/\Afile:\/\/\/public\/vhost\/g\/gutenberg\/html/, 'http://www.gutenberg.org')
133
125
  entries << cover
134
126
  end
135
- entries
127
+ entries.sort
136
128
  end
137
129
 
138
130
  end
@@ -20,10 +20,6 @@ module GutenbergRdf
20
20
  @role ||= 'oth'
21
21
  end
22
22
 
23
- def assign_role(roles)
24
- self.role = roles["#{id}"]
25
- end
26
-
27
23
  def fullname
28
24
  [firstname, lastname].reject(&:empty?).join(' ')
29
25
  end
@@ -1,3 +1,3 @@
1
1
  module GutenbergRdf
2
- VERSION = "0.3.1"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -3,18 +3,24 @@ require 'spec_helper'
3
3
  module GutenbergRdf
4
4
  class Rdf
5
5
  describe Agent do
6
+ let(:xml) do
7
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:marcrel="http://id.loc.gov/vocabulary/relators/">
8
+ <pgterms:ebook rdf:about="ebooks/99999999">
9
+ <dcterms:creator>
10
+ <pgterms:agent rdf:about="2009/agents/402">
11
+ <pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
12
+ <pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
13
+ <pgterms:name>Doe, Jon James</pgterms:name>
14
+ <pgterms:alias>Doe, Jon</pgterms:alias>
15
+ <pgterms:alias>Doe, J. J.</pgterms:alias>
16
+ <pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Jon_James_Doe"/>
17
+ </pgterms:agent>
18
+ </dcterms:creator>
19
+ </pgterms:ebook>
20
+ </rdf:RDF>'
21
+ end
6
22
  let(:agent) do
7
- xml = '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
8
- <pgterms:agent rdf:about="2009/agents/402">
9
- <pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
10
- <pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
11
- <pgterms:name>Doe, Jon James</pgterms:name>
12
- <pgterms:alias>Doe, Jon</pgterms:alias>
13
- <pgterms:alias>Doe, J. J.</pgterms:alias>
14
- <pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Jon_James_Doe"/>
15
- </pgterms:agent>
16
- </rdf:RDF>'
17
- Agent.new(REXML::Document.new(xml).root.elements['pgterms:agent'])
23
+ Agent.new(REXML::Document.new(xml).root.elements['pgterms:ebook/dcterms:creator/pgterms:agent'])
18
24
  end
19
25
 
20
26
  it "expects an agent ID" do
@@ -25,15 +31,6 @@ module GutenbergRdf
25
31
  expect(agent.role).to eq 'oth'
26
32
  end
27
33
 
28
- describe "Assigning Roles" do
29
- it "assigns the correct value to .role" do
30
- roles = {'402' => 'aut', '116' => 'ctb'}
31
- agent.assign_role(roles)
32
-
33
- expect(agent.role).to eq 'aut'
34
- end
35
- end
36
-
37
34
  it "expects the last name" do
38
35
  expect(agent.lastname).to eq 'Doe'
39
36
  end
@@ -3,23 +3,26 @@ require 'spec_helper'
3
3
  module GutenbergRdf
4
4
  class Rdf
5
5
  describe Media do
6
-
7
6
  let(:xml) do
8
- '<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
9
- <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
10
- <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
11
- <dcterms:format>
12
- <rdf:Description>
13
- <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
14
- <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=utf-8</rdf:value>
15
- </rdf:Description>
16
- </dcterms:format>
17
- <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
18
- <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
19
- </pgterms:file>
7
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
8
+ <pgterms:ebook rdf:about="ebooks/98765">
9
+ <dcterms:hasFormat>
10
+ <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
11
+ <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
12
+ <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
13
+ <dcterms:format>
14
+ <rdf:Description rdf:nodeID="N87dfy78yd78s6gsg6f8970d76g0f6d9b">
15
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=utf-8</rdf:value>
16
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
17
+ </rdf:Description>
18
+ </dcterms:format>
19
+ <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
20
+ </pgterms:file>
21
+ </dcterms:hasFormat>
22
+ </pgterms:ebook>
20
23
  </rdf:RDF>'
21
24
  end
22
- let(:media) { Media.new(REXML::Document.new(xml).elements['rdf:RDF/pgterms:file']) }
25
+ let(:media) { Media.new(REXML::Document.new(xml).elements['rdf:RDF/pgterms:ebook/dcterms:hasFormat/pgterms:file']) }
23
26
 
24
27
  it "expects the file URI" do
25
28
  expect(media.uri).to eql 'http://www.gutenberg.org/ebooks/98765.txt.utf-8'
@@ -36,34 +39,6 @@ module GutenbergRdf
36
39
  it "should return the modified datetime" do
37
40
  expect(media.modified.to_s).to eql '2010-02-16T08:29:52-07:00'
38
41
  end
39
-
40
- context "when there are two media types" do
41
- let(:xml) do
42
- '<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
43
- <pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
44
- <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
45
- <dcterms:format>
46
- <rdf:Description>
47
- <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
48
- <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
49
- <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
50
- </rdf:Description>
51
- </dcterms:format>
52
- <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
53
- <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
54
- </pgterms:file>
55
- </rdf:RDF>'
56
- end
57
- let(:media) { Media.new(REXML::Document.new(xml).elements['rdf:RDF/pgterms:file']) }
58
-
59
- it "expects the first entry to be used" do
60
- expect(media.media_type).to eql 'application/zip'
61
- end
62
- it "expects the encoding to be an empty string" do
63
- expect(media.encoding).to eql ''
64
- end
65
- end
66
-
67
42
  end
68
43
  end
69
44
  end
@@ -2,42 +2,49 @@ require 'spec_helper'
2
2
 
3
3
  module GutenbergRdf
4
4
  describe Rdf do
5
- let(:xml) do
6
- '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
5
+
6
+ describe "basic metadata" do
7
+ let(:xml) do
8
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
7
9
  <pgterms:ebook rdf:about="ebooks/98765">
8
10
  <dcterms:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2006-09-28</dcterms:issued>
9
- <dcterms:language rdf:datatype="http://purl.org/dc/terms/RFC4646">en</dcterms:language>
11
+ <dcterms:language>
12
+ <rdf:Description rdf:nodeID="N88989dfs7984987df987cvcsd876ew79">
13
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/RFC4646">en</rdf:value>
14
+ </rdf:Description>
15
+ </dcterms:language>
10
16
  <dcterms:publisher>Project Gutenberg</dcterms:publisher>
11
17
  <dcterms:rights>Public domain in the USA.</dcterms:rights>
12
18
  </pgterms:ebook>
13
- </rdf:RDF>'
14
- end
15
- let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
19
+ </rdf:RDF>'
20
+ end
21
+ let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
16
22
 
17
- it "expects an id" do
18
- expect(rdf.id).to eql "98765"
19
- end
20
- it "expects a published date" do
21
- expect(rdf.published).to eql "2006-09-28"
22
- end
23
- it "expects a publisher" do
24
- expect(rdf.publisher).to eql "Project Gutenberg"
25
- end
26
- it "expects a language" do
27
- expect(rdf.language).to eql "en"
28
- end
29
- it "expects the rights" do
30
- expect(rdf.rights).to eql "Public domain in the USA."
23
+ it "expects an id" do
24
+ expect(rdf.id).to eql "98765"
25
+ end
26
+ it "expects a published date" do
27
+ expect(rdf.published).to eql "2006-09-28"
28
+ end
29
+ it "expects a publisher" do
30
+ expect(rdf.publisher).to eql "Project Gutenberg"
31
+ end
32
+ it "expects a language" do
33
+ expect(rdf.language).to eql "en"
34
+ end
35
+ it "expects the rights" do
36
+ expect(rdf.rights).to eql "Public domain in the USA."
37
+ end
31
38
  end
32
39
 
33
40
  describe "#type" do
34
41
  let(:xml) do
35
- '<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
42
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
36
43
  <pgterms:ebook rdf:about="ebooks/98765">
37
44
  <dcterms:type>
38
- <rdf:Description>
39
- <dcam:memberOf rdf:resource="http://purl.org/dc/terms/DCMIType"/>
45
+ <rdf:Description rdf:nodeID="Nd89943yhljdsf93489ydfs897g7fd897">
40
46
  <rdf:value>Text</rdf:value>
47
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/DCMIType"/>
41
48
  </rdf:Description>
42
49
  </dcterms:type>
43
50
  </pgterms:ebook>
@@ -52,7 +59,7 @@ module GutenbergRdf
52
59
 
53
60
  describe "Titles" do
54
61
  let(:xml) do
55
- '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
62
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
56
63
  <pgterms:ebook rdf:about="ebooks/98765">
57
64
  <dcterms:title>A Great Title</dcterms:title>
58
65
  </pgterms:ebook>
@@ -69,7 +76,7 @@ module GutenbergRdf
69
76
 
70
77
  context "with a title and subtitle, on separate lines" do
71
78
  let(:xml) do
72
- '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
79
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
73
80
  <pgterms:ebook rdf:about="ebooks/98765">
74
81
  <dcterms:title>A Great Multi-Title
75
82
  Or, a Subtitle</dcterms:title>
@@ -88,7 +95,7 @@ module GutenbergRdf
88
95
 
89
96
  context "with; title, or, subtitle (we need to split on the 'or')" do
90
97
  let(:xml) do
91
- '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
98
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
92
99
  <pgterms:ebook rdf:about="ebooks/98765">
93
100
  <dcterms:title>A Great Multi-Title, or, a Subtitle</dcterms:title>
94
101
  </pgterms:ebook>
@@ -106,7 +113,7 @@ module GutenbergRdf
106
113
 
107
114
  context "when title:subtitle are separated by a colon" do
108
115
  let(:xml) do
109
- '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
116
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
110
117
  <pgterms:ebook rdf:about="ebooks/98765">
111
118
  <dcterms:title>A Great Multi-Title: And a Subtitle</dcterms:title>
112
119
  </pgterms:ebook>
@@ -124,7 +131,7 @@ module GutenbergRdf
124
131
 
125
132
  context "when title; and subtitle are separated by a semi-colon" do
126
133
  let(:xml) do
127
- '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
134
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
128
135
  <pgterms:ebook rdf:about="ebooks/98765">
129
136
  <dcterms:title>A Great Multi-Title; Or, a Subtitle</dcterms:title>
130
137
  </pgterms:ebook>
@@ -140,12 +147,12 @@ module GutenbergRdf
140
147
 
141
148
  context "...except when subtitles already exists" do
142
149
  let(:xml) do
143
- '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
144
- <pgterms:ebook rdf:about="ebooks/98765">
145
- <dcterms:title>A Great Multi-Title; and some other text
146
- Then a Subtitle on a newline</dcterms:title>
147
- </pgterms:ebook>
148
- </rdf:RDF>'
150
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
151
+ <pgterms:ebook rdf:about="ebooks/98765">
152
+ <dcterms:title>A Great Multi-Title; and some other text
153
+ Then a Subtitle on a newline</dcterms:title>
154
+ </pgterms:ebook>
155
+ </rdf:RDF>'
149
156
  end
150
157
  let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
151
158
  it "expects a title" do
@@ -160,22 +167,24 @@ module GutenbergRdf
160
167
 
161
168
  describe "#authors" do
162
169
  let(:xml) do
163
- '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:marcrel="http://id.loc.gov/vocabulary/relators">
170
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:marcrel="http://id.loc.gov/vocabulary/relators/">
164
171
  <pgterms:ebook rdf:about="ebooks/99999999">
165
- <marcrel:ctb rdf:resource="2009/agents/402"/>
166
- <dcterms:creator rdf:resource="2009/agents/116"/>
172
+ <dcterms:creator>
173
+ <pgterms:agent rdf:about="2009/agents/116">
174
+ <pgterms:alias>Verschillende</pgterms:alias>
175
+ <pgterms:name>Various</pgterms:name>
176
+ </pgterms:agent>
177
+ </dcterms:creator>
178
+ <marcrel:ctb>
179
+ <pgterms:agent rdf:about="2009/agents/402">
180
+ <pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:deathdate>
181
+ <pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:birthdate>
182
+ <pgterms:name>Dodge, Mary Mapes</pgterms:name>
183
+ <pgterms:alias>Dodge, Mary</pgterms:alias>
184
+ <pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Mary_Mapes_Dodge"/>
185
+ </pgterms:agent>
186
+ </marcrel:ctb>
167
187
  </pgterms:ebook>
168
- <pgterms:agent rdf:about="2009/agents/402">
169
- <pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
170
- <pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
171
- <pgterms:name>Dodge, Mary Mapes</pgterms:name>
172
- <pgterms:alias>Dodge, Mary</pgterms:alias>
173
- <pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Mary_Mapes_Dodge"/>
174
- </pgterms:agent>
175
- <pgterms:agent rdf:about="2009/agents/116">
176
- <pgterms:alias>Verschillende</pgterms:alias>
177
- <pgterms:name>Various</pgterms:name>
178
- </pgterms:agent>
179
188
  </rdf:RDF>'
180
189
  end
181
190
  let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
@@ -183,35 +192,35 @@ module GutenbergRdf
183
192
  it "returns the correct number of authors" do
184
193
  expect(rdf.authors.count).to be 2
185
194
  end
186
- it "expects an author object" do
187
- expect(rdf.authors.first.class).to be Rdf::Agent
188
- end
189
- it "has the correct author names" do
190
- expect(rdf.authors.first.fullname).to eq 'Mary Mapes Dodge'
195
+ it "expects an Agent object" do
196
+ expect(rdf.authors[0]).to be_an_instance_of Rdf::Agent
191
197
  end
192
198
  it "expects the author to have an aut role" do
193
- expect(rdf.authors.last.role).to eq 'aut'
199
+ expect(rdf.authors[0].role).to eq 'aut'
200
+ end
201
+ it "has the correct author names" do
202
+ expect(rdf.authors[1].fullname).to eq 'Mary Mapes Dodge'
194
203
  end
195
204
  it "expects other agents to have the correct role" do
196
- expect(rdf.authors.first.role).to eq 'ctb'
205
+ expect(rdf.authors[1].role).to eq 'ctb'
197
206
  end
198
207
  end
199
208
 
200
209
  describe "#subjects" do
201
210
  let(:xml) do
202
- %q{<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
211
+ %q{<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
203
212
  <pgterms:ebook rdf:about="ebooks/98765">
204
213
  <dcterms:subject>
205
- <rdf:Description>
206
- <dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCSH"/>
207
- <rdf:value>Children's literature -- Periodicals</rdf:value>
208
- <rdf:value>Children's periodicals, American</rdf:value>
214
+ <rdf:Description rdf:nodeID="Ndfsc8xdsfwar734897n7sdofyhod11b9">
215
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCC"/>
216
+ <rdf:value>PZ</rdf:value>
209
217
  </rdf:Description>
210
218
  </dcterms:subject>
211
219
  <dcterms:subject>
212
- <rdf:Description>
213
- <dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCC"/>
214
- <rdf:value>PZ</rdf:value>
220
+ <rdf:Description rdf:nodeID="Ndfcdh8934hsdljkfh98y89hlfhltyab8">
221
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCSH"/>
222
+ <rdf:value>Children's literature -- Periodicals</rdf:value>
223
+ <rdf:value>Children's periodicals, American</rdf:value>
215
224
  </rdf:Description>
216
225
  </dcterms:subject>
217
226
  </pgterms:ebook>
@@ -230,69 +239,63 @@ module GutenbergRdf
230
239
  describe "#covers" do
231
240
  describe "official PG covers" do
232
241
  let(:xml) do
233
- '<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
234
- <pgterms:ebook rdf:about="ebooks/12345">
235
- <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/12345.epub.noimages"/>
236
- <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/12345.cover.medium"/>
237
- <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/12345.cover.small"/>
238
- <pgterms:marc901>http://www.gutenberg.org/files/12345/12345-h/images/cover.jpg</pgterms:marc901>
242
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
243
+ <pgterms:ebook rdf:about="ebooks/98765">
244
+ <dcterms:hasFormat>
245
+ <pgterms:file rdf:about="http://www.gutenberg.org/cache/epub/98765/pg98765.cover.small.jpg">
246
+ <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">2699</dcterms:extent>
247
+ <dcterms:format>
248
+ <rdf:Description rdf:nodeID="N9u34589eyfdiuy8934y787d8f97sg786">
249
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
250
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
251
+ </rdf:Description>
252
+ </dcterms:format>
253
+ <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
254
+ <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-03-25T20:57:55.668737</dcterms:modified>
255
+ </pgterms:file>
256
+ </dcterms:hasFormat>
257
+ <pgterms:marc901>http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg</pgterms:marc901>
258
+ <pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/98765/98765-h/images/cover.jpg</pgterms:marc901>
259
+ <pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/98765/98765-rst/images/cover.jpg</pgterms:marc901>
260
+ <dcterms:hasFormat>
261
+ <pgterms:file rdf:about="http://www.gutenberg.org/cache/epub/98765/pg98765.cover.medium.jpg">
262
+ <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-03-25T20:57:55.889736</dcterms:modified>
263
+ <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
264
+ <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">10856</dcterms:extent>
265
+ <dcterms:format>
266
+ <rdf:Description rdf:nodeID="N8df89ys8993p4qu89uenf89dusp38a07">
267
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
268
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
269
+ </rdf:Description>
270
+ </dcterms:format>
271
+ </pgterms:file>
272
+ </dcterms:hasFormat>
239
273
  </pgterms:ebook>
240
- <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/12345.epub.noimages">
241
- <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">92652</dcterms:extent>
242
- <dcterms:format>
243
- <rdf:Description>
244
- <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
245
- <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/epub+zip</rdf:value>
246
- </rdf:Description>
247
- </dcterms:format>
248
- <dcterms:isFormatOf rdf:resource="ebooks/12345"/>
249
- <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-09-21T19:22:32.115259</dcterms:modified>
250
- </pgterms:file>
251
- <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/12345.cover.medium">
252
- <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">10856</dcterms:extent>
253
- <dcterms:format>
254
- <rdf:Description>
255
- <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
256
- <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
257
- </rdf:Description>
258
- </dcterms:format>
259
- <dcterms:isFormatOf rdf:resource="ebooks/12345"/>
260
- <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-09-21T19:22:34.484114</dcterms:modified>
261
- </pgterms:file>
262
- <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/12345.cover.small">
263
- <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1904</dcterms:extent>
264
- <dcterms:format>
265
- <rdf:Description>
266
- <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
267
- <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
268
- </rdf:Description>
269
- </dcterms:format>
270
- <dcterms:isFormatOf rdf:resource="ebooks/12345"/>
271
- <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-09-21T19:22:34.379124</dcterms:modified>
272
- </pgterms:file>
273
274
  </rdf:RDF>'
274
275
  end
275
276
  let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
276
277
 
277
278
  it "expects the correct number of entries returned" do
278
- expect(rdf.covers.count).to be 3
279
+ expect(rdf.covers.count).to be 4
280
+ end
281
+ it "expect medium cover url to be first in the list" do
282
+ expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/cache/epub/98765/pg98765.cover.medium.jpg'
279
283
  end
280
- it "expects those to be used" do
281
- expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/ebooks/12345.cover.medium'
282
- expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/ebooks/12345.cover.small'
284
+ it "expect the small cover url after the medium" do
285
+ expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/cache/epub/98765/pg98765.cover.small.jpg'
283
286
  end
284
287
  it "expects any other images to be listed after the official ones" do
285
- expect(rdf.covers[2]).to eql 'http://www.gutenberg.org/files/12345/12345-h/images/cover.jpg'
288
+ expect(rdf.covers[2]).to eql 'http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg'
286
289
  end
287
290
  end
288
291
 
289
292
  describe "HTML ebook cover image" do
290
293
  let(:xml) do
291
- '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
292
- <pgterms:ebook rdf:about="ebooks/12345">
293
- <pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/12345/12345-rst/images/cover.jpg</pgterms:marc901>
294
- <pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/12345/12345-h/images/cover.jpg</pgterms:marc901>
295
- <pgterms:marc901>http://www.gutenberg.org/files/12345/12345-h/images/cover.jpg</pgterms:marc901>
294
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
295
+ <pgterms:ebook rdf:about="ebooks/98765">
296
+ <pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/98765/98765-rst/images/cover.jpg</pgterms:marc901>
297
+ <pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/98765/98765-h/images/cover.jpg</pgterms:marc901>
298
+ <pgterms:marc901>http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg</pgterms:marc901>
296
299
  </pgterms:ebook>
297
300
  </rdf:RDF>'
298
301
  end
@@ -304,43 +307,52 @@ module GutenbergRdf
304
307
  it "should convert File URIs to the Gutenberg URL" do
305
308
  expect(rdf.covers.first).to match 'http://www.gutenberg.org'
306
309
  end
307
- it "expects the covers to be listed in the correct order" do
308
- expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/files/12345/12345-h/images/cover.jpg'
309
- expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/files/12345/12345-rst/images/cover.jpg'
310
+ it "expects the HTML cover to be listed first" do
311
+ expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg'
312
+ end
313
+ it "expects the RST cover to be listed after the HTML" do
314
+ expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/files/98765/98765-rst/images/cover.jpg'
310
315
  end
311
316
  end
312
317
  end
313
318
 
314
319
  describe "#ebook" do
315
320
  let(:xml) do
316
- '<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
321
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
317
322
  <pgterms:ebook rdf:about="ebooks/98765">
318
- <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/98765.txt.utf-8"/>
319
- <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/98765.zip"/>
323
+ <dcterms:hasFormat>
324
+ <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
325
+ <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
326
+ <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
327
+ <dcterms:format>
328
+ <rdf:Description rdf:nodeID="N87dfy78yd78s6gsg6f8970d76g0f6d9b">
329
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain</rdf:value>
330
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
331
+ </rdf:Description>
332
+ </dcterms:format>
333
+ <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
334
+ </pgterms:file>
335
+ </dcterms:hasFormat>
336
+ <dcterms:hasFormat>
337
+ <pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
338
+ <dcterms:format>
339
+ <rdf:Description rdf:nodeID="Ndfsd78tf34tukjehdsouyo4yrefh6dea">
340
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
341
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
342
+ </rdf:Description>
343
+ </dcterms:format>
344
+ <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
345
+ <dcterms:format>
346
+ <rdf:Description rdf:nodeID="Nfy7we43yhluwe9syrqyp2ewufy0f6d1e">
347
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
348
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
349
+ </rdf:Description>
350
+ </dcterms:format>
351
+ <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
352
+ <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
353
+ </pgterms:file>
354
+ </dcterms:hasFormat>
320
355
  </pgterms:ebook>
321
- <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
322
- <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
323
- <dcterms:format>
324
- <rdf:Description>
325
- <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
326
- <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=utf-8</rdf:value>
327
- </rdf:Description>
328
- </dcterms:format>
329
- <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
330
- <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
331
- </pgterms:file>
332
- <pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
333
- <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
334
- <dcterms:format>
335
- <rdf:Description>
336
- <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
337
- <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
338
- <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
339
- </rdf:Description>
340
- </dcterms:format>
341
- <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
342
- <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
343
- </pgterms:file>
344
356
  </rdf:RDF>'
345
357
  end
346
358
  let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
@@ -349,7 +361,50 @@ module GutenbergRdf
349
361
  expect(rdf.ebooks.count).to be 2
350
362
  end
351
363
  it "expects an entry to be a Media class" do
352
- expect(rdf.ebooks.first.class).to be Rdf::Media
364
+ expect(rdf.ebooks.first).to be_an_instance_of Rdf::Media
365
+ end
366
+
367
+ context "only collect ebook media files" do
368
+ let(:xml) do
369
+ '<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
370
+ <pgterms:ebook rdf:about="ebooks/98765">
371
+ <dcterms:hasFormat>
372
+ <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
373
+ <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
374
+ <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
375
+ <dcterms:format>
376
+ <rdf:Description rdf:nodeID="N87dfy78yd78s6gsg6f8970d76g0f6d9b">
377
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain</rdf:value>
378
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
379
+ </rdf:Description>
380
+ </dcterms:format>
381
+ <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
382
+ </pgterms:file>
383
+ </dcterms:hasFormat>
384
+ <dcterms:hasFormat>
385
+ <pgterms:file rdf:about="http://www.gutenberg.org/cache/epub/98765/pg98765.cover.small.jpg">
386
+ <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">2699</dcterms:extent>
387
+ <dcterms:format>
388
+ <rdf:Description rdf:nodeID="N9u34589eyfdiuy8934y787d8f97sg786">
389
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
390
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
391
+ </rdf:Description>
392
+ </dcterms:format>
393
+ <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
394
+ <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-03-25T20:57:55.668737</dcterms:modified>
395
+ </pgterms:file>
396
+ </dcterms:hasFormat>
397
+ </pgterms:ebook>
398
+ </rdf:RDF>'
399
+ end
400
+ let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
401
+
402
+ it "only extracts one media file" do
403
+ expect(rdf.ebooks.count).to be 1
404
+ end
405
+ it "expects the media type to be for an ebook" do
406
+ expect(rdf.ebooks[0].media_type).to eq 'text/plain'
407
+ end
353
408
  end
354
409
  end
355
410
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gutenberg_rdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Cook
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-07 00:00:00.000000000 Z
11
+ date: 2014-07-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -28,7 +28,8 @@ description: A Ruby wrapper for the Project Gutenberg RDF catalog files (require
28
28
  Ruby 2).
29
29
  email:
30
30
  - m@mikecook.co.uk
31
- executables: []
31
+ executables:
32
+ - rdf
32
33
  extensions: []
33
34
  extra_rdoc_files: []
34
35
  files:
@@ -38,6 +39,7 @@ files:
38
39
  - LICENSE.txt
39
40
  - README.md
40
41
  - Rakefile
42
+ - bin/rdf
41
43
  - gutenberg_rdf.gemspec
42
44
  - lib/gutenberg_rdf.rb
43
45
  - lib/gutenberg_rdf/rdf.rb
@@ -69,7 +71,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
69
71
  version: '0'
70
72
  requirements: []
71
73
  rubyforge_project:
72
- rubygems_version: 2.2.1
74
+ rubygems_version: 2.3.0
73
75
  signing_key:
74
76
  specification_version: 4
75
77
  summary: A Ruby wrapper for the Project Gutenberg RDF catalog files.
@@ -79,3 +81,4 @@ test_files:
79
81
  - spec/gutenberg_rdf/rdf_spec.rb
80
82
  - spec/gutenberg_rdf_spec.rb
81
83
  - spec/spec_helper.rb
84
+ has_rdoc: