gutenberg_rdf 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +9 -2
- data/bin/rdf +26 -0
- data/lib/gutenberg_rdf/rdf.rb +20 -28
- data/lib/gutenberg_rdf/rdf/agent.rb +0 -4
- data/lib/gutenberg_rdf/version.rb +1 -1
- data/spec/gutenberg_rdf/rdf/agent_spec.rb +17 -20
- data/spec/gutenberg_rdf/rdf/media_spec.rb +17 -42
- data/spec/gutenberg_rdf/rdf_spec.rb +197 -142
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 26b9401e7a3223ad122100f096eebf5585a97e0e
|
4
|
+
data.tar.gz: 5fe2df876d2878fd9628edf6367c2d2d6c381803
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7fa836f7d515c77d2418e700b43ac00e1e4cef0809b967ed2a58ce929f7edb7b68c6d33a64c7f20920f775cf7b07a5e5a8d6d63e0df5a4f5f6efdb4b0a5e6cc0
|
7
|
+
data.tar.gz: 80db01f7fc89fc2bbfb4536454a8b37fa962328e3a93ac1a487c009a92963549fa241e938bedbabedfb85a997911a3b29ccde294ed07502ddb0907b02eeccbd7
|
data/README.md
CHANGED
@@ -1,11 +1,18 @@
|
|
1
1
|
# Gutenberg RDF
|
2
2
|
|
3
3
|
Gutenberg RDF is a Ruby wrapper for the Project Gutenberg RDF catalog book files,
|
4
|
-
providing
|
4
|
+
providing an API to all the metadata contained within.
|
5
|
+
|
6
|
+
The official RDF catalog can be found here: http://www.gutenberg.org/wiki/Gutenberg:Feeds
|
7
|
+
|
5
8
|
|
6
9
|
## Requirements
|
7
10
|
|
8
|
-
*
|
11
|
+
* Ruby 2.0 - this is so we get UTF-8 by default
|
12
|
+
|
13
|
+
NOTE: In around April 2014 PG changed the XML format in their RDF files
|
14
|
+
considerably, so this GEM will no longer work on files from before that date.
|
15
|
+
Please make sure to download the latest catalog using the link above.
|
9
16
|
|
10
17
|
|
11
18
|
## Installation
|
data/bin/rdf
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
$LOAD_PATH << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
4
|
+
|
5
|
+
require "gutenberg_rdf"
|
6
|
+
|
7
|
+
unless ARGV.last && File.exist?(ARGV.last)
|
8
|
+
puts
|
9
|
+
puts "ERROR: Please provide the full path to an RDF file:\n\n$ bin/rdf /gutenberg/pg1.rdf"
|
10
|
+
puts
|
11
|
+
exit 1
|
12
|
+
end
|
13
|
+
|
14
|
+
rdf = GutenbergRdf.parse(ARGV.last)
|
15
|
+
|
16
|
+
puts " ID: #{rdf.id}"
|
17
|
+
puts " Type: #{rdf.type}"
|
18
|
+
puts " Title: #{rdf.title}"
|
19
|
+
puts " Subtitle: #{rdf.subtitle}"
|
20
|
+
puts " Authors: #{rdf.authors.join(', ')}"
|
21
|
+
puts "Published: #{rdf.published}"
|
22
|
+
puts " Language: #{rdf.language}"
|
23
|
+
puts " Rights: #{rdf.rights}"
|
24
|
+
puts "Bookcover: #{rdf.covers.first}"
|
25
|
+
puts " eBooks: \n#{rdf.ebooks.map{|b| " #{b.uri}" }.join("\n")}"
|
26
|
+
puts " Subjects: \n #{rdf.subjects.join("\n ")}"
|
data/lib/gutenberg_rdf/rdf.rb
CHANGED
@@ -48,7 +48,7 @@ module GutenbergRdf
|
|
48
48
|
end
|
49
49
|
|
50
50
|
def language
|
51
|
-
xml.elements['pgterms:ebook/dcterms:language'].text
|
51
|
+
xml.elements['pgterms:ebook/dcterms:language/rdf:Description/rdf:value'].text
|
52
52
|
end
|
53
53
|
|
54
54
|
def rights
|
@@ -56,13 +56,14 @@ module GutenbergRdf
|
|
56
56
|
end
|
57
57
|
|
58
58
|
def covers
|
59
|
-
official_cover_images.concat(other_cover_images).
|
59
|
+
official_cover_images.concat(other_cover_images).uniq
|
60
60
|
end
|
61
61
|
|
62
62
|
def ebooks
|
63
63
|
files = Array.new
|
64
|
-
xml.elements.each('pgterms:
|
65
|
-
|
64
|
+
xml.elements.each('pgterms:ebook/dcterms:hasFormat') do |format|
|
65
|
+
file = format.elements['pgterms:file']
|
66
|
+
files << Media.new(file) if file.elements['dcterms:format/rdf:Description/rdf:value'].text.match(/\Atext|\Aapplication/)
|
66
67
|
end
|
67
68
|
files
|
68
69
|
end
|
@@ -85,37 +86,28 @@ module GutenbergRdf
|
|
85
86
|
title_array.map(&:strip)
|
86
87
|
end
|
87
88
|
|
88
|
-
def roles
|
89
|
-
@roles ||= extract_roles
|
90
|
-
end
|
91
|
-
|
92
|
-
def extract_roles
|
93
|
-
entries = Hash.new
|
94
|
-
xml.elements.each('pgterms:ebook/dcterms:creator') do |entry|
|
95
|
-
entries["#{entry.attributes['rdf:resource'].sub('2009/agents/', '')}"] = 'aut'
|
96
|
-
end
|
97
|
-
xml.elements.each('pgterms:ebook/marcrel:*') do |entry|
|
98
|
-
entries["#{entry.attributes['rdf:resource'].sub('2009/agents/', '')}"] = entry.name
|
99
|
-
end
|
100
|
-
entries
|
101
|
-
end
|
102
|
-
|
103
89
|
def extract_authors
|
104
|
-
|
105
|
-
xml.elements.each('pgterms:
|
106
|
-
|
107
|
-
|
108
|
-
|
90
|
+
agents = Array.new
|
91
|
+
xml.elements.each('pgterms:ebook/dcterms:creator') do |contributor|
|
92
|
+
agent = Agent.new(contributor.elements['pgterms:agent'])
|
93
|
+
agent.role = 'aut'
|
94
|
+
agents << agent
|
109
95
|
end
|
110
|
-
|
96
|
+
xml.elements.each('pgterms:ebook/marcrel:*') do |contributor|
|
97
|
+
agent = Agent.new(contributor.elements['pgterms:agent'])
|
98
|
+
agent.role = contributor.name
|
99
|
+
agents << agent
|
100
|
+
end
|
101
|
+
agents
|
111
102
|
end
|
112
103
|
|
113
104
|
def official_cover_images
|
114
105
|
entries = Array.new
|
115
|
-
xml.elements.each('pgterms:
|
106
|
+
xml.elements.each('pgterms:ebook/dcterms:hasFormat') do |format|
|
107
|
+
file = format.elements['pgterms:file']
|
116
108
|
entries << file.attributes['about'] if file_is_image?(file)
|
117
109
|
end
|
118
|
-
entries
|
110
|
+
entries.sort
|
119
111
|
end
|
120
112
|
|
121
113
|
def file_is_image?(node)
|
@@ -132,7 +124,7 @@ module GutenbergRdf
|
|
132
124
|
cover.sub!(/\Afile:\/\/\/public\/vhost\/g\/gutenberg\/html/, 'http://www.gutenberg.org')
|
133
125
|
entries << cover
|
134
126
|
end
|
135
|
-
entries
|
127
|
+
entries.sort
|
136
128
|
end
|
137
129
|
|
138
130
|
end
|
@@ -3,18 +3,24 @@ require 'spec_helper'
|
|
3
3
|
module GutenbergRdf
|
4
4
|
class Rdf
|
5
5
|
describe Agent do
|
6
|
+
let(:xml) do
|
7
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:marcrel="http://id.loc.gov/vocabulary/relators/">
|
8
|
+
<pgterms:ebook rdf:about="ebooks/99999999">
|
9
|
+
<dcterms:creator>
|
10
|
+
<pgterms:agent rdf:about="2009/agents/402">
|
11
|
+
<pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
|
12
|
+
<pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
|
13
|
+
<pgterms:name>Doe, Jon James</pgterms:name>
|
14
|
+
<pgterms:alias>Doe, Jon</pgterms:alias>
|
15
|
+
<pgterms:alias>Doe, J. J.</pgterms:alias>
|
16
|
+
<pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Jon_James_Doe"/>
|
17
|
+
</pgterms:agent>
|
18
|
+
</dcterms:creator>
|
19
|
+
</pgterms:ebook>
|
20
|
+
</rdf:RDF>'
|
21
|
+
end
|
6
22
|
let(:agent) do
|
7
|
-
xml
|
8
|
-
<pgterms:agent rdf:about="2009/agents/402">
|
9
|
-
<pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
|
10
|
-
<pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
|
11
|
-
<pgterms:name>Doe, Jon James</pgterms:name>
|
12
|
-
<pgterms:alias>Doe, Jon</pgterms:alias>
|
13
|
-
<pgterms:alias>Doe, J. J.</pgterms:alias>
|
14
|
-
<pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Jon_James_Doe"/>
|
15
|
-
</pgterms:agent>
|
16
|
-
</rdf:RDF>'
|
17
|
-
Agent.new(REXML::Document.new(xml).root.elements['pgterms:agent'])
|
23
|
+
Agent.new(REXML::Document.new(xml).root.elements['pgterms:ebook/dcterms:creator/pgterms:agent'])
|
18
24
|
end
|
19
25
|
|
20
26
|
it "expects an agent ID" do
|
@@ -25,15 +31,6 @@ module GutenbergRdf
|
|
25
31
|
expect(agent.role).to eq 'oth'
|
26
32
|
end
|
27
33
|
|
28
|
-
describe "Assigning Roles" do
|
29
|
-
it "assigns the correct value to .role" do
|
30
|
-
roles = {'402' => 'aut', '116' => 'ctb'}
|
31
|
-
agent.assign_role(roles)
|
32
|
-
|
33
|
-
expect(agent.role).to eq 'aut'
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
34
|
it "expects the last name" do
|
38
35
|
expect(agent.lastname).to eq 'Doe'
|
39
36
|
end
|
@@ -3,23 +3,26 @@ require 'spec_helper'
|
|
3
3
|
module GutenbergRdf
|
4
4
|
class Rdf
|
5
5
|
describe Media do
|
6
|
-
|
7
6
|
let(:xml) do
|
8
|
-
'<rdf:RDF xmlns:
|
9
|
-
<pgterms:
|
10
|
-
<dcterms:
|
11
|
-
|
12
|
-
|
13
|
-
<
|
14
|
-
<
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
7
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
8
|
+
<pgterms:ebook rdf:about="ebooks/98765">
|
9
|
+
<dcterms:hasFormat>
|
10
|
+
<pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
|
11
|
+
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
12
|
+
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
|
13
|
+
<dcterms:format>
|
14
|
+
<rdf:Description rdf:nodeID="N87dfy78yd78s6gsg6f8970d76g0f6d9b">
|
15
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=utf-8</rdf:value>
|
16
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
17
|
+
</rdf:Description>
|
18
|
+
</dcterms:format>
|
19
|
+
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
|
20
|
+
</pgterms:file>
|
21
|
+
</dcterms:hasFormat>
|
22
|
+
</pgterms:ebook>
|
20
23
|
</rdf:RDF>'
|
21
24
|
end
|
22
|
-
let(:media) { Media.new(REXML::Document.new(xml).elements['rdf:RDF/pgterms:file']) }
|
25
|
+
let(:media) { Media.new(REXML::Document.new(xml).elements['rdf:RDF/pgterms:ebook/dcterms:hasFormat/pgterms:file']) }
|
23
26
|
|
24
27
|
it "expects the file URI" do
|
25
28
|
expect(media.uri).to eql 'http://www.gutenberg.org/ebooks/98765.txt.utf-8'
|
@@ -36,34 +39,6 @@ module GutenbergRdf
|
|
36
39
|
it "should return the modified datetime" do
|
37
40
|
expect(media.modified.to_s).to eql '2010-02-16T08:29:52-07:00'
|
38
41
|
end
|
39
|
-
|
40
|
-
context "when there are two media types" do
|
41
|
-
let(:xml) do
|
42
|
-
'<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
43
|
-
<pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
|
44
|
-
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
|
45
|
-
<dcterms:format>
|
46
|
-
<rdf:Description>
|
47
|
-
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
48
|
-
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
|
49
|
-
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
|
50
|
-
</rdf:Description>
|
51
|
-
</dcterms:format>
|
52
|
-
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
53
|
-
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
|
54
|
-
</pgterms:file>
|
55
|
-
</rdf:RDF>'
|
56
|
-
end
|
57
|
-
let(:media) { Media.new(REXML::Document.new(xml).elements['rdf:RDF/pgterms:file']) }
|
58
|
-
|
59
|
-
it "expects the first entry to be used" do
|
60
|
-
expect(media.media_type).to eql 'application/zip'
|
61
|
-
end
|
62
|
-
it "expects the encoding to be an empty string" do
|
63
|
-
expect(media.encoding).to eql ''
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
42
|
end
|
68
43
|
end
|
69
44
|
end
|
@@ -2,42 +2,49 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
module GutenbergRdf
|
4
4
|
describe Rdf do
|
5
|
-
|
6
|
-
|
5
|
+
|
6
|
+
describe "basic metadata" do
|
7
|
+
let(:xml) do
|
8
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
7
9
|
<pgterms:ebook rdf:about="ebooks/98765">
|
8
10
|
<dcterms:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2006-09-28</dcterms:issued>
|
9
|
-
<dcterms:language
|
11
|
+
<dcterms:language>
|
12
|
+
<rdf:Description rdf:nodeID="N88989dfs7984987df987cvcsd876ew79">
|
13
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/RFC4646">en</rdf:value>
|
14
|
+
</rdf:Description>
|
15
|
+
</dcterms:language>
|
10
16
|
<dcterms:publisher>Project Gutenberg</dcterms:publisher>
|
11
17
|
<dcterms:rights>Public domain in the USA.</dcterms:rights>
|
12
18
|
</pgterms:ebook>
|
13
|
-
|
14
|
-
|
15
|
-
|
19
|
+
</rdf:RDF>'
|
20
|
+
end
|
21
|
+
let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
|
16
22
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
23
|
+
it "expects an id" do
|
24
|
+
expect(rdf.id).to eql "98765"
|
25
|
+
end
|
26
|
+
it "expects a published date" do
|
27
|
+
expect(rdf.published).to eql "2006-09-28"
|
28
|
+
end
|
29
|
+
it "expects a publisher" do
|
30
|
+
expect(rdf.publisher).to eql "Project Gutenberg"
|
31
|
+
end
|
32
|
+
it "expects a language" do
|
33
|
+
expect(rdf.language).to eql "en"
|
34
|
+
end
|
35
|
+
it "expects the rights" do
|
36
|
+
expect(rdf.rights).to eql "Public domain in the USA."
|
37
|
+
end
|
31
38
|
end
|
32
39
|
|
33
40
|
describe "#type" do
|
34
41
|
let(:xml) do
|
35
|
-
'<rdf:RDF xmlns:
|
42
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
36
43
|
<pgterms:ebook rdf:about="ebooks/98765">
|
37
44
|
<dcterms:type>
|
38
|
-
<rdf:Description>
|
39
|
-
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/DCMIType"/>
|
45
|
+
<rdf:Description rdf:nodeID="Nd89943yhljdsf93489ydfs897g7fd897">
|
40
46
|
<rdf:value>Text</rdf:value>
|
47
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/DCMIType"/>
|
41
48
|
</rdf:Description>
|
42
49
|
</dcterms:type>
|
43
50
|
</pgterms:ebook>
|
@@ -52,7 +59,7 @@ module GutenbergRdf
|
|
52
59
|
|
53
60
|
describe "Titles" do
|
54
61
|
let(:xml) do
|
55
|
-
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
62
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
56
63
|
<pgterms:ebook rdf:about="ebooks/98765">
|
57
64
|
<dcterms:title>A Great Title</dcterms:title>
|
58
65
|
</pgterms:ebook>
|
@@ -69,7 +76,7 @@ module GutenbergRdf
|
|
69
76
|
|
70
77
|
context "with a title and subtitle, on separate lines" do
|
71
78
|
let(:xml) do
|
72
|
-
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
79
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
73
80
|
<pgterms:ebook rdf:about="ebooks/98765">
|
74
81
|
<dcterms:title>A Great Multi-Title
|
75
82
|
Or, a Subtitle</dcterms:title>
|
@@ -88,7 +95,7 @@ module GutenbergRdf
|
|
88
95
|
|
89
96
|
context "with; title, or, subtitle (we need to split on the 'or')" do
|
90
97
|
let(:xml) do
|
91
|
-
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
98
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
92
99
|
<pgterms:ebook rdf:about="ebooks/98765">
|
93
100
|
<dcterms:title>A Great Multi-Title, or, a Subtitle</dcterms:title>
|
94
101
|
</pgterms:ebook>
|
@@ -106,7 +113,7 @@ module GutenbergRdf
|
|
106
113
|
|
107
114
|
context "when title:subtitle are separated by a colon" do
|
108
115
|
let(:xml) do
|
109
|
-
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
116
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
110
117
|
<pgterms:ebook rdf:about="ebooks/98765">
|
111
118
|
<dcterms:title>A Great Multi-Title: And a Subtitle</dcterms:title>
|
112
119
|
</pgterms:ebook>
|
@@ -124,7 +131,7 @@ module GutenbergRdf
|
|
124
131
|
|
125
132
|
context "when title; and subtitle are separated by a semi-colon" do
|
126
133
|
let(:xml) do
|
127
|
-
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
134
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
128
135
|
<pgterms:ebook rdf:about="ebooks/98765">
|
129
136
|
<dcterms:title>A Great Multi-Title; Or, a Subtitle</dcterms:title>
|
130
137
|
</pgterms:ebook>
|
@@ -140,12 +147,12 @@ module GutenbergRdf
|
|
140
147
|
|
141
148
|
context "...except when subtitles already exists" do
|
142
149
|
let(:xml) do
|
143
|
-
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
150
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
151
|
+
<pgterms:ebook rdf:about="ebooks/98765">
|
152
|
+
<dcterms:title>A Great Multi-Title; and some other text
|
153
|
+
Then a Subtitle on a newline</dcterms:title>
|
154
|
+
</pgterms:ebook>
|
155
|
+
</rdf:RDF>'
|
149
156
|
end
|
150
157
|
let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
|
151
158
|
it "expects a title" do
|
@@ -160,22 +167,24 @@ module GutenbergRdf
|
|
160
167
|
|
161
168
|
describe "#authors" do
|
162
169
|
let(:xml) do
|
163
|
-
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:marcrel="http://id.loc.gov/vocabulary/relators">
|
170
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:marcrel="http://id.loc.gov/vocabulary/relators/">
|
164
171
|
<pgterms:ebook rdf:about="ebooks/99999999">
|
165
|
-
<
|
166
|
-
|
172
|
+
<dcterms:creator>
|
173
|
+
<pgterms:agent rdf:about="2009/agents/116">
|
174
|
+
<pgterms:alias>Verschillende</pgterms:alias>
|
175
|
+
<pgterms:name>Various</pgterms:name>
|
176
|
+
</pgterms:agent>
|
177
|
+
</dcterms:creator>
|
178
|
+
<marcrel:ctb>
|
179
|
+
<pgterms:agent rdf:about="2009/agents/402">
|
180
|
+
<pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:deathdate>
|
181
|
+
<pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:birthdate>
|
182
|
+
<pgterms:name>Dodge, Mary Mapes</pgterms:name>
|
183
|
+
<pgterms:alias>Dodge, Mary</pgterms:alias>
|
184
|
+
<pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Mary_Mapes_Dodge"/>
|
185
|
+
</pgterms:agent>
|
186
|
+
</marcrel:ctb>
|
167
187
|
</pgterms:ebook>
|
168
|
-
<pgterms:agent rdf:about="2009/agents/402">
|
169
|
-
<pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
|
170
|
-
<pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
|
171
|
-
<pgterms:name>Dodge, Mary Mapes</pgterms:name>
|
172
|
-
<pgterms:alias>Dodge, Mary</pgterms:alias>
|
173
|
-
<pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Mary_Mapes_Dodge"/>
|
174
|
-
</pgterms:agent>
|
175
|
-
<pgterms:agent rdf:about="2009/agents/116">
|
176
|
-
<pgterms:alias>Verschillende</pgterms:alias>
|
177
|
-
<pgterms:name>Various</pgterms:name>
|
178
|
-
</pgterms:agent>
|
179
188
|
</rdf:RDF>'
|
180
189
|
end
|
181
190
|
let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
|
@@ -183,35 +192,35 @@ module GutenbergRdf
|
|
183
192
|
it "returns the correct number of authors" do
|
184
193
|
expect(rdf.authors.count).to be 2
|
185
194
|
end
|
186
|
-
it "expects an
|
187
|
-
expect(rdf.authors
|
188
|
-
end
|
189
|
-
it "has the correct author names" do
|
190
|
-
expect(rdf.authors.first.fullname).to eq 'Mary Mapes Dodge'
|
195
|
+
it "expects an Agent object" do
|
196
|
+
expect(rdf.authors[0]).to be_an_instance_of Rdf::Agent
|
191
197
|
end
|
192
198
|
it "expects the author to have an aut role" do
|
193
|
-
expect(rdf.authors.
|
199
|
+
expect(rdf.authors[0].role).to eq 'aut'
|
200
|
+
end
|
201
|
+
it "has the correct author names" do
|
202
|
+
expect(rdf.authors[1].fullname).to eq 'Mary Mapes Dodge'
|
194
203
|
end
|
195
204
|
it "expects other agents to have the correct role" do
|
196
|
-
expect(rdf.authors.
|
205
|
+
expect(rdf.authors[1].role).to eq 'ctb'
|
197
206
|
end
|
198
207
|
end
|
199
208
|
|
200
209
|
describe "#subjects" do
|
201
210
|
let(:xml) do
|
202
|
-
%q{<rdf:RDF xmlns:
|
211
|
+
%q{<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
203
212
|
<pgterms:ebook rdf:about="ebooks/98765">
|
204
213
|
<dcterms:subject>
|
205
|
-
<rdf:Description>
|
206
|
-
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/
|
207
|
-
<rdf:value>
|
208
|
-
<rdf:value>Children's periodicals, American</rdf:value>
|
214
|
+
<rdf:Description rdf:nodeID="Ndfsc8xdsfwar734897n7sdofyhod11b9">
|
215
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCC"/>
|
216
|
+
<rdf:value>PZ</rdf:value>
|
209
217
|
</rdf:Description>
|
210
218
|
</dcterms:subject>
|
211
219
|
<dcterms:subject>
|
212
|
-
<rdf:Description>
|
213
|
-
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/
|
214
|
-
<rdf:value>
|
220
|
+
<rdf:Description rdf:nodeID="Ndfcdh8934hsdljkfh98y89hlfhltyab8">
|
221
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCSH"/>
|
222
|
+
<rdf:value>Children's literature -- Periodicals</rdf:value>
|
223
|
+
<rdf:value>Children's periodicals, American</rdf:value>
|
215
224
|
</rdf:Description>
|
216
225
|
</dcterms:subject>
|
217
226
|
</pgterms:ebook>
|
@@ -230,69 +239,63 @@ module GutenbergRdf
|
|
230
239
|
describe "#covers" do
|
231
240
|
describe "official PG covers" do
|
232
241
|
let(:xml) do
|
233
|
-
'<rdf:RDF xmlns:
|
234
|
-
<pgterms:ebook rdf:about="ebooks/
|
235
|
-
<dcterms:hasFormat
|
236
|
-
|
237
|
-
|
238
|
-
|
242
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
243
|
+
<pgterms:ebook rdf:about="ebooks/98765">
|
244
|
+
<dcterms:hasFormat>
|
245
|
+
<pgterms:file rdf:about="http://www.gutenberg.org/cache/epub/98765/pg98765.cover.small.jpg">
|
246
|
+
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">2699</dcterms:extent>
|
247
|
+
<dcterms:format>
|
248
|
+
<rdf:Description rdf:nodeID="N9u34589eyfdiuy8934y787d8f97sg786">
|
249
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
250
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
|
251
|
+
</rdf:Description>
|
252
|
+
</dcterms:format>
|
253
|
+
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
254
|
+
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-03-25T20:57:55.668737</dcterms:modified>
|
255
|
+
</pgterms:file>
|
256
|
+
</dcterms:hasFormat>
|
257
|
+
<pgterms:marc901>http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg</pgterms:marc901>
|
258
|
+
<pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/98765/98765-h/images/cover.jpg</pgterms:marc901>
|
259
|
+
<pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/98765/98765-rst/images/cover.jpg</pgterms:marc901>
|
260
|
+
<dcterms:hasFormat>
|
261
|
+
<pgterms:file rdf:about="http://www.gutenberg.org/cache/epub/98765/pg98765.cover.medium.jpg">
|
262
|
+
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-03-25T20:57:55.889736</dcterms:modified>
|
263
|
+
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
264
|
+
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">10856</dcterms:extent>
|
265
|
+
<dcterms:format>
|
266
|
+
<rdf:Description rdf:nodeID="N8df89ys8993p4qu89uenf89dusp38a07">
|
267
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
268
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
|
269
|
+
</rdf:Description>
|
270
|
+
</dcterms:format>
|
271
|
+
</pgterms:file>
|
272
|
+
</dcterms:hasFormat>
|
239
273
|
</pgterms:ebook>
|
240
|
-
<pgterms:file rdf:about="http://www.gutenberg.org/ebooks/12345.epub.noimages">
|
241
|
-
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">92652</dcterms:extent>
|
242
|
-
<dcterms:format>
|
243
|
-
<rdf:Description>
|
244
|
-
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
245
|
-
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/epub+zip</rdf:value>
|
246
|
-
</rdf:Description>
|
247
|
-
</dcterms:format>
|
248
|
-
<dcterms:isFormatOf rdf:resource="ebooks/12345"/>
|
249
|
-
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-09-21T19:22:32.115259</dcterms:modified>
|
250
|
-
</pgterms:file>
|
251
|
-
<pgterms:file rdf:about="http://www.gutenberg.org/ebooks/12345.cover.medium">
|
252
|
-
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">10856</dcterms:extent>
|
253
|
-
<dcterms:format>
|
254
|
-
<rdf:Description>
|
255
|
-
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
256
|
-
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
|
257
|
-
</rdf:Description>
|
258
|
-
</dcterms:format>
|
259
|
-
<dcterms:isFormatOf rdf:resource="ebooks/12345"/>
|
260
|
-
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-09-21T19:22:34.484114</dcterms:modified>
|
261
|
-
</pgterms:file>
|
262
|
-
<pgterms:file rdf:about="http://www.gutenberg.org/ebooks/12345.cover.small">
|
263
|
-
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1904</dcterms:extent>
|
264
|
-
<dcterms:format>
|
265
|
-
<rdf:Description>
|
266
|
-
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
267
|
-
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
|
268
|
-
</rdf:Description>
|
269
|
-
</dcterms:format>
|
270
|
-
<dcterms:isFormatOf rdf:resource="ebooks/12345"/>
|
271
|
-
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-09-21T19:22:34.379124</dcterms:modified>
|
272
|
-
</pgterms:file>
|
273
274
|
</rdf:RDF>'
|
274
275
|
end
|
275
276
|
let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
|
276
277
|
|
277
278
|
it "expects the correct number of entries returned" do
|
278
|
-
expect(rdf.covers.count).to be
|
279
|
+
expect(rdf.covers.count).to be 4
|
280
|
+
end
|
281
|
+
it "expect medium cover url to be first in the list" do
|
282
|
+
expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/cache/epub/98765/pg98765.cover.medium.jpg'
|
279
283
|
end
|
280
|
-
it "
|
281
|
-
expect(rdf.covers[
|
282
|
-
expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/ebooks/12345.cover.small'
|
284
|
+
it "expect the small cover url after the medium" do
|
285
|
+
expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/cache/epub/98765/pg98765.cover.small.jpg'
|
283
286
|
end
|
284
287
|
it "expects any other images to be listed after the official ones" do
|
285
|
-
expect(rdf.covers[2]).to eql 'http://www.gutenberg.org/files/
|
288
|
+
expect(rdf.covers[2]).to eql 'http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg'
|
286
289
|
end
|
287
290
|
end
|
288
291
|
|
289
292
|
describe "HTML ebook cover image" do
|
290
293
|
let(:xml) do
|
291
|
-
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
292
|
-
<pgterms:ebook rdf:about="ebooks/
|
293
|
-
<pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/
|
294
|
-
<pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/
|
295
|
-
<pgterms:marc901>http://www.gutenberg.org/files/
|
294
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
295
|
+
<pgterms:ebook rdf:about="ebooks/98765">
|
296
|
+
<pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/98765/98765-rst/images/cover.jpg</pgterms:marc901>
|
297
|
+
<pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/98765/98765-h/images/cover.jpg</pgterms:marc901>
|
298
|
+
<pgterms:marc901>http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg</pgterms:marc901>
|
296
299
|
</pgterms:ebook>
|
297
300
|
</rdf:RDF>'
|
298
301
|
end
|
@@ -304,43 +307,52 @@ module GutenbergRdf
|
|
304
307
|
it "should convert File URIs to the Gutenberg URL" do
|
305
308
|
expect(rdf.covers.first).to match 'http://www.gutenberg.org'
|
306
309
|
end
|
307
|
-
it "expects the
|
308
|
-
expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/files/
|
309
|
-
|
310
|
+
it "expects the HTML cover to be listed first" do
|
311
|
+
expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg'
|
312
|
+
end
|
313
|
+
it "expects the RST cover to be listed after the HTML" do
|
314
|
+
expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/files/98765/98765-rst/images/cover.jpg'
|
310
315
|
end
|
311
316
|
end
|
312
317
|
end
|
313
318
|
|
314
319
|
describe "#ebook" do
|
315
320
|
let(:xml) do
|
316
|
-
'<rdf:RDF xmlns:
|
321
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
317
322
|
<pgterms:ebook rdf:about="ebooks/98765">
|
318
|
-
<dcterms:hasFormat
|
319
|
-
|
323
|
+
<dcterms:hasFormat>
|
324
|
+
<pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
|
325
|
+
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
326
|
+
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
|
327
|
+
<dcterms:format>
|
328
|
+
<rdf:Description rdf:nodeID="N87dfy78yd78s6gsg6f8970d76g0f6d9b">
|
329
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain</rdf:value>
|
330
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
331
|
+
</rdf:Description>
|
332
|
+
</dcterms:format>
|
333
|
+
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
|
334
|
+
</pgterms:file>
|
335
|
+
</dcterms:hasFormat>
|
336
|
+
<dcterms:hasFormat>
|
337
|
+
<pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
|
338
|
+
<dcterms:format>
|
339
|
+
<rdf:Description rdf:nodeID="Ndfsd78tf34tukjehdsouyo4yrefh6dea">
|
340
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
|
341
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
342
|
+
</rdf:Description>
|
343
|
+
</dcterms:format>
|
344
|
+
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
|
345
|
+
<dcterms:format>
|
346
|
+
<rdf:Description rdf:nodeID="Nfy7we43yhluwe9syrqyp2ewufy0f6d1e">
|
347
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
|
348
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
349
|
+
</rdf:Description>
|
350
|
+
</dcterms:format>
|
351
|
+
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
352
|
+
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
|
353
|
+
</pgterms:file>
|
354
|
+
</dcterms:hasFormat>
|
320
355
|
</pgterms:ebook>
|
321
|
-
<pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
|
322
|
-
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
|
323
|
-
<dcterms:format>
|
324
|
-
<rdf:Description>
|
325
|
-
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
326
|
-
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=utf-8</rdf:value>
|
327
|
-
</rdf:Description>
|
328
|
-
</dcterms:format>
|
329
|
-
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
330
|
-
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
|
331
|
-
</pgterms:file>
|
332
|
-
<pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
|
333
|
-
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
|
334
|
-
<dcterms:format>
|
335
|
-
<rdf:Description>
|
336
|
-
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
337
|
-
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
|
338
|
-
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
|
339
|
-
</rdf:Description>
|
340
|
-
</dcterms:format>
|
341
|
-
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
342
|
-
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
|
343
|
-
</pgterms:file>
|
344
356
|
</rdf:RDF>'
|
345
357
|
end
|
346
358
|
let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
|
@@ -349,7 +361,50 @@ module GutenbergRdf
|
|
349
361
|
expect(rdf.ebooks.count).to be 2
|
350
362
|
end
|
351
363
|
it "expects an entry to be a Media class" do
|
352
|
-
expect(rdf.ebooks.first
|
364
|
+
expect(rdf.ebooks.first).to be_an_instance_of Rdf::Media
|
365
|
+
end
|
366
|
+
|
367
|
+
context "only collect ebook media files" do
|
368
|
+
let(:xml) do
|
369
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
370
|
+
<pgterms:ebook rdf:about="ebooks/98765">
|
371
|
+
<dcterms:hasFormat>
|
372
|
+
<pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
|
373
|
+
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
374
|
+
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
|
375
|
+
<dcterms:format>
|
376
|
+
<rdf:Description rdf:nodeID="N87dfy78yd78s6gsg6f8970d76g0f6d9b">
|
377
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain</rdf:value>
|
378
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
379
|
+
</rdf:Description>
|
380
|
+
</dcterms:format>
|
381
|
+
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
|
382
|
+
</pgterms:file>
|
383
|
+
</dcterms:hasFormat>
|
384
|
+
<dcterms:hasFormat>
|
385
|
+
<pgterms:file rdf:about="http://www.gutenberg.org/cache/epub/98765/pg98765.cover.small.jpg">
|
386
|
+
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">2699</dcterms:extent>
|
387
|
+
<dcterms:format>
|
388
|
+
<rdf:Description rdf:nodeID="N9u34589eyfdiuy8934y787d8f97sg786">
|
389
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
390
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
|
391
|
+
</rdf:Description>
|
392
|
+
</dcterms:format>
|
393
|
+
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
394
|
+
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-03-25T20:57:55.668737</dcterms:modified>
|
395
|
+
</pgterms:file>
|
396
|
+
</dcterms:hasFormat>
|
397
|
+
</pgterms:ebook>
|
398
|
+
</rdf:RDF>'
|
399
|
+
end
|
400
|
+
let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
|
401
|
+
|
402
|
+
it "only extracts one media file" do
|
403
|
+
expect(rdf.ebooks.count).to be 1
|
404
|
+
end
|
405
|
+
it "expects the media type to be for an ebook" do
|
406
|
+
expect(rdf.ebooks[0].media_type).to eq 'text/plain'
|
407
|
+
end
|
353
408
|
end
|
354
409
|
end
|
355
410
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gutenberg_rdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Cook
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-07-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -28,7 +28,8 @@ description: A Ruby wrapper for the Project Gutenberg RDF catalog files (require
|
|
28
28
|
Ruby 2).
|
29
29
|
email:
|
30
30
|
- m@mikecook.co.uk
|
31
|
-
executables:
|
31
|
+
executables:
|
32
|
+
- rdf
|
32
33
|
extensions: []
|
33
34
|
extra_rdoc_files: []
|
34
35
|
files:
|
@@ -38,6 +39,7 @@ files:
|
|
38
39
|
- LICENSE.txt
|
39
40
|
- README.md
|
40
41
|
- Rakefile
|
42
|
+
- bin/rdf
|
41
43
|
- gutenberg_rdf.gemspec
|
42
44
|
- lib/gutenberg_rdf.rb
|
43
45
|
- lib/gutenberg_rdf/rdf.rb
|
@@ -69,7 +71,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
69
71
|
version: '0'
|
70
72
|
requirements: []
|
71
73
|
rubyforge_project:
|
72
|
-
rubygems_version: 2.
|
74
|
+
rubygems_version: 2.3.0
|
73
75
|
signing_key:
|
74
76
|
specification_version: 4
|
75
77
|
summary: A Ruby wrapper for the Project Gutenberg RDF catalog files.
|
@@ -79,3 +81,4 @@ test_files:
|
|
79
81
|
- spec/gutenberg_rdf/rdf_spec.rb
|
80
82
|
- spec/gutenberg_rdf_spec.rb
|
81
83
|
- spec/spec_helper.rb
|
84
|
+
has_rdoc:
|