gutenberg_rdf 0.3.1 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +9 -2
- data/bin/rdf +26 -0
- data/lib/gutenberg_rdf/rdf.rb +20 -28
- data/lib/gutenberg_rdf/rdf/agent.rb +0 -4
- data/lib/gutenberg_rdf/version.rb +1 -1
- data/spec/gutenberg_rdf/rdf/agent_spec.rb +17 -20
- data/spec/gutenberg_rdf/rdf/media_spec.rb +17 -42
- data/spec/gutenberg_rdf/rdf_spec.rb +197 -142
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 26b9401e7a3223ad122100f096eebf5585a97e0e
|
4
|
+
data.tar.gz: 5fe2df876d2878fd9628edf6367c2d2d6c381803
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7fa836f7d515c77d2418e700b43ac00e1e4cef0809b967ed2a58ce929f7edb7b68c6d33a64c7f20920f775cf7b07a5e5a8d6d63e0df5a4f5f6efdb4b0a5e6cc0
|
7
|
+
data.tar.gz: 80db01f7fc89fc2bbfb4536454a8b37fa962328e3a93ac1a487c009a92963549fa241e938bedbabedfb85a997911a3b29ccde294ed07502ddb0907b02eeccbd7
|
data/README.md
CHANGED
@@ -1,11 +1,18 @@
|
|
1
1
|
# Gutenberg RDF
|
2
2
|
|
3
3
|
Gutenberg RDF is a Ruby wrapper for the Project Gutenberg RDF catalog book files,
|
4
|
-
providing
|
4
|
+
providing an API to all the metadata contained within.
|
5
|
+
|
6
|
+
The official RDF catalog can be found here: http://www.gutenberg.org/wiki/Gutenberg:Feeds
|
7
|
+
|
5
8
|
|
6
9
|
## Requirements
|
7
10
|
|
8
|
-
*
|
11
|
+
* Ruby 2.0 - this is so we get UTF-8 by default
|
12
|
+
|
13
|
+
NOTE: Around April 2014, PG changed the XML format in their RDF files
|
14
|
+
considerably, so this gem will no longer work on files from before that date.
|
15
|
+
Please make sure to download the latest catalog using the link above.
|
9
16
|
|
10
17
|
|
11
18
|
## Installation
|
data/bin/rdf
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
$LOAD_PATH << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
4
|
+
|
5
|
+
require "gutenberg_rdf"
|
6
|
+
|
7
|
+
unless ARGV.last && File.exist?(ARGV.last)
|
8
|
+
puts
|
9
|
+
puts "ERROR: Please provide the full path to an RDF file:\n\n$ bin/rdf /gutenberg/pg1.rdf"
|
10
|
+
puts
|
11
|
+
exit 1
|
12
|
+
end
|
13
|
+
|
14
|
+
rdf = GutenbergRdf.parse(ARGV.last)
|
15
|
+
|
16
|
+
puts " ID: #{rdf.id}"
|
17
|
+
puts " Type: #{rdf.type}"
|
18
|
+
puts " Title: #{rdf.title}"
|
19
|
+
puts " Subtitle: #{rdf.subtitle}"
|
20
|
+
puts " Authors: #{rdf.authors.join(', ')}"
|
21
|
+
puts "Published: #{rdf.published}"
|
22
|
+
puts " Language: #{rdf.language}"
|
23
|
+
puts " Rights: #{rdf.rights}"
|
24
|
+
puts "Bookcover: #{rdf.covers.first}"
|
25
|
+
puts " eBooks: \n#{rdf.ebooks.map{|b| " #{b.uri}" }.join("\n")}"
|
26
|
+
puts " Subjects: \n #{rdf.subjects.join("\n ")}"
|
data/lib/gutenberg_rdf/rdf.rb
CHANGED
@@ -48,7 +48,7 @@ module GutenbergRdf
|
|
48
48
|
end
|
49
49
|
|
50
50
|
def language
|
51
|
-
xml.elements['pgterms:ebook/dcterms:language'].text
|
51
|
+
xml.elements['pgterms:ebook/dcterms:language/rdf:Description/rdf:value'].text
|
52
52
|
end
|
53
53
|
|
54
54
|
def rights
|
@@ -56,13 +56,14 @@ module GutenbergRdf
|
|
56
56
|
end
|
57
57
|
|
58
58
|
def covers
|
59
|
-
official_cover_images.concat(other_cover_images).
|
59
|
+
official_cover_images.concat(other_cover_images).uniq
|
60
60
|
end
|
61
61
|
|
62
62
|
def ebooks
|
63
63
|
files = Array.new
|
64
|
-
xml.elements.each('pgterms:
|
65
|
-
|
64
|
+
xml.elements.each('pgterms:ebook/dcterms:hasFormat') do |format|
|
65
|
+
file = format.elements['pgterms:file']
|
66
|
+
files << Media.new(file) if file.elements['dcterms:format/rdf:Description/rdf:value'].text.match(/\Atext|\Aapplication/)
|
66
67
|
end
|
67
68
|
files
|
68
69
|
end
|
@@ -85,37 +86,28 @@ module GutenbergRdf
|
|
85
86
|
title_array.map(&:strip)
|
86
87
|
end
|
87
88
|
|
88
|
-
def roles
|
89
|
-
@roles ||= extract_roles
|
90
|
-
end
|
91
|
-
|
92
|
-
def extract_roles
|
93
|
-
entries = Hash.new
|
94
|
-
xml.elements.each('pgterms:ebook/dcterms:creator') do |entry|
|
95
|
-
entries["#{entry.attributes['rdf:resource'].sub('2009/agents/', '')}"] = 'aut'
|
96
|
-
end
|
97
|
-
xml.elements.each('pgterms:ebook/marcrel:*') do |entry|
|
98
|
-
entries["#{entry.attributes['rdf:resource'].sub('2009/agents/', '')}"] = entry.name
|
99
|
-
end
|
100
|
-
entries
|
101
|
-
end
|
102
|
-
|
103
89
|
def extract_authors
|
104
|
-
|
105
|
-
xml.elements.each('pgterms:
|
106
|
-
|
107
|
-
|
108
|
-
|
90
|
+
agents = Array.new
|
91
|
+
xml.elements.each('pgterms:ebook/dcterms:creator') do |contributor|
|
92
|
+
agent = Agent.new(contributor.elements['pgterms:agent'])
|
93
|
+
agent.role = 'aut'
|
94
|
+
agents << agent
|
109
95
|
end
|
110
|
-
|
96
|
+
xml.elements.each('pgterms:ebook/marcrel:*') do |contributor|
|
97
|
+
agent = Agent.new(contributor.elements['pgterms:agent'])
|
98
|
+
agent.role = contributor.name
|
99
|
+
agents << agent
|
100
|
+
end
|
101
|
+
agents
|
111
102
|
end
|
112
103
|
|
113
104
|
def official_cover_images
|
114
105
|
entries = Array.new
|
115
|
-
xml.elements.each('pgterms:
|
106
|
+
xml.elements.each('pgterms:ebook/dcterms:hasFormat') do |format|
|
107
|
+
file = format.elements['pgterms:file']
|
116
108
|
entries << file.attributes['about'] if file_is_image?(file)
|
117
109
|
end
|
118
|
-
entries
|
110
|
+
entries.sort
|
119
111
|
end
|
120
112
|
|
121
113
|
def file_is_image?(node)
|
@@ -132,7 +124,7 @@ module GutenbergRdf
|
|
132
124
|
cover.sub!(/\Afile:\/\/\/public\/vhost\/g\/gutenberg\/html/, 'http://www.gutenberg.org')
|
133
125
|
entries << cover
|
134
126
|
end
|
135
|
-
entries
|
127
|
+
entries.sort
|
136
128
|
end
|
137
129
|
|
138
130
|
end
|
@@ -3,18 +3,24 @@ require 'spec_helper'
|
|
3
3
|
module GutenbergRdf
|
4
4
|
class Rdf
|
5
5
|
describe Agent do
|
6
|
+
let(:xml) do
|
7
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:marcrel="http://id.loc.gov/vocabulary/relators/">
|
8
|
+
<pgterms:ebook rdf:about="ebooks/99999999">
|
9
|
+
<dcterms:creator>
|
10
|
+
<pgterms:agent rdf:about="2009/agents/402">
|
11
|
+
<pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
|
12
|
+
<pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
|
13
|
+
<pgterms:name>Doe, Jon James</pgterms:name>
|
14
|
+
<pgterms:alias>Doe, Jon</pgterms:alias>
|
15
|
+
<pgterms:alias>Doe, J. J.</pgterms:alias>
|
16
|
+
<pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Jon_James_Doe"/>
|
17
|
+
</pgterms:agent>
|
18
|
+
</dcterms:creator>
|
19
|
+
</pgterms:ebook>
|
20
|
+
</rdf:RDF>'
|
21
|
+
end
|
6
22
|
let(:agent) do
|
7
|
-
xml
|
8
|
-
<pgterms:agent rdf:about="2009/agents/402">
|
9
|
-
<pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
|
10
|
-
<pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
|
11
|
-
<pgterms:name>Doe, Jon James</pgterms:name>
|
12
|
-
<pgterms:alias>Doe, Jon</pgterms:alias>
|
13
|
-
<pgterms:alias>Doe, J. J.</pgterms:alias>
|
14
|
-
<pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Jon_James_Doe"/>
|
15
|
-
</pgterms:agent>
|
16
|
-
</rdf:RDF>'
|
17
|
-
Agent.new(REXML::Document.new(xml).root.elements['pgterms:agent'])
|
23
|
+
Agent.new(REXML::Document.new(xml).root.elements['pgterms:ebook/dcterms:creator/pgterms:agent'])
|
18
24
|
end
|
19
25
|
|
20
26
|
it "expects an agent ID" do
|
@@ -25,15 +31,6 @@ module GutenbergRdf
|
|
25
31
|
expect(agent.role).to eq 'oth'
|
26
32
|
end
|
27
33
|
|
28
|
-
describe "Assigning Roles" do
|
29
|
-
it "assigns the correct value to .role" do
|
30
|
-
roles = {'402' => 'aut', '116' => 'ctb'}
|
31
|
-
agent.assign_role(roles)
|
32
|
-
|
33
|
-
expect(agent.role).to eq 'aut'
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
34
|
it "expects the last name" do
|
38
35
|
expect(agent.lastname).to eq 'Doe'
|
39
36
|
end
|
@@ -3,23 +3,26 @@ require 'spec_helper'
|
|
3
3
|
module GutenbergRdf
|
4
4
|
class Rdf
|
5
5
|
describe Media do
|
6
|
-
|
7
6
|
let(:xml) do
|
8
|
-
'<rdf:RDF xmlns:
|
9
|
-
<pgterms:
|
10
|
-
<dcterms:
|
11
|
-
|
12
|
-
|
13
|
-
<
|
14
|
-
<
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
7
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
8
|
+
<pgterms:ebook rdf:about="ebooks/98765">
|
9
|
+
<dcterms:hasFormat>
|
10
|
+
<pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
|
11
|
+
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
12
|
+
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
|
13
|
+
<dcterms:format>
|
14
|
+
<rdf:Description rdf:nodeID="N87dfy78yd78s6gsg6f8970d76g0f6d9b">
|
15
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=utf-8</rdf:value>
|
16
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
17
|
+
</rdf:Description>
|
18
|
+
</dcterms:format>
|
19
|
+
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
|
20
|
+
</pgterms:file>
|
21
|
+
</dcterms:hasFormat>
|
22
|
+
</pgterms:ebook>
|
20
23
|
</rdf:RDF>'
|
21
24
|
end
|
22
|
-
let(:media) { Media.new(REXML::Document.new(xml).elements['rdf:RDF/pgterms:file']) }
|
25
|
+
let(:media) { Media.new(REXML::Document.new(xml).elements['rdf:RDF/pgterms:ebook/dcterms:hasFormat/pgterms:file']) }
|
23
26
|
|
24
27
|
it "expects the file URI" do
|
25
28
|
expect(media.uri).to eql 'http://www.gutenberg.org/ebooks/98765.txt.utf-8'
|
@@ -36,34 +39,6 @@ module GutenbergRdf
|
|
36
39
|
it "should return the modified datetime" do
|
37
40
|
expect(media.modified.to_s).to eql '2010-02-16T08:29:52-07:00'
|
38
41
|
end
|
39
|
-
|
40
|
-
context "when there are two media types" do
|
41
|
-
let(:xml) do
|
42
|
-
'<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
43
|
-
<pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
|
44
|
-
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
|
45
|
-
<dcterms:format>
|
46
|
-
<rdf:Description>
|
47
|
-
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
48
|
-
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
|
49
|
-
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
|
50
|
-
</rdf:Description>
|
51
|
-
</dcterms:format>
|
52
|
-
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
53
|
-
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
|
54
|
-
</pgterms:file>
|
55
|
-
</rdf:RDF>'
|
56
|
-
end
|
57
|
-
let(:media) { Media.new(REXML::Document.new(xml).elements['rdf:RDF/pgterms:file']) }
|
58
|
-
|
59
|
-
it "expects the first entry to be used" do
|
60
|
-
expect(media.media_type).to eql 'application/zip'
|
61
|
-
end
|
62
|
-
it "expects the encoding to be an empty string" do
|
63
|
-
expect(media.encoding).to eql ''
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
42
|
end
|
68
43
|
end
|
69
44
|
end
|
@@ -2,42 +2,49 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
module GutenbergRdf
|
4
4
|
describe Rdf do
|
5
|
-
|
6
|
-
|
5
|
+
|
6
|
+
describe "basic metadata" do
|
7
|
+
let(:xml) do
|
8
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
7
9
|
<pgterms:ebook rdf:about="ebooks/98765">
|
8
10
|
<dcterms:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2006-09-28</dcterms:issued>
|
9
|
-
<dcterms:language
|
11
|
+
<dcterms:language>
|
12
|
+
<rdf:Description rdf:nodeID="N88989dfs7984987df987cvcsd876ew79">
|
13
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/RFC4646">en</rdf:value>
|
14
|
+
</rdf:Description>
|
15
|
+
</dcterms:language>
|
10
16
|
<dcterms:publisher>Project Gutenberg</dcterms:publisher>
|
11
17
|
<dcterms:rights>Public domain in the USA.</dcterms:rights>
|
12
18
|
</pgterms:ebook>
|
13
|
-
|
14
|
-
|
15
|
-
|
19
|
+
</rdf:RDF>'
|
20
|
+
end
|
21
|
+
let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
|
16
22
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
23
|
+
it "expects an id" do
|
24
|
+
expect(rdf.id).to eql "98765"
|
25
|
+
end
|
26
|
+
it "expects a published date" do
|
27
|
+
expect(rdf.published).to eql "2006-09-28"
|
28
|
+
end
|
29
|
+
it "expects a publisher" do
|
30
|
+
expect(rdf.publisher).to eql "Project Gutenberg"
|
31
|
+
end
|
32
|
+
it "expects a language" do
|
33
|
+
expect(rdf.language).to eql "en"
|
34
|
+
end
|
35
|
+
it "expects the rights" do
|
36
|
+
expect(rdf.rights).to eql "Public domain in the USA."
|
37
|
+
end
|
31
38
|
end
|
32
39
|
|
33
40
|
describe "#type" do
|
34
41
|
let(:xml) do
|
35
|
-
'<rdf:RDF xmlns:
|
42
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
36
43
|
<pgterms:ebook rdf:about="ebooks/98765">
|
37
44
|
<dcterms:type>
|
38
|
-
<rdf:Description>
|
39
|
-
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/DCMIType"/>
|
45
|
+
<rdf:Description rdf:nodeID="Nd89943yhljdsf93489ydfs897g7fd897">
|
40
46
|
<rdf:value>Text</rdf:value>
|
47
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/DCMIType"/>
|
41
48
|
</rdf:Description>
|
42
49
|
</dcterms:type>
|
43
50
|
</pgterms:ebook>
|
@@ -52,7 +59,7 @@ module GutenbergRdf
|
|
52
59
|
|
53
60
|
describe "Titles" do
|
54
61
|
let(:xml) do
|
55
|
-
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
62
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
56
63
|
<pgterms:ebook rdf:about="ebooks/98765">
|
57
64
|
<dcterms:title>A Great Title</dcterms:title>
|
58
65
|
</pgterms:ebook>
|
@@ -69,7 +76,7 @@ module GutenbergRdf
|
|
69
76
|
|
70
77
|
context "with a title and subtitle, on separate lines" do
|
71
78
|
let(:xml) do
|
72
|
-
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
79
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
73
80
|
<pgterms:ebook rdf:about="ebooks/98765">
|
74
81
|
<dcterms:title>A Great Multi-Title
|
75
82
|
Or, a Subtitle</dcterms:title>
|
@@ -88,7 +95,7 @@ module GutenbergRdf
|
|
88
95
|
|
89
96
|
context "with; title, or, subtitle (we need to split on the 'or')" do
|
90
97
|
let(:xml) do
|
91
|
-
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
98
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
92
99
|
<pgterms:ebook rdf:about="ebooks/98765">
|
93
100
|
<dcterms:title>A Great Multi-Title, or, a Subtitle</dcterms:title>
|
94
101
|
</pgterms:ebook>
|
@@ -106,7 +113,7 @@ module GutenbergRdf
|
|
106
113
|
|
107
114
|
context "when title:subtitle are separated by a colon" do
|
108
115
|
let(:xml) do
|
109
|
-
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
116
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
110
117
|
<pgterms:ebook rdf:about="ebooks/98765">
|
111
118
|
<dcterms:title>A Great Multi-Title: And a Subtitle</dcterms:title>
|
112
119
|
</pgterms:ebook>
|
@@ -124,7 +131,7 @@ module GutenbergRdf
|
|
124
131
|
|
125
132
|
context "when title; and subtitle are separated by a semi-colon" do
|
126
133
|
let(:xml) do
|
127
|
-
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
134
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
128
135
|
<pgterms:ebook rdf:about="ebooks/98765">
|
129
136
|
<dcterms:title>A Great Multi-Title; Or, a Subtitle</dcterms:title>
|
130
137
|
</pgterms:ebook>
|
@@ -140,12 +147,12 @@ module GutenbergRdf
|
|
140
147
|
|
141
148
|
context "...except when subtitles already exists" do
|
142
149
|
let(:xml) do
|
143
|
-
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
150
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
151
|
+
<pgterms:ebook rdf:about="ebooks/98765">
|
152
|
+
<dcterms:title>A Great Multi-Title; and some other text
|
153
|
+
Then a Subtitle on a newline</dcterms:title>
|
154
|
+
</pgterms:ebook>
|
155
|
+
</rdf:RDF>'
|
149
156
|
end
|
150
157
|
let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
|
151
158
|
it "expects a title" do
|
@@ -160,22 +167,24 @@ module GutenbergRdf
|
|
160
167
|
|
161
168
|
describe "#authors" do
|
162
169
|
let(:xml) do
|
163
|
-
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:marcrel="http://id.loc.gov/vocabulary/relators">
|
170
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:marcrel="http://id.loc.gov/vocabulary/relators/">
|
164
171
|
<pgterms:ebook rdf:about="ebooks/99999999">
|
165
|
-
<
|
166
|
-
|
172
|
+
<dcterms:creator>
|
173
|
+
<pgterms:agent rdf:about="2009/agents/116">
|
174
|
+
<pgterms:alias>Verschillende</pgterms:alias>
|
175
|
+
<pgterms:name>Various</pgterms:name>
|
176
|
+
</pgterms:agent>
|
177
|
+
</dcterms:creator>
|
178
|
+
<marcrel:ctb>
|
179
|
+
<pgterms:agent rdf:about="2009/agents/402">
|
180
|
+
<pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:deathdate>
|
181
|
+
<pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:birthdate>
|
182
|
+
<pgterms:name>Dodge, Mary Mapes</pgterms:name>
|
183
|
+
<pgterms:alias>Dodge, Mary</pgterms:alias>
|
184
|
+
<pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Mary_Mapes_Dodge"/>
|
185
|
+
</pgterms:agent>
|
186
|
+
</marcrel:ctb>
|
167
187
|
</pgterms:ebook>
|
168
|
-
<pgterms:agent rdf:about="2009/agents/402">
|
169
|
-
<pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
|
170
|
-
<pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
|
171
|
-
<pgterms:name>Dodge, Mary Mapes</pgterms:name>
|
172
|
-
<pgterms:alias>Dodge, Mary</pgterms:alias>
|
173
|
-
<pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Mary_Mapes_Dodge"/>
|
174
|
-
</pgterms:agent>
|
175
|
-
<pgterms:agent rdf:about="2009/agents/116">
|
176
|
-
<pgterms:alias>Verschillende</pgterms:alias>
|
177
|
-
<pgterms:name>Various</pgterms:name>
|
178
|
-
</pgterms:agent>
|
179
188
|
</rdf:RDF>'
|
180
189
|
end
|
181
190
|
let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
|
@@ -183,35 +192,35 @@ module GutenbergRdf
|
|
183
192
|
it "returns the correct number of authors" do
|
184
193
|
expect(rdf.authors.count).to be 2
|
185
194
|
end
|
186
|
-
it "expects an
|
187
|
-
expect(rdf.authors
|
188
|
-
end
|
189
|
-
it "has the correct author names" do
|
190
|
-
expect(rdf.authors.first.fullname).to eq 'Mary Mapes Dodge'
|
195
|
+
it "expects an Agent object" do
|
196
|
+
expect(rdf.authors[0]).to be_an_instance_of Rdf::Agent
|
191
197
|
end
|
192
198
|
it "expects the author to have an aut role" do
|
193
|
-
expect(rdf.authors.
|
199
|
+
expect(rdf.authors[0].role).to eq 'aut'
|
200
|
+
end
|
201
|
+
it "has the correct author names" do
|
202
|
+
expect(rdf.authors[1].fullname).to eq 'Mary Mapes Dodge'
|
194
203
|
end
|
195
204
|
it "expects other agents to have the correct role" do
|
196
|
-
expect(rdf.authors.
|
205
|
+
expect(rdf.authors[1].role).to eq 'ctb'
|
197
206
|
end
|
198
207
|
end
|
199
208
|
|
200
209
|
describe "#subjects" do
|
201
210
|
let(:xml) do
|
202
|
-
%q{<rdf:RDF xmlns:
|
211
|
+
%q{<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
203
212
|
<pgterms:ebook rdf:about="ebooks/98765">
|
204
213
|
<dcterms:subject>
|
205
|
-
<rdf:Description>
|
206
|
-
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/
|
207
|
-
<rdf:value>
|
208
|
-
<rdf:value>Children's periodicals, American</rdf:value>
|
214
|
+
<rdf:Description rdf:nodeID="Ndfsc8xdsfwar734897n7sdofyhod11b9">
|
215
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCC"/>
|
216
|
+
<rdf:value>PZ</rdf:value>
|
209
217
|
</rdf:Description>
|
210
218
|
</dcterms:subject>
|
211
219
|
<dcterms:subject>
|
212
|
-
<rdf:Description>
|
213
|
-
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/
|
214
|
-
<rdf:value>
|
220
|
+
<rdf:Description rdf:nodeID="Ndfcdh8934hsdljkfh98y89hlfhltyab8">
|
221
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCSH"/>
|
222
|
+
<rdf:value>Children's literature -- Periodicals</rdf:value>
|
223
|
+
<rdf:value>Children's periodicals, American</rdf:value>
|
215
224
|
</rdf:Description>
|
216
225
|
</dcterms:subject>
|
217
226
|
</pgterms:ebook>
|
@@ -230,69 +239,63 @@ module GutenbergRdf
|
|
230
239
|
describe "#covers" do
|
231
240
|
describe "official PG covers" do
|
232
241
|
let(:xml) do
|
233
|
-
'<rdf:RDF xmlns:
|
234
|
-
<pgterms:ebook rdf:about="ebooks/
|
235
|
-
<dcterms:hasFormat
|
236
|
-
|
237
|
-
|
238
|
-
|
242
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
243
|
+
<pgterms:ebook rdf:about="ebooks/98765">
|
244
|
+
<dcterms:hasFormat>
|
245
|
+
<pgterms:file rdf:about="http://www.gutenberg.org/cache/epub/98765/pg98765.cover.small.jpg">
|
246
|
+
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">2699</dcterms:extent>
|
247
|
+
<dcterms:format>
|
248
|
+
<rdf:Description rdf:nodeID="N9u34589eyfdiuy8934y787d8f97sg786">
|
249
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
250
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
|
251
|
+
</rdf:Description>
|
252
|
+
</dcterms:format>
|
253
|
+
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
254
|
+
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-03-25T20:57:55.668737</dcterms:modified>
|
255
|
+
</pgterms:file>
|
256
|
+
</dcterms:hasFormat>
|
257
|
+
<pgterms:marc901>http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg</pgterms:marc901>
|
258
|
+
<pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/98765/98765-h/images/cover.jpg</pgterms:marc901>
|
259
|
+
<pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/98765/98765-rst/images/cover.jpg</pgterms:marc901>
|
260
|
+
<dcterms:hasFormat>
|
261
|
+
<pgterms:file rdf:about="http://www.gutenberg.org/cache/epub/98765/pg98765.cover.medium.jpg">
|
262
|
+
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-03-25T20:57:55.889736</dcterms:modified>
|
263
|
+
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
264
|
+
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">10856</dcterms:extent>
|
265
|
+
<dcterms:format>
|
266
|
+
<rdf:Description rdf:nodeID="N8df89ys8993p4qu89uenf89dusp38a07">
|
267
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
268
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
|
269
|
+
</rdf:Description>
|
270
|
+
</dcterms:format>
|
271
|
+
</pgterms:file>
|
272
|
+
</dcterms:hasFormat>
|
239
273
|
</pgterms:ebook>
|
240
|
-
<pgterms:file rdf:about="http://www.gutenberg.org/ebooks/12345.epub.noimages">
|
241
|
-
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">92652</dcterms:extent>
|
242
|
-
<dcterms:format>
|
243
|
-
<rdf:Description>
|
244
|
-
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
245
|
-
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/epub+zip</rdf:value>
|
246
|
-
</rdf:Description>
|
247
|
-
</dcterms:format>
|
248
|
-
<dcterms:isFormatOf rdf:resource="ebooks/12345"/>
|
249
|
-
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-09-21T19:22:32.115259</dcterms:modified>
|
250
|
-
</pgterms:file>
|
251
|
-
<pgterms:file rdf:about="http://www.gutenberg.org/ebooks/12345.cover.medium">
|
252
|
-
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">10856</dcterms:extent>
|
253
|
-
<dcterms:format>
|
254
|
-
<rdf:Description>
|
255
|
-
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
256
|
-
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
|
257
|
-
</rdf:Description>
|
258
|
-
</dcterms:format>
|
259
|
-
<dcterms:isFormatOf rdf:resource="ebooks/12345"/>
|
260
|
-
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-09-21T19:22:34.484114</dcterms:modified>
|
261
|
-
</pgterms:file>
|
262
|
-
<pgterms:file rdf:about="http://www.gutenberg.org/ebooks/12345.cover.small">
|
263
|
-
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1904</dcterms:extent>
|
264
|
-
<dcterms:format>
|
265
|
-
<rdf:Description>
|
266
|
-
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
267
|
-
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
|
268
|
-
</rdf:Description>
|
269
|
-
</dcterms:format>
|
270
|
-
<dcterms:isFormatOf rdf:resource="ebooks/12345"/>
|
271
|
-
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-09-21T19:22:34.379124</dcterms:modified>
|
272
|
-
</pgterms:file>
|
273
274
|
</rdf:RDF>'
|
274
275
|
end
|
275
276
|
let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
|
276
277
|
|
277
278
|
it "expects the correct number of entries returned" do
|
278
|
-
expect(rdf.covers.count).to be
|
279
|
+
expect(rdf.covers.count).to be 4
|
280
|
+
end
|
281
|
+
it "expect medium cover url to be first in the list" do
|
282
|
+
expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/cache/epub/98765/pg98765.cover.medium.jpg'
|
279
283
|
end
|
280
|
-
it "
|
281
|
-
expect(rdf.covers[
|
282
|
-
expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/ebooks/12345.cover.small'
|
284
|
+
it "expect the small cover url after the medium" do
|
285
|
+
expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/cache/epub/98765/pg98765.cover.small.jpg'
|
283
286
|
end
|
284
287
|
it "expects any other images to be listed after the official ones" do
|
285
|
-
expect(rdf.covers[2]).to eql 'http://www.gutenberg.org/files/
|
288
|
+
expect(rdf.covers[2]).to eql 'http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg'
|
286
289
|
end
|
287
290
|
end
|
288
291
|
|
289
292
|
describe "HTML ebook cover image" do
|
290
293
|
let(:xml) do
|
291
|
-
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
292
|
-
<pgterms:ebook rdf:about="ebooks/
|
293
|
-
<pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/
|
294
|
-
<pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/
|
295
|
-
<pgterms:marc901>http://www.gutenberg.org/files/
|
294
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
295
|
+
<pgterms:ebook rdf:about="ebooks/98765">
|
296
|
+
<pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/98765/98765-rst/images/cover.jpg</pgterms:marc901>
|
297
|
+
<pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/98765/98765-h/images/cover.jpg</pgterms:marc901>
|
298
|
+
<pgterms:marc901>http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg</pgterms:marc901>
|
296
299
|
</pgterms:ebook>
|
297
300
|
</rdf:RDF>'
|
298
301
|
end
|
@@ -304,43 +307,52 @@ module GutenbergRdf
|
|
304
307
|
it "should convert File URIs to the Gutenberg URL" do
|
305
308
|
expect(rdf.covers.first).to match 'http://www.gutenberg.org'
|
306
309
|
end
|
307
|
-
it "expects the
|
308
|
-
expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/files/
|
309
|
-
|
310
|
+
it "expects the HTML cover to be listed first" do
|
311
|
+
expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg'
|
312
|
+
end
|
313
|
+
it "expects the RST cover to be listed after the HTML" do
|
314
|
+
expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/files/98765/98765-rst/images/cover.jpg'
|
310
315
|
end
|
311
316
|
end
|
312
317
|
end
|
313
318
|
|
314
319
|
describe "#ebook" do
|
315
320
|
let(:xml) do
|
316
|
-
'<rdf:RDF xmlns:
|
321
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
317
322
|
<pgterms:ebook rdf:about="ebooks/98765">
|
318
|
-
<dcterms:hasFormat
|
319
|
-
|
323
|
+
<dcterms:hasFormat>
|
324
|
+
<pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
|
325
|
+
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
326
|
+
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
|
327
|
+
<dcterms:format>
|
328
|
+
<rdf:Description rdf:nodeID="N87dfy78yd78s6gsg6f8970d76g0f6d9b">
|
329
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain</rdf:value>
|
330
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
331
|
+
</rdf:Description>
|
332
|
+
</dcterms:format>
|
333
|
+
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
|
334
|
+
</pgterms:file>
|
335
|
+
</dcterms:hasFormat>
|
336
|
+
<dcterms:hasFormat>
|
337
|
+
<pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
|
338
|
+
<dcterms:format>
|
339
|
+
<rdf:Description rdf:nodeID="Ndfsd78tf34tukjehdsouyo4yrefh6dea">
|
340
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
|
341
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
342
|
+
</rdf:Description>
|
343
|
+
</dcterms:format>
|
344
|
+
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
|
345
|
+
<dcterms:format>
|
346
|
+
<rdf:Description rdf:nodeID="Nfy7we43yhluwe9syrqyp2ewufy0f6d1e">
|
347
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
|
348
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
349
|
+
</rdf:Description>
|
350
|
+
</dcterms:format>
|
351
|
+
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
352
|
+
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
|
353
|
+
</pgterms:file>
|
354
|
+
</dcterms:hasFormat>
|
320
355
|
</pgterms:ebook>
|
321
|
-
<pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
|
322
|
-
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
|
323
|
-
<dcterms:format>
|
324
|
-
<rdf:Description>
|
325
|
-
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
326
|
-
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=utf-8</rdf:value>
|
327
|
-
</rdf:Description>
|
328
|
-
</dcterms:format>
|
329
|
-
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
330
|
-
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
|
331
|
-
</pgterms:file>
|
332
|
-
<pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
|
333
|
-
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
|
334
|
-
<dcterms:format>
|
335
|
-
<rdf:Description>
|
336
|
-
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
337
|
-
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
|
338
|
-
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
|
339
|
-
</rdf:Description>
|
340
|
-
</dcterms:format>
|
341
|
-
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
342
|
-
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
|
343
|
-
</pgterms:file>
|
344
356
|
</rdf:RDF>'
|
345
357
|
end
|
346
358
|
let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
|
@@ -349,7 +361,50 @@ module GutenbergRdf
|
|
349
361
|
expect(rdf.ebooks.count).to be 2
|
350
362
|
end
|
351
363
|
it "expects an entry to be a Media class" do
|
352
|
-
expect(rdf.ebooks.first
|
364
|
+
expect(rdf.ebooks.first).to be_an_instance_of Rdf::Media
|
365
|
+
end
|
366
|
+
|
367
|
+
context "only collect ebook media files" do
|
368
|
+
let(:xml) do
|
369
|
+
'<rdf:RDF xml:base="http://www.gutenberg.org/" xmlns:cc="http://web.resource.org/cc/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcam="http://purl.org/dc/dcam/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
370
|
+
<pgterms:ebook rdf:about="ebooks/98765">
|
371
|
+
<dcterms:hasFormat>
|
372
|
+
<pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
|
373
|
+
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
374
|
+
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
|
375
|
+
<dcterms:format>
|
376
|
+
<rdf:Description rdf:nodeID="N87dfy78yd78s6gsg6f8970d76g0f6d9b">
|
377
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain</rdf:value>
|
378
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
379
|
+
</rdf:Description>
|
380
|
+
</dcterms:format>
|
381
|
+
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
|
382
|
+
</pgterms:file>
|
383
|
+
</dcterms:hasFormat>
|
384
|
+
<dcterms:hasFormat>
|
385
|
+
<pgterms:file rdf:about="http://www.gutenberg.org/cache/epub/98765/pg98765.cover.small.jpg">
|
386
|
+
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">2699</dcterms:extent>
|
387
|
+
<dcterms:format>
|
388
|
+
<rdf:Description rdf:nodeID="N9u34589eyfdiuy8934y787d8f97sg786">
|
389
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
390
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
|
391
|
+
</rdf:Description>
|
392
|
+
</dcterms:format>
|
393
|
+
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
394
|
+
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-03-25T20:57:55.668737</dcterms:modified>
|
395
|
+
</pgterms:file>
|
396
|
+
</dcterms:hasFormat>
|
397
|
+
</pgterms:ebook>
|
398
|
+
</rdf:RDF>'
|
399
|
+
end
|
400
|
+
let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
|
401
|
+
|
402
|
+
it "only extracts one media file" do
|
403
|
+
expect(rdf.ebooks.count).to be 1
|
404
|
+
end
|
405
|
+
it "expects the media type to be for an ebook" do
|
406
|
+
expect(rdf.ebooks[0].media_type).to eq 'text/plain'
|
407
|
+
end
|
353
408
|
end
|
354
409
|
end
|
355
410
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gutenberg_rdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Cook
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-07-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -28,7 +28,8 @@ description: A Ruby wrapper for the Project Gutenberg RDF catalog files (require
|
|
28
28
|
Ruby 2).
|
29
29
|
email:
|
30
30
|
- m@mikecook.co.uk
|
31
|
-
executables:
|
31
|
+
executables:
|
32
|
+
- rdf
|
32
33
|
extensions: []
|
33
34
|
extra_rdoc_files: []
|
34
35
|
files:
|
@@ -38,6 +39,7 @@ files:
|
|
38
39
|
- LICENSE.txt
|
39
40
|
- README.md
|
40
41
|
- Rakefile
|
42
|
+
- bin/rdf
|
41
43
|
- gutenberg_rdf.gemspec
|
42
44
|
- lib/gutenberg_rdf.rb
|
43
45
|
- lib/gutenberg_rdf/rdf.rb
|
@@ -69,7 +71,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
69
71
|
version: '0'
|
70
72
|
requirements: []
|
71
73
|
rubyforge_project:
|
72
|
-
rubygems_version: 2.
|
74
|
+
rubygems_version: 2.3.0
|
73
75
|
signing_key:
|
74
76
|
specification_version: 4
|
75
77
|
summary: A Ruby wrapper for the Project Gutenberg RDF catalog files.
|
@@ -79,3 +81,4 @@ test_files:
|
|
79
81
|
- spec/gutenberg_rdf/rdf_spec.rb
|
80
82
|
- spec/gutenberg_rdf_spec.rb
|
81
83
|
- spec/spec_helper.rb
|
84
|
+
has_rdoc:
|