gutenberg_rdf 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: bbef70a3ac7d39b2bb3c3e0945ba6d722ffa1d2b
4
+ data.tar.gz: 98cd1b9df8ad4ff1507cea7a2ade4686aee9302b
5
+ SHA512:
6
+ metadata.gz: 0a2174ab980a295ec48ee2f8791b8bb8d12de541f8159d257f9e2e2ff57d0ad373e60ce40188e9a4007d5be0a54a98e3631532dee07820b609222112382b77e5
7
+ data.tar.gz: 2ee305ee3749ba4cfa6239af9acc49e6eea65f4d8ab461eccab474bc4bbdf47df433791b7b398b0a60eaac132255aa3f85123bca3a5e3f3814fa2881d0288deb
data/.gitignore ADDED
@@ -0,0 +1,20 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+
19
+ .ruby-*
20
+ .DS_Store
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in gutenberg_rdf.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Mike Cook
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,76 @@
1
+ # Gutenberg RDF
2
+
3
+ Gutenberg RDF is a Ruby wrapper for the Project Gutenberg RDF catalog book files,
4
+ providing a nice API to all the metadata contained within.
5
+
6
+ ## Requirements
7
+
8
+ * Ruby 2.0 or later - so that source files are UTF-8 by default
9
+ * Nokogiri - for parsing the RDF
10
+
11
+
12
+ ## Installation
13
+
14
+ Add this line to your application's Gemfile:
15
+
16
+ gem 'gutenberg_rdf'
17
+
18
+ And then execute:
19
+
20
+ $ bundle
21
+
22
+ Or install it yourself as:
23
+
24
+ $ gem install gutenberg_rdf
25
+
26
+ ## Usage
27
+
28
+ require 'gutenberg_rdf'
29
+
30
+ xml = Nokogiri::XML(File.new('/path/to/pg2746.rdf'))
31
+ book = GutenbergRdf::Rdf.new(xml)
32
+
33
+ puts book.id
34
+ #=> "2746"
35
+
36
+ puts book.type
37
+ #=> "Text"
38
+
39
+ puts book.title
40
+ #=> "Urbain Grandier"
41
+
42
+ puts book.subtitle
43
+ #=> "Celebrated Crimes"
44
+
45
+ puts book.authors.first.fullname
46
+ #=> "Alexandre Dumas"
47
+
48
+ puts book.subjects.first
49
+ #=> "Crime"
50
+
51
+ puts book.published
52
+ #=> "2004-09-22"
53
+
54
+ puts book.publisher
55
+ #=> "Project Gutenberg"
56
+
57
+ puts book.rights
58
+ #=> "Public domain in the USA."
59
+
60
+ puts book.language
61
+ #=> "en"
62
+
63
+ puts book.covers.first
64
+ #=> "http://www.gutenberg.org/ebooks/2746.cover.medium"
65
+
66
+ puts book.ebooks[3][:uri]
67
+ #=> "http://www.gutenberg.org/ebooks/2746.epub.images"
68
+
69
+
70
+ ## Contributing
71
+
72
+ 1. Fork it
73
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
74
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
75
+ 4. Push to the branch (`git push origin my-new-feature`)
76
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,28 @@
1
# Put lib/ on the load path so the version constant can be read at build time.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'gutenberg_rdf/version'

Gem::Specification.new do |gem|
  # Identity
  gem.name        = "gutenberg_rdf"
  gem.version     = GutenbergRdf::VERSION
  gem.authors     = ["Mike Cook"]
  gem.email       = ["m@mikecook.co.uk"]
  gem.summary     = %q{A Ruby wrapper for the Project Gutenberg RDF catalog files.}
  gem.description = %q{A Ruby wrapper providing a nice API for the Project Gutenberg RDF catalog files. See the README for more information.}
  gem.homepage    = ""
  gem.license     = "MIT"

  # Packaged files: everything tracked by git, split on the record separator.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.required_ruby_version = ">= 2.0.0" # so we have UTF-8 by default

  # Runtime dependency: the XML parser.
  gem.add_dependency "nokogiri", "~> 1.6.0"

  # Development-only dependencies.
  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rake"

  gem.add_development_dependency "rspec", "~> 2.14.1"
end
@@ -0,0 +1,58 @@
1
module GutenbergRdf
  class Rdf
    # Read-only view of one <pgterms:agent> element (an author or other
    # contributor) from a Project Gutenberg RDF catalog file.
    #
    # The constructor accepts either the <pgterms:agent> node itself (which is
    # what Rdf#authors passes in) or a node that contains one as a direct
    # child (e.g. the <rdf:RDF> root). Every lookup is scoped to that single
    # agent, so files listing several agents no longer leak names or aliases
    # from one agent into another.
    class Agent
      # @return [Object] the node handed to the constructor, unchanged
      attr_reader :xml

      # @param xml [#at_xpath] a <pgterms:agent> node, or a parent of one
      def initialize(xml)
        @xml = xml
      end

      # @return [String, nil] numeric agent id taken from the "about"
      #   attribute ("2009/agents/402" => "402"); nil when the attribute is
      #   missing or does not have that shape
      def id
        about = agent_node && agent_node.attribute('about')
        about && about.content[%r{\A\d{4}/agents/(\d+)\z}, 1]
      end

      # @return [String] first name(s) followed by last name; blank parts are
      #   dropped so a single-name agent has no leading space
      def fullname
        [firstname, lastname].reject(&:empty?).join(' ')
      end

      # @return [String] the text before the first comma of <pgterms:name>
      #   ('' when there is no name)
      def lastname
        name_parts[:last]
      end

      # @return [String] the remaining comma-separated name parts in reverse
      #   order ("Doe, Jon, Sir" => "Sir Jon"); '' when only one part exists
      def firstname
        name_parts[:first]
      end

      # @return [String, nil] text of <pgterms:birthdate>, nil when absent
      def birthdate
        text_at('pgterms:birthdate')
      end

      # @return [String, nil] text of <pgterms:deathdate>, nil when absent
      def deathdate
        text_at('pgterms:deathdate')
      end

      # @return [String, nil] "resource" attribute of <pgterms:webpage>,
      #   nil when the element or the attribute is absent
      def webpage
        page = agent_node && agent_node.at_xpath('pgterms:webpage')
        resource = page && page.attribute('resource')
        resource && resource.content
      end

      # @return [Array<String>] text of each <pgterms:alias> of this agent
      def aliases
        return [] unless agent_node

        agent_node.xpath('pgterms:alias').map(&:text)
      end

      private

      # The <pgterms:agent> element all queries run against: the given node
      # when it is itself an agent, otherwise its first agent child.
      def agent_node
        return @agent_node if defined?(@agent_node)

        @agent_node = xml.at_xpath('self::pgterms:agent') || xml.at_xpath('pgterms:agent')
      end

      # Text of the first node matching +path+ below the agent, or nil.
      def text_at(path)
        node = agent_node && agent_node.at_xpath(path)
        node && node.text
      end

      # Splits "Last, First, Suffix" into {first: "Suffix First", last: "Last"}.
      def name_parts
        @name_parts ||= begin
          parts = text_at('pgterms:name').to_s.split(/, */)
          last = parts.shift.to_s
          {first: parts.reverse.join(' '), last: last}
        end
      end

    end
  end
end
@@ -0,0 +1,125 @@
1
+ require 'date'
2
+
3
module GutenbergRdf
  # Read-only wrapper around one Project Gutenberg RDF catalog document.
  #
  # All accessors query the <rdf:RDF> element of the document given to the
  # constructor. Accessors for optional elements return nil (or an empty
  # collection) when the element is absent instead of raising NoMethodError.
  class Rdf
    # Offset appended to <dcterms:modified> timestamps that carry none.
    DEFAULT_UTC_OFFSET = '-07:00'.freeze

    # Server-local prefix found in some <pgterms:marc901> cover entries.
    LOCAL_FILE_PREFIX = %r{\Afile:///public/vhost/g/gutenberg/html}

    # @return [Object] the <rdf:RDF> node all queries run against
    attr_reader :xml

    # @param xml [#at_xpath] parsed document containing an <rdf:RDF> element
    def initialize(xml)
      @xml = xml.at_xpath('rdf:RDF')
    end

    # @return [String, nil] ebook id from the "about" attribute
    #   ("ebooks/2746" => "2746"); nil when missing or differently shaped
    def id
      ebook = xml.at_xpath('pgterms:ebook')
      about = ebook && ebook.attribute('about')
      about && about.content[%r{\Aebooks/(.+)\z}, 1]
    end

    # @return [String, nil] value of <dcterms:type>, e.g. "Text"
    def type
      text_at('pgterms:ebook/dcterms:type/rdf:Description/rdf:value')
    end

    # @return [String, nil] first segment of <dcterms:title>
    def title
      titles.first
    end

    # @return [String] remaining title segments joined with " - ";
    #   '' when there is no subtitle (or no title at all)
    def subtitle
      # drop(1) returns [] for an empty list, whereas [][1..-1] is nil.
      titles.drop(1).join(' - ')
    end

    # @return [Array<Agent>] one Agent per <pgterms:agent> in the document
    def authors
      @authors ||= extract_authors
    end

    # @return [Array<String>] values of every subject whose dcam:memberOf
    #   resource ends in "LCSH"; other subjects are skipped
    def subjects
      xml.xpath('pgterms:ebook//dcterms:subject').flat_map do |subject|
        next [] unless lcsh?(subject)

        subject.xpath('rdf:Description//rdf:value').map(&:text)
      end
    end

    # @return [String, nil] text of <dcterms:issued>
    def published
      text_at('pgterms:ebook/dcterms:issued')
    end

    # @return [String, nil] text of <dcterms:publisher>
    def publisher
      text_at('pgterms:ebook/dcterms:publisher')
    end

    # @return [String, nil] text of <dcterms:language>
    def language
      text_at('pgterms:ebook/dcterms:language')
    end

    # @return [String, nil] text of <dcterms:rights>
    def rights
      text_at('pgterms:ebook/dcterms:rights')
    end

    # @return [Array<String>] unique cover image URLs, sorted as strings
    def covers
      (official_cover_images + other_cover_images).sort.uniq
    end

    # @return [Array<Hash>] one hash per <pgterms:file> with the keys
    #   :uri, :mime_type, :encoding and :modified (a DateTime, or nil when
    #   the file has no <dcterms:modified>)
    def ebooks
      xml.xpath('//pgterms:file').map do |file|
        about = file.attribute('about')
        # Only the first rdf:value is used when a file lists several formats.
        datatypes = separate_mimetype_and_encoding(text_at('dcterms:format/rdf:Description/rdf:value', file))
        {
          uri: about && about.content,
          mime_type: datatypes[:mimetype],
          encoding: datatypes[:encoding],
          modified: parse_modified(text_at('dcterms:modified', file))
        }
      end
    end

    private

    # Text of the first node matching +path+ below +node+, or nil.
    def text_at(path, node = xml)
      found = node.at_xpath(path)
      found && found.text
    end

    def titles
      @titles ||= split_title_and_subtitle
    end

    # Splits the raw title into [title, subtitle, ...]. Line breaks take
    # precedence; a single-line title is split on ":" and, failing that,
    # on ";". Blank segments are discarded.
    def split_title_and_subtitle
      raw = text_at('pgterms:ebook/dcterms:title')
      return [] if raw.nil?

      # Replace em dashes (U+2014) with plain ASCII hyphens.
      segments = tidy(raw.gsub("\u2014", '-').split(/\n/))
      segments = tidy(segments.first.split(/:/)) if segments.count == 1
      segments = tidy(segments.first.split(/;/)) if segments.count == 1
      segments
    end

    # Strips surrounding whitespace and drops empty strings.
    def tidy(strings)
      strings.map(&:strip).reject(&:empty?)
    end

    def extract_authors
      xml.xpath('//pgterms:agent').map { |agent| Agent.new(agent) }
    end

    # True when the subject's dcam:memberOf resource ends in "LCSH".
    def lcsh?(subject)
      member_of = subject.at_xpath('rdf:Description/dcam:memberOf')
      resource = member_of && member_of.attribute('resource')
      !!(resource && resource.content =~ /LCSH\z/)
    end

    # URIs of every <pgterms:file> that has a format value containing "image".
    def official_cover_images
      image_files = xml.xpath('//pgterms:file').select do |file|
        file.xpath('dcterms:format/rdf:Description//rdf:value').any? { |value| value.text =~ /image/ }
      end
      image_files.map { |file| file.attribute('about').content }
    end

    # Cover URLs from <pgterms:marc901>, with the server-local file:// prefix
    # rewritten to the public web address.
    def other_cover_images
      xml.xpath('pgterms:ebook//pgterms:marc901').map do |node|
        node.text.sub(LOCAL_FILE_PREFIX, 'http://www.gutenberg.org')
      end
    end

    # "text/plain; charset=utf-8" => {mimetype: "text/plain", encoding: "utf-8"}.
    # A nil or empty string yields {mimetype: nil, encoding: ""}.
    def separate_mimetype_and_encoding(string)
      mimetype, *params = string.to_s.split(/; */)
      {mimetype: mimetype, encoding: params.join(';').sub('charset=', '')}
    end

    # Parses a <dcterms:modified> timestamp. DEFAULT_UTC_OFFSET is appended
    # only when the value has no "Z" or "+hh:mm"/"-hh:mm" suffix of its own.
    def parse_modified(timestamp)
      stamp = timestamp.to_s.strip
      return nil if stamp.empty?

      stamp += DEFAULT_UTC_OFFSET unless stamp =~ /(?:Z|[+-]\d\d:\d\d)\z/
      DateTime.parse(stamp)
    end

  end
end
@@ -0,0 +1,3 @@
1
module GutenbergRdf
  # Release number of the gem; frozen so it cannot be mutated at runtime.
  VERSION = "0.0.2".freeze
end
@@ -0,0 +1,13 @@
1
+ require 'nokogiri'
2
+
3
+ require "gutenberg_rdf/rdf"
4
+ require "gutenberg_rdf/rdf/agent"
5
+ require "gutenberg_rdf/version"
6
+
7
module GutenbergRdf

  # Reads and parses a Project Gutenberg RDF catalog file.
  #
  # The file handle is closed before returning, whether or not parsing
  # succeeds, so repeated calls do not leak file descriptors.
  #
  # @param path [String] location of the .rdf file on disk
  # @return [GutenbergRdf::Rdf] wrapper around the parsed document
  def self.parse(path)
    file = File.new(path)
    begin
      Rdf.new(Nokogiri::XML(file))
    ensure
      file.close
    end
  end

end
@@ -0,0 +1,112 @@
1
+ require 'spec_helper'
2
+
3
+ module GutenbergRdf
4
+ class Rdf
5
+ describe Agent do
6
+ let(:agent) do
7
+ xml = '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
8
+ <pgterms:agent rdf:about="2009/agents/402">
9
+ <pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
10
+ <pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
11
+ <pgterms:name>Doe, Jon James</pgterms:name>
12
+ <pgterms:alias>Doe, Jon</pgterms:alias>
13
+ <pgterms:alias>Doe, J. J.</pgterms:alias>
14
+ <pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Jon_James_Doe"/>
15
+ </pgterms:agent>
16
+ </rdf:RDF>'
17
+ rdf = Nokogiri::XML(xml)
18
+ Agent.new(rdf.at_xpath('rdf:RDF'))
19
+ end
20
+
21
+ it "expects an agent ID" do
22
+ expect(agent.id).to eql '402'
23
+ end
24
+
25
+ it "expects the last name" do
26
+ expect(agent.lastname).to eql 'Doe'
27
+ end
28
+
29
+ it "expects the first name(s)" do
30
+ expect(agent.firstname).to eql 'Jon James'
31
+ end
32
+
33
+ it "expects the full name" do
34
+ expect(agent.fullname).to eql 'Jon James Doe'
35
+ end
36
+
37
+ it "expects a birth date" do
38
+ expect(agent.birthdate).to eql '1830'
39
+ end
40
+
41
+ it "expects a death date" do
42
+ expect(agent.deathdate).to eql '1905'
43
+ end
44
+
45
+ it "expects a webpage" do
46
+ expect(agent.webpage).to eql 'http://en.wikipedia.org/wiki/Jon_James_Doe'
47
+ end
48
+
49
+ it "expects any alias names" do
50
+ expect(agent.aliases[0]).to eql 'Doe, Jon'
51
+ expect(agent.aliases[1]).to eql 'Doe, J. J.'
52
+ end
53
+
54
+ context "when only a single name is given" do
55
+ let(:agent) do
56
+ xml = '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
57
+ <pgterms:agent rdf:about="2009/agents/402">
58
+ <pgterms:name>Dato</pgterms:name>
59
+ </pgterms:agent>
60
+ </rdf:RDF>'
61
+ rdf = Nokogiri::XML(xml)
62
+ Agent.new(rdf.at_xpath('rdf:RDF'))
63
+ end
64
+
65
+ it "expects it to be assigned to the last name" do
66
+ expect(agent.lastname).to eql 'Dato'
67
+ end
68
+ it "expects firstname to be an empty string" do
69
+ expect(agent.firstname).to eql ''
70
+ end
71
+ end
72
+
73
+ context "when the name has a suffix" do
74
+ let(:agent) do
75
+ xml = '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
76
+ <pgterms:agent rdf:about="2009/agents/402">
77
+ <pgterms:name>Doe, Jon, Sir</pgterms:name>
78
+ </pgterms:agent>
79
+ </rdf:RDF>'
80
+ rdf = Nokogiri::XML(xml)
81
+ Agent.new(rdf.at_xpath('rdf:RDF'))
82
+ end
83
+
84
+ it "expects the correct name order" do
85
+ expect(agent.firstname).to eql 'Sir Jon'
86
+ expect(agent.lastname).to eql 'Doe'
87
+ end
88
+ end
89
+
90
context "when full name is given in (brackets)" do
  # The about attribute uses the declared rdf: prefix, like every other
  # fixture in this file, so the XML is namespace-well-formed.
  let(:agent) do
    xml = '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
      <pgterms:agent rdf:about="2009/agents/402">
        <pgterms:name>Doe, J. J. (Jon James)</pgterms:name>
      </pgterms:agent>
    </rdf:RDF>'
    rdf = Nokogiri::XML(xml)
    Agent.new(rdf.at_xpath('rdf:RDF'))
  end

  it "expects initials to be replaced by name in brackets" do
    pending "Not yet implemented"
    expect(agent.firstname).to eql 'Jon James'
    expect(agent.lastname).to eql 'Doe'
  end
  it "expects the name (excluding name in brackets) to be added to the aliases"
  it "should not have duplicate aliases"
end
109
+ end
110
+
111
+ end
112
+ end
@@ -0,0 +1,374 @@
1
+ require 'spec_helper'
2
+
3
+ module GutenbergRdf
4
+ describe Rdf do
5
+ let(:xml) do
6
+ '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
7
+ <pgterms:ebook rdf:about="ebooks/98765">
8
+ <dcterms:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2006-09-28</dcterms:issued>
9
+ <dcterms:language rdf:datatype="http://purl.org/dc/terms/RFC4646">en</dcterms:language>
10
+ <dcterms:publisher>Project Gutenberg</dcterms:publisher>
11
+ <dcterms:rights>Public domain in the USA.</dcterms:rights>
12
+ </pgterms:ebook>
13
+ </rdf:RDF>'
14
+ end
15
+ let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
16
+
17
+ it "expects an id" do
18
+ expect(rdf.id).to eql "98765"
19
+ end
20
+ it "expects a published date" do
21
+ expect(rdf.published).to eql "2006-09-28"
22
+ end
23
+ it "expects a publisher" do
24
+ expect(rdf.publisher).to eql "Project Gutenberg"
25
+ end
26
+ it "expects a language" do
27
+ expect(rdf.language).to eql "en"
28
+ end
29
+ it "expects the rights" do
30
+ expect(rdf.rights).to eql "Public domain in the USA."
31
+ end
32
+
33
+ describe "#type" do
34
+ let(:xml) do
35
+ '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
36
+ <pgterms:ebook rdf:about="ebooks/98765">
37
+ <dcterms:type>
38
+ <rdf:Description>
39
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/DCMIType"/>
40
+ <rdf:value>Text</rdf:value>
41
+ </rdf:Description>
42
+ </dcterms:type>
43
+ </pgterms:ebook>
44
+ </rdf:RDF>'
45
+ end
46
+ let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
47
+
48
+ it "expect the type of entity" do
49
+ expect(rdf.type).to eql 'Text'
50
+ end
51
+ end
52
+
53
+ describe "Titles" do
54
+ let(:xml) do
55
+ '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
56
+ <pgterms:ebook rdf:about="ebooks/98765">
57
+ <dcterms:title>A Great Title</dcterms:title>
58
+ </pgterms:ebook>
59
+ </rdf:RDF>'
60
+ end
61
+ let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
62
+
63
+ it "expects a title" do
64
+ expect(rdf.title).to eql 'A Great Title'
65
+ end
66
+ it "expects subtitle to be empty" do
67
+ expect(rdf.subtitle).to eql ''
68
+ end
69
+
70
+ context "with a title and subtitle, on separate lines" do
71
+ let(:xml) do
72
+ '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
73
+ <pgterms:ebook rdf:about="ebooks/98765">
74
+ <dcterms:title>A Great Multi-Title
75
+ Or, a Subtitle</dcterms:title>
76
+ </pgterms:ebook>
77
+ </rdf:RDF>'
78
+ end
79
+ let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
80
+
81
+ it "expects the title to be the first line" do
82
+ expect(rdf.title).to eql 'A Great Multi-Title'
83
+ end
84
+ it "expects the subtitle to be the second line" do
85
+ expect(rdf.subtitle).to eql 'Or, a Subtitle'
86
+ end
87
+ end
88
+
89
+ context "when title:subtitle are separated by a colon" do
90
+ let(:xml) do
91
+ '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
92
+ <pgterms:ebook rdf:about="ebooks/98765">
93
+ <dcterms:title>A Great Multi-Title: And a Subtitle</dcterms:title>
94
+ </pgterms:ebook>
95
+ </rdf:RDF>'
96
+ end
97
+ let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
98
+
99
+ it "expects a title" do
100
+ expect(rdf.title).to eql 'A Great Multi-Title'
101
+ end
102
+ it "expects a subtitle" do
103
+ expect(rdf.subtitle).to eql 'And a Subtitle'
104
+ end
105
+ end
106
+
107
+ context "when title; and subtitle are separated by a semi-colon" do
108
+ let(:xml) do
109
+ '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
110
+ <pgterms:ebook rdf:about="ebooks/98765">
111
+ <dcterms:title>A Great Multi-Title; Or, a Subtitle</dcterms:title>
112
+ </pgterms:ebook>
113
+ </rdf:RDF>'
114
+ end
115
+ let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
116
+ it "expects a title" do
117
+ expect(rdf.title).to eql 'A Great Multi-Title'
118
+ end
119
+ it "expects a subtitle" do
120
+ expect(rdf.subtitle).to eql 'Or, a Subtitle'
121
+ end
122
+
123
+ context "...except when subtitles already exists" do
124
+ let(:xml) do
125
+ '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
126
+ <pgterms:ebook rdf:about="ebooks/98765">
127
+ <dcterms:title>A Great Multi-Title; and some other text
128
+ Then a Subtitle on a newline</dcterms:title>
129
+ </pgterms:ebook>
130
+ </rdf:RDF>'
131
+ end
132
+ let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
133
+ it "expects a title" do
134
+ expect(rdf.title).to eql 'A Great Multi-Title; and some other text'
135
+ end
136
+ it "expects a subtitle" do
137
+ expect(rdf.subtitle).to eql 'Then a Subtitle on a newline'
138
+ end
139
+ end
140
+ end
141
+ end
142
+
143
+ describe "#authors" do
144
+ let(:xml) do
145
+ '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
146
+ <pgterms:agent rdf:about="2009/agents/402">
147
+ <pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
148
+ <pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
149
+ <pgterms:name>Dodge, Mary Mapes</pgterms:name>
150
+ <pgterms:alias>Dodge, Mary</pgterms:alias>
151
+ <pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Mary_Mapes_Dodge"/>
152
+ </pgterms:agent>
153
+ <pgterms:agent rdf:about="2009/agents/116">
154
+ <pgterms:alias>Verschillende</pgterms:alias>
155
+ <pgterms:name>Various</pgterms:name>
156
+ </pgterms:agent>
157
+ </rdf:RDF>'
158
+ end
159
+ let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
160
+ it "expects a Array" do
161
+ expect(rdf.authors.class).to be Array
162
+ end
163
+ it "expects correct number to be returned" do
164
+ expect(rdf.authors.count).to be 2
165
+ end
166
+ it "expects an author object" do
167
+ expect(rdf.authors.first.class).to be Rdf::Agent
168
+ end
169
+ end
170
+
171
+ describe "#subjects" do
172
+ let(:xml) do
173
+ %q{<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
174
+ <pgterms:ebook rdf:about="ebooks/98765">
175
+ <dcterms:subject>
176
+ <rdf:Description>
177
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCSH"/>
178
+ <rdf:value>Children's literature -- Periodicals</rdf:value>
179
+ <rdf:value>Children's periodicals, American</rdf:value>
180
+ </rdf:Description>
181
+ </dcterms:subject>
182
+ <dcterms:subject>
183
+ <rdf:Description>
184
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCC"/>
185
+ <rdf:value>PZ</rdf:value>
186
+ </rdf:Description>
187
+ </dcterms:subject>
188
+ </pgterms:ebook>
189
+ </rdf:RDF>}
190
+ end
191
+ let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
192
+ it "expects correct number to be returned" do
193
+ expect(rdf.subjects.count).to be 2
194
+ end
195
+ it "expects the correct data" do
196
+ expect(rdf.subjects.first).to eql "Children's literature -- Periodicals"
197
+ expect(rdf.subjects.last).to eql "Children's periodicals, American"
198
+ end
199
+ end
200
+
201
+ describe "#covers" do
202
+ describe "official PG covers" do
203
+ let(:xml) do
204
+ '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
205
+ <pgterms:ebook rdf:about="ebooks/12345">
206
+ <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/12345.epub.noimages"/>
207
+ <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/12345.cover.medium"/>
208
+ <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/12345.cover.small"/>
209
+ <pgterms:marc901>http://www.gutenberg.org/files/12345/12345-h/images/cover.jpg</pgterms:marc901>
210
+ </pgterms:ebook>
211
+ <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/12345.epub.noimages">
212
+ <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">92652</dcterms:extent>
213
+ <dcterms:format>
214
+ <rdf:Description>
215
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
216
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/epub+zip</rdf:value>
217
+ </rdf:Description>
218
+ </dcterms:format>
219
+ <dcterms:isFormatOf rdf:resource="ebooks/12345"/>
220
+ <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-09-21T19:22:32.115259</dcterms:modified>
221
+ </pgterms:file>
222
+ <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/12345.cover.medium">
223
+ <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">10856</dcterms:extent>
224
+ <dcterms:format>
225
+ <rdf:Description>
226
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
227
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
228
+ </rdf:Description>
229
+ </dcterms:format>
230
+ <dcterms:isFormatOf rdf:resource="ebooks/12345"/>
231
+ <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-09-21T19:22:34.484114</dcterms:modified>
232
+ </pgterms:file>
233
+ <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/12345.cover.small">
234
+ <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1904</dcterms:extent>
235
+ <dcterms:format>
236
+ <rdf:Description>
237
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
238
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
239
+ </rdf:Description>
240
+ </dcterms:format>
241
+ <dcterms:isFormatOf rdf:resource="ebooks/12345"/>
242
+ <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-09-21T19:22:34.379124</dcterms:modified>
243
+ </pgterms:file>
244
+ </rdf:RDF>'
245
+ end
246
+ let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
247
+
248
+ it "expects the correct number of entries returned" do
249
+ expect(rdf.covers.count).to be 3
250
+ end
251
+ it "expects those to be used" do
252
+ expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/ebooks/12345.cover.medium'
253
+ expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/ebooks/12345.cover.small'
254
+ end
255
+ it "expects any other images to be listed after the official ones" do
256
+ expect(rdf.covers[2]).to eql 'http://www.gutenberg.org/files/12345/12345-h/images/cover.jpg'
257
+ end
258
+ end
259
+
260
+ describe "HTML ebook cover image" do
261
+ let(:xml) do
262
+ '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
263
+ <pgterms:ebook rdf:about="ebooks/12345">
264
+ <pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/12345/12345-rst/images/cover.jpg</pgterms:marc901>
265
+ <pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/12345/12345-h/images/cover.jpg</pgterms:marc901>
266
+ <pgterms:marc901>http://www.gutenberg.org/files/12345/12345-h/images/cover.jpg</pgterms:marc901>
267
+ </pgterms:ebook>
268
+ </rdf:RDF>'
269
+ end
270
+ let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
271
+
272
+ it "expects only unique entries" do
273
+ expect(rdf.covers.count).to be 2
274
+ end
275
+ it "should convert File URIs to the Gutenberg URL" do
276
+ expect(rdf.covers.first).to match 'http://www.gutenberg.org'
277
+ end
278
+ it "expects the covers to be listed in the correct order" do
279
+ expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/files/12345/12345-h/images/cover.jpg'
280
+ expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/files/12345/12345-rst/images/cover.jpg'
281
+ end
282
+ end
283
+ end
284
+
285
+ describe "#ebook" do
286
+ let(:xml) do
287
+ '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
288
+ <pgterms:ebook rdf:about="ebooks/98765">
289
+ <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/98765.txt.utf-8"/>
290
+ <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/98765.zip"/>
291
+ </pgterms:ebook>
292
+ <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
293
+ <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
294
+ <dcterms:format>
295
+ <rdf:Description>
296
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
297
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=utf-8</rdf:value>
298
+ </rdf:Description>
299
+ </dcterms:format>
300
+ <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
301
+ <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
302
+ </pgterms:file>
303
+ <pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
304
+ <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
305
+ <dcterms:format>
306
+ <rdf:Description>
307
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
308
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
309
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
310
+ </rdf:Description>
311
+ </dcterms:format>
312
+ <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
313
+ <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
314
+ </pgterms:file>
315
+ </rdf:RDF>'
316
+ end
317
+ let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
318
+
319
+ it "expects the correct number of entries" do
320
+ expect(rdf.ebooks.count).to be 2
321
+ end
322
+ it "expects an entry Hash to have the correct keys" do
323
+ expect(rdf.ebooks.first).to have_key :uri
324
+ expect(rdf.ebooks.first).to have_key :mime_type
325
+ expect(rdf.ebooks.first).to have_key :encoding
326
+ expect(rdf.ebooks.first).to have_key :modified
327
+ end
328
# Rdf#ebooks exposes :modified as a parsed DateTime, not the raw string.
it "expects the modified value to be a DateTime" do
  expect(rdf.ebooks.first[:modified].class).to be DateTime
end
331
+
332
+ it "should return the URL" do
333
+ expect(rdf.ebooks.first[:uri]).to eql 'http://www.gutenberg.org/ebooks/98765.txt.utf-8'
334
+ end
335
+ it "should return the mime_type" do
336
+ expect(rdf.ebooks.first[:mime_type]).to eql 'text/plain'
337
+ end
338
+ it "should return the encoding" do
339
+ expect(rdf.ebooks.first[:encoding]).to eql 'utf-8'
340
+ end
341
+ it "should return the modified datetime" do
342
+ expect(rdf.ebooks.first[:modified].to_s).to eql '2010-02-16T08:29:52-07:00'
343
+ end
344
+
345
+ context "when there are two mime-types" do
346
+ let(:xml) do
347
+ '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
348
+ <pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
349
+ <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
350
+ <dcterms:format>
351
+ <rdf:Description>
352
+ <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
353
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
354
+ <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
355
+ </rdf:Description>
356
+ </dcterms:format>
357
+ <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
358
+ <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
359
+ </pgterms:file>
360
+ </rdf:RDF>'
361
+ end
362
+ let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
363
+
364
+ it "should use just the first one" do
365
+ expect(rdf.ebooks.first[:mime_type]).to eql 'application/zip'
366
+ end
367
+ it "expects the encoding to be an empty string" do
368
+ expect(rdf.ebooks.first[:encoding]).to eql ''
369
+ end
370
+ end
371
+ end
372
+
373
+ end
374
+ end
@@ -0,0 +1,16 @@
1
+ require 'spec_helper'
2
+
3
module GutenbergRdf
  describe ".parse" do
    # In-memory stand-in for the RDF file on disk.
    let(:file) { StringIO.new('<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"><pgterms:ebook rdf:about="ebooks/98765"/></rdf:RDF>') }

    it "expects an Rdf object" do
      # File.new is stubbed, so the path below is never opened; .parse is
      # given a path string, as real callers would do.
      File.stub(:new).and_return(file)
      book = GutenbergRdf.parse('/path/to/pg98765.rdf')

      expect(book.class).to be Rdf
      expect(book.id).to eql '98765'
    end

  end
end
@@ -0,0 +1,9 @@
1
+ require 'gutenberg_rdf'
2
+
3
# Shared RSpec settings for the whole suite.
RSpec.configure do |rspec|
  # Use RSpec's built-in mocking/stubbing library.
  rspec.mock_with(:rspec)

  # Allow bare symbols (e.g. :focus) as metadata set to true.
  rspec.treat_symbols_as_metadata_keys_with_true_values = true

  # Run only examples tagged :focus, or everything when none are tagged.
  rspec.run_all_when_everything_filtered = true
  rspec.filter_run(:focus)

  # Randomise example order.
  rspec.order = 'random'
end
metadata ADDED
@@ -0,0 +1,120 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gutenberg_rdf
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Mike Cook
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-10-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 1.6.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 1.6.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1.3'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.3'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 2.14.1
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: 2.14.1
69
+ description: A Ruby wrapper providing a nice API for the Project Gutenberg RDF catalog
70
+ files. See the README for more information.
71
+ email:
72
+ - m@mikecook.co.uk
73
+ executables: []
74
+ extensions: []
75
+ extra_rdoc_files: []
76
+ files:
77
+ - .gitignore
78
+ - .rspec
79
+ - Gemfile
80
+ - LICENSE.txt
81
+ - README.md
82
+ - Rakefile
83
+ - gutenberg_rdf.gemspec
84
+ - lib/gutenberg_rdf.rb
85
+ - lib/gutenberg_rdf/rdf.rb
86
+ - lib/gutenberg_rdf/rdf/agent.rb
87
+ - lib/gutenberg_rdf/version.rb
88
+ - spec/gutenberg_rdf/rdf/agent_spec.rb
89
+ - spec/gutenberg_rdf/rdf_spec.rb
90
+ - spec/gutenberg_rdf_spec.rb
91
+ - spec/spec_helper.rb
92
+ homepage: ''
93
+ licenses:
94
+ - MIT
95
+ metadata: {}
96
+ post_install_message:
97
+ rdoc_options: []
98
+ require_paths:
99
+ - lib
100
+ required_ruby_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - '>='
103
+ - !ruby/object:Gem::Version
104
+ version: 2.0.0
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ requirements: []
111
+ rubyforge_project:
112
+ rubygems_version: 2.0.6
113
+ signing_key:
114
+ specification_version: 4
115
+ summary: A Ruby wrapper for the Project Gutenberg RDF catalog files.
116
+ test_files:
117
+ - spec/gutenberg_rdf/rdf/agent_spec.rb
118
+ - spec/gutenberg_rdf/rdf_spec.rb
119
+ - spec/gutenberg_rdf_spec.rb
120
+ - spec/spec_helper.rb