gutenberg_rdf 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: bbef70a3ac7d39b2bb3c3e0945ba6d722ffa1d2b
4
+ data.tar.gz: 98cd1b9df8ad4ff1507cea7a2ade4686aee9302b
5
+ SHA512:
6
+ metadata.gz: 0a2174ab980a295ec48ee2f8791b8bb8d12de541f8159d257f9e2e2ff57d0ad373e60ce40188e9a4007d5be0a54a98e3631532dee07820b609222112382b77e5
7
+ data.tar.gz: 2ee305ee3749ba4cfa6239af9acc49e6eea65f4d8ab461eccab474bc4bbdf47df433791b7b398b0a60eaac132255aa3f85123bca3a5e3f3814fa2881d0288deb
data/.gitignore ADDED
@@ -0,0 +1,20 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+
19
+ .ruby-*
20
+ .DS_Store
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in gutenberg_rdf.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Mike Cook
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,76 @@
1
+ # Gutenberg RDF
2
+
3
+ Gutenberg RDF is a Ruby wrapper for the Project Gutenberg RDF catalog book files,
4
+ providing a nice API to all the metadata contained within.
5
+
6
+ ## Requirements
7
+
8
+ * Ruby 2.0 or later - so that UTF-8 is the default source encoding
9
+ * Nokogiri - for parsing the RDF
10
+
11
+
12
+ ## Installation
13
+
14
+ Add this line to your application's Gemfile:
15
+
16
+ gem 'gutenberg_rdf'
17
+
18
+ And then execute:
19
+
20
+ $ bundle
21
+
22
+ Or install it yourself as:
23
+
24
+ $ gem install gutenberg_rdf
25
+
26
+ ## Usage
27
+
28
+ require 'gutenberg_rdf'
29
+
30
+ xml = Nokogiri::XML(File.new('/path/to/pg2746.rdf'))
31
+ book = GutenbergRdf::Rdf.new(xml)
32
+
33
+ puts book.id
34
+ #=> "2746"
35
+
36
+ puts book.type
37
+ #=> "Text"
38
+
39
+ puts book.title
40
+ #=> "Urbain Grandier"
41
+
42
+ puts book.subtitle
43
+ #=> "Celebrated Crimes"
44
+
45
+ puts book.authors.first.fullname
46
+ #=> "Alexandre Dumas"
47
+
48
+ puts book.subjects.first
49
+ #=> "Crime"
50
+
51
+ puts book.published
52
+ #=> "2004-09-22"
53
+
54
+ puts book.publisher
55
+ #=> "Project Gutenberg"
56
+
57
+ puts book.rights
58
+ #=> "Public domain in the USA."
59
+
60
+ puts book.language
61
+ #=> "en"
62
+
63
+ puts book.covers.first
64
+ #=> "http://www.gutenberg.org/ebooks/2746.cover.medium"
65
+
66
+ puts book.ebooks[3][:uri]
67
+ #=> "http://www.gutenberg.org/ebooks/2746.epub.images"
68
+
69
+
70
+ ## Contributing
71
+
72
+ 1. Fork it
73
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
74
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
75
+ 4. Push to the branch (`git push origin my-new-feature`)
76
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,28 @@
# Gem specification for gutenberg_rdf.
#
# The lib/ directory is put on the load path first so the version constant can
# be required without the gem being installed.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'gutenberg_rdf/version'

Gem::Specification.new do |gem|
  # Identity
  gem.name     = "gutenberg_rdf"
  gem.version  = GutenbergRdf::VERSION
  gem.license  = "MIT"
  gem.homepage = ""

  # Authorship
  gem.authors = ["Mike Cook"]
  gem.email   = ["m@mikecook.co.uk"]

  # Descriptions shown by the registry
  gem.summary     = "A Ruby wrapper for the Project Gutenberg RDF catalog files."
  gem.description = "A Ruby wrapper providing a nice API for the Project Gutenberg RDF catalog files. See the README for more information."

  # Packaged files are whatever git tracks; executables and test files are
  # picked out of that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.required_ruby_version = ">= 2.0.0" # so we have UTF-8 by default

  # Runtime dependency
  gem.add_dependency "nokogiri", "~> 1.6.0"

  # Development dependencies
  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rake"
  gem.add_development_dependency "rspec", "~> 2.14.1"
end
@@ -0,0 +1,58 @@
module GutenbergRdf
  class Rdf
    # Read-only view of one <pgterms:agent> element (an author or other
    # contributor) of a Project Gutenberg RDF record.
    #
    # The constructor accepts either the <pgterms:agent> element itself or a
    # node that has one as a direct child (for example the <rdf:RDF> root). In
    # the second case the first agent child is used. Every lookup is made
    # relative to that single agent element, so data belonging to other agents
    # in the same document is never mixed in.
    class Agent
      # The node that was handed to the constructor, unchanged.
      attr_reader :xml

      # @param xml [#name, #at_xpath] the agent element, or a parent of it
      def initialize(xml)
        @xml = xml
      end

      # @return [String, nil] the numeric part of an "NNNN/agents/<id>"
      #   about-attribute, or nil when the attribute is missing or malformed
      def id
        about = attribute_of(agent_node, 'about')
        about && about[%r{\A\d{4}/agents/(\d+)\z}, 1]
      end

      # @return [String] first and last name joined by a single space; blank
      #   parts are skipped so a single-name agent has no leading space
      def fullname
        [firstname, lastname].reject { |part| part.nil? || part.empty? }.join(' ')
      end

      # @return [String, nil] the text before the first comma of the name
      def lastname
        @lastname ||= name_parts[:last]
      end

      # @return [String] everything after the first comma, with the
      #   comma-separated pieces in reverse order ("Doe, Jon, Sir" => "Sir Jon")
      def firstname
        @firstname ||= name_parts[:first]
      end

      # @return [String, nil] text of <pgterms:birthdate>, nil when absent
      def birthdate
        text_of('pgterms:birthdate')
      end

      # @return [String, nil] text of <pgterms:deathdate>, nil when absent
      def deathdate
        text_of('pgterms:deathdate')
      end

      # @return [String, nil] the resource attribute of <pgterms:webpage>,
      #   nil when the element or attribute is absent
      def webpage
        attribute_of(child_at('pgterms:webpage'), 'resource')
      end

      # @return [Array<String>] text of each <pgterms:alias> of this agent
      def aliases
        return [] unless agent_node

        agent_node.xpath('pgterms:alias').map(&:text)
      end

      private

      # The <pgterms:agent> element all lookups are relative to. The element
      # name is compared without any prefix so both "agent" and
      # "pgterms:agent" are recognised.
      def agent_node
        return @agent_node if defined?(@agent_node)

        @agent_node =
          if xml.name.to_s.split(':').last == 'agent'
            xml
          else
            xml.at_xpath('pgterms:agent')
          end
      end

      # First child of the agent matching +path+, or nil.
      def child_at(path)
        agent_node && agent_node.at_xpath(path)
      end

      # Text of the first child matching +path+, or nil.
      def text_of(path)
        node = child_at(path)
        node && node.text
      end

      # Value of attribute +key+ on +node+, or nil if either is missing.
      def attribute_of(node, key)
        attr = node && node.attribute(key)
        attr && attr.content
      end

      # Splits "Last, First, Prefix" style names on commas.
      def name_parts
        @name_parts ||= begin
          parts = text_of('pgterms:name').to_s.split(/, */)
          last = parts.shift
          { first: parts.reverse.join(' '), last: last }
        end
      end
    end
  end
end
@@ -0,0 +1,125 @@
require 'date'

module GutenbergRdf
  # Read-only view of one Project Gutenberg RDF catalog record.
  #
  # Built from a parsed XML document; all accessors read from the document's
  # <rdf:RDF> root. Single-value accessors return nil when the corresponding
  # element is absent instead of raising.
  class Rdf
    # Zone offset appended to <dcterms:modified> values before parsing; the
    # timestamps in the fixtures carry no zone of their own.
    # NOTE(review): presumably the catalog server's local zone - confirm.
    MODIFIED_UTC_OFFSET = '-07:00'.freeze

    # The <rdf:RDF> root node every lookup starts from.
    attr_reader :xml

    # @param xml [#at_xpath] parsed document containing an rdf:RDF root
    def initialize(xml)
      @xml = xml.at_xpath('rdf:RDF')
    end

    # @return [String, nil] the part after "ebooks/" in the ebook's about
    #   attribute, nil when the element/attribute is missing or malformed
    def id
      ebook = xml.at_xpath('pgterms:ebook')
      about = ebook && ebook.attribute('about')
      about && about.content[%r{\Aebooks/(.+)\z}, 1]
    end

    # @return [String, nil] the record type value, e.g. "Text"
    def type
      text_of(xml, 'pgterms:ebook/dcterms:type/rdf:Description/rdf:value')
    end

    # @return [String, nil] first segment of the title
    def title
      titles.first
    end

    # @return [String] remaining title segments joined with " - ";
    #   an empty string when there is no subtitle (or no title at all)
    def subtitle
      titles.drop(1).join(' - ')
    end

    # @return [Array<Agent>] one Agent per <pgterms:agent> in the document
    def authors
      @authors ||= extract_authors
    end

    # @return [Array<String>] values of every subject whose memberOf resource
    #   ends in "LCSH"; other classification schemes are skipped
    def subjects
      xml.xpath('pgterms:ebook//dcterms:subject').flat_map do |entry|
        next [] unless lcsh_subject?(entry)

        entry.xpath('rdf:Description//rdf:value').map(&:text)
      end
    end

    # @return [String, nil] text of <dcterms:issued>
    def published
      text_of(xml, 'pgterms:ebook/dcterms:issued')
    end

    # @return [String, nil] text of <dcterms:publisher>
    def publisher
      text_of(xml, 'pgterms:ebook/dcterms:publisher')
    end

    # @return [String, nil] text of <dcterms:language>
    def language
      text_of(xml, 'pgterms:ebook/dcterms:language')
    end

    # @return [String, nil] text of <dcterms:rights>
    def rights
      text_of(xml, 'pgterms:ebook/dcterms:rights')
    end

    # @return [Array<String>] cover image URLs from file entries and marc901
    #   entries, sorted alphabetically with duplicates removed
    def covers
      (official_cover_images + other_cover_images).sort.uniq
    end

    # @return [Array<Hash>] one hash per <pgterms:file> with the keys
    #   :uri, :mime_type, :encoding and :modified (DateTime, or nil when the
    #   file has no <dcterms:modified>)
    def ebooks
      xml.xpath('//pgterms:file').map do |file|
        # Only the first format value is used when a file lists several.
        format = text_of(file, 'dcterms:format/rdf:Description/rdf:value').to_s
        datatypes = separate_mimetype_and_encoding(format)
        {
          uri: file.attribute('about').content,
          mime_type: datatypes[:mimetype],
          encoding: datatypes[:encoding],
          modified: parse_modified(text_of(file, 'dcterms:modified'))
        }
      end
    end

    private

    # Text of the first node matching +path+ below +node+, or nil.
    def text_of(node, path)
      found = node.at_xpath(path)
      found && found.text
    end

    def titles
      @titles ||= split_title_and_subtitle
    end

    # Splits the raw title into [title, subtitle, ...]. Line breaks take
    # precedence; a single-line title is split on ":" and, failing that,
    # on ";". Every segment is stripped of surrounding whitespace.
    def split_title_and_subtitle
      # Em dashes are replaced with plain ASCII hyphens.
      raw = text_of(xml, 'pgterms:ebook/dcterms:title').to_s.gsub(/—/, '-')

      segments = raw.split(/\n/)
      segments = segments.first.split(/:/) if segments.count == 1
      segments = segments.first.split(/;/) if segments.count == 1

      segments.map(&:strip)
    end

    def extract_authors
      xml.xpath('//pgterms:agent').map { |agent| Agent.new(agent) }
    end

    # True when the subject's memberOf resource ends in "LCSH".
    def lcsh_subject?(entry)
      member = entry.at_xpath('rdf:Description/dcam:memberOf')
      resource = member && member.attribute('resource')
      !!(resource && resource.text =~ /LCSH\z/)
    end

    # About-URLs of every file that has a format value containing "image".
    def official_cover_images
      images = xml.xpath('//pgterms:file').select do |file|
        file.xpath('dcterms:format/rdf:Description//rdf:value').any? { |value| value.text =~ /image/ }
      end
      images.map { |file| file.attribute('about').content }
    end

    # marc901 entries, with the server-side file:// prefix rewritten to the
    # public gutenberg.org URL.
    def other_cover_images
      xml.xpath('pgterms:ebook//pgterms:marc901').map do |node|
        node.text.sub(%r{\Afile:///public/vhost/g/gutenberg/html}, 'http://www.gutenberg.org')
      end
    end

    # Parses a zone-less timestamp using MODIFIED_UTC_OFFSET; nil stays nil.
    def parse_modified(stamp)
      stamp && DateTime.parse(stamp + MODIFIED_UTC_OFFSET)
    end

    # "text/plain; charset=utf-8" => {mimetype: "text/plain", encoding: "utf-8"}
    # The encoding is an empty string when no parameter is present.
    def separate_mimetype_and_encoding(string)
      mimetype, *params = string.split(/; */)
      { mimetype: mimetype, encoding: params.join(';').sub('charset=', '') }
    end
  end
end
@@ -0,0 +1,3 @@
module GutenbergRdf
  # Release number of the gem; gutenberg_rdf.gemspec reads it as spec.version.
  VERSION = '0.0.2'
end
@@ -0,0 +1,13 @@
1
+ require 'nokogiri'
2
+
3
+ require "gutenberg_rdf/rdf"
4
+ require "gutenberg_rdf/rdf/agent"
5
+ require "gutenberg_rdf/version"
6
+
module GutenbergRdf
  # Parses the RDF file at +path+ and wraps it in an Rdf object.
  #
  # The file handle is closed once Nokogiri has consumed it, whether or not
  # parsing succeeds, so repeated calls do not leak open descriptors.
  #
  # @param path [String] location of a Project Gutenberg .rdf file
  # @return [GutenbergRdf::Rdf]
  def self.parse(path)
    file = File.new(path)
    begin
      Rdf.new(Nokogiri::XML(file))
    ensure
      file.close
    end
  end
end
@@ -0,0 +1,112 @@
require 'spec_helper'

module GutenbergRdf
  class Rdf
    describe Agent do
      # Builds an Agent from +agent_xml+ wrapped in an rdf:RDF root that
      # declares the dcterms, pgterms and rdf namespace prefixes. The Agent is
      # handed the rdf:RDF root node, as in the original fixtures.
      def build_agent(agent_xml)
        namespaces = 'xmlns:dcterms="http://purl.org/dc/terms/" ' \
                     'xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" ' \
                     'xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"'
        document = Nokogiri::XML("<rdf:RDF #{namespaces}>#{agent_xml}</rdf:RDF>")
        Agent.new(document.at_xpath('rdf:RDF'))
      end

      let(:agent) do
        build_agent('
          <pgterms:agent rdf:about="2009/agents/402">
            <pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
            <pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
            <pgterms:name>Doe, Jon James</pgterms:name>
            <pgterms:alias>Doe, Jon</pgterms:alias>
            <pgterms:alias>Doe, J. J.</pgterms:alias>
            <pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Jon_James_Doe"/>
          </pgterms:agent>
        ')
      end

      it "expects an agent ID" do
        expect(agent.id).to eql '402'
      end

      it "expects the last name" do
        expect(agent.lastname).to eql 'Doe'
      end

      it "expects the first name(s)" do
        expect(agent.firstname).to eql 'Jon James'
      end

      it "expects the full name" do
        expect(agent.fullname).to eql 'Jon James Doe'
      end

      it "expects a birth date" do
        expect(agent.birthdate).to eql '1830'
      end

      it "expects a death date" do
        expect(agent.deathdate).to eql '1905'
      end

      it "expects a webpage" do
        expect(agent.webpage).to eql 'http://en.wikipedia.org/wiki/Jon_James_Doe'
      end

      it "expects any alias names" do
        expect(agent.aliases[0]).to eql 'Doe, Jon'
        expect(agent.aliases[1]).to eql 'Doe, J. J.'
      end

      context "when only a single name is given" do
        let(:agent) do
          build_agent('
            <pgterms:agent rdf:about="2009/agents/402">
              <pgterms:name>Dato</pgterms:name>
            </pgterms:agent>
          ')
        end

        it "expects it to be assigned to the last name" do
          expect(agent.lastname).to eql 'Dato'
        end
        it "expects firstname to be an empty string" do
          expect(agent.firstname).to eql ''
        end
      end

      context "when the name has a suffix" do
        let(:agent) do
          build_agent('
            <pgterms:agent rdf:about="2009/agents/402">
              <pgterms:name>Doe, Jon, Sir</pgterms:name>
            </pgterms:agent>
          ')
        end

        it "expects the correct name order" do
          expect(agent.firstname).to eql 'Sir Jon'
          expect(agent.lastname).to eql 'Doe'
        end
      end

      context "when full name is given in (brackets)" do
        let(:agent) do
          # The about attribute uses the declared rdf: prefix, like every
          # other fixture in this file.
          build_agent('
            <pgterms:agent rdf:about="2009/agents/402">
              <pgterms:name>Doe, J. J. (Jon James)</pgterms:name>
            </pgterms:agent>
          ')
        end

        it "expects initials to be replaced by name in brackets" do
          pending "Not yet implemented"
          expect(agent.firstname).to eql 'Jon James'
          expect(agent.lastname).to eql 'Doe'
        end
        it "expects the name (excluding name in brackets) to be added to the aliases"
        it "should not have duplicate aliases"
      end
    end
  end
end
@@ -0,0 +1,374 @@
require 'spec_helper'

module GutenbergRdf
  describe Rdf do
    # Wraps +body+ in an rdf:RDF root that declares every namespace prefix the
    # fixtures below use (dcam, dcterms, pgterms and rdf), so no fixture
    # relies on the parser tolerating an undeclared prefix.
    def rdf_document(body)
      '<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">' +
        body + '</rdf:RDF>'
    end

    let(:xml) do
      rdf_document('
        <pgterms:ebook rdf:about="ebooks/98765">
          <dcterms:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2006-09-28</dcterms:issued>
          <dcterms:language rdf:datatype="http://purl.org/dc/terms/RFC4646">en</dcterms:language>
          <dcterms:publisher>Project Gutenberg</dcterms:publisher>
          <dcterms:rights>Public domain in the USA.</dcterms:rights>
        </pgterms:ebook>
      ')
    end
    # Nested groups override :xml only; :rdf is always built from it.
    let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }

    it "expects an id" do
      expect(rdf.id).to eql "98765"
    end
    it "expects a published date" do
      expect(rdf.published).to eql "2006-09-28"
    end
    it "expects a publisher" do
      expect(rdf.publisher).to eql "Project Gutenberg"
    end
    it "expects a language" do
      expect(rdf.language).to eql "en"
    end
    it "expects the rights" do
      expect(rdf.rights).to eql "Public domain in the USA."
    end

    describe "#type" do
      let(:xml) do
        rdf_document('
          <pgterms:ebook rdf:about="ebooks/98765">
            <dcterms:type>
              <rdf:Description>
                <dcam:memberOf rdf:resource="http://purl.org/dc/terms/DCMIType"/>
                <rdf:value>Text</rdf:value>
              </rdf:Description>
            </dcterms:type>
          </pgterms:ebook>
        ')
      end

      it "expect the type of entity" do
        expect(rdf.type).to eql 'Text'
      end
    end

    describe "Titles" do
      let(:xml) do
        rdf_document('
          <pgterms:ebook rdf:about="ebooks/98765">
            <dcterms:title>A Great Title</dcterms:title>
          </pgterms:ebook>
        ')
      end

      it "expects a title" do
        expect(rdf.title).to eql 'A Great Title'
      end
      it "expects subtitle to be empty" do
        expect(rdf.subtitle).to eql ''
      end

      context "with a title and subtitle, on separate lines" do
        let(:xml) do
          rdf_document('
            <pgterms:ebook rdf:about="ebooks/98765">
              <dcterms:title>A Great Multi-Title
                Or, a Subtitle</dcterms:title>
            </pgterms:ebook>
          ')
        end

        it "expects the title to be the first line" do
          expect(rdf.title).to eql 'A Great Multi-Title'
        end
        it "expects the subtitle to be the second line" do
          expect(rdf.subtitle).to eql 'Or, a Subtitle'
        end
      end

      context "when title:subtitle are separated by a colon" do
        let(:xml) do
          rdf_document('
            <pgterms:ebook rdf:about="ebooks/98765">
              <dcterms:title>A Great Multi-Title: And a Subtitle</dcterms:title>
            </pgterms:ebook>
          ')
        end

        it "expects a title" do
          expect(rdf.title).to eql 'A Great Multi-Title'
        end
        it "expects a subtitle" do
          expect(rdf.subtitle).to eql 'And a Subtitle'
        end
      end

      context "when title; and subtitle are separated by a semi-colon" do
        let(:xml) do
          rdf_document('
            <pgterms:ebook rdf:about="ebooks/98765">
              <dcterms:title>A Great Multi-Title; Or, a Subtitle</dcterms:title>
            </pgterms:ebook>
          ')
        end

        it "expects a title" do
          expect(rdf.title).to eql 'A Great Multi-Title'
        end
        it "expects a subtitle" do
          expect(rdf.subtitle).to eql 'Or, a Subtitle'
        end

        context "...except when subtitles already exists" do
          let(:xml) do
            rdf_document('
              <pgterms:ebook rdf:about="ebooks/98765">
                <dcterms:title>A Great Multi-Title; and some other text
                  Then a Subtitle on a newline</dcterms:title>
              </pgterms:ebook>
            ')
          end

          it "expects a title" do
            expect(rdf.title).to eql 'A Great Multi-Title; and some other text'
          end
          it "expects a subtitle" do
            expect(rdf.subtitle).to eql 'Then a Subtitle on a newline'
          end
        end
      end
    end

    describe "#authors" do
      let(:xml) do
        rdf_document('
          <pgterms:agent rdf:about="2009/agents/402">
            <pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
            <pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
            <pgterms:name>Dodge, Mary Mapes</pgterms:name>
            <pgterms:alias>Dodge, Mary</pgterms:alias>
            <pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Mary_Mapes_Dodge"/>
          </pgterms:agent>
          <pgterms:agent rdf:about="2009/agents/116">
            <pgterms:alias>Verschillende</pgterms:alias>
            <pgterms:name>Various</pgterms:name>
          </pgterms:agent>
        ')
      end

      it "expects a Array" do
        expect(rdf.authors.class).to be Array
      end
      it "expects correct number to be returned" do
        expect(rdf.authors.count).to be 2
      end
      it "expects an author object" do
        expect(rdf.authors.first.class).to be Rdf::Agent
      end
    end

    describe "#subjects" do
      let(:xml) do
        rdf_document(%q{
          <pgterms:ebook rdf:about="ebooks/98765">
            <dcterms:subject>
              <rdf:Description>
                <dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCSH"/>
                <rdf:value>Children's literature -- Periodicals</rdf:value>
                <rdf:value>Children's periodicals, American</rdf:value>
              </rdf:Description>
            </dcterms:subject>
            <dcterms:subject>
              <rdf:Description>
                <dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCC"/>
                <rdf:value>PZ</rdf:value>
              </rdf:Description>
            </dcterms:subject>
          </pgterms:ebook>
        })
      end

      it "expects correct number to be returned" do
        expect(rdf.subjects.count).to be 2
      end
      it "expects the correct data" do
        expect(rdf.subjects.first).to eql "Children's literature -- Periodicals"
        expect(rdf.subjects.last).to eql "Children's periodicals, American"
      end
    end

    describe "#covers" do
      describe "official PG covers" do
        let(:xml) do
          rdf_document('
            <pgterms:ebook rdf:about="ebooks/12345">
              <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/12345.epub.noimages"/>
              <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/12345.cover.medium"/>
              <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/12345.cover.small"/>
              <pgterms:marc901>http://www.gutenberg.org/files/12345/12345-h/images/cover.jpg</pgterms:marc901>
            </pgterms:ebook>
            <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/12345.epub.noimages">
              <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">92652</dcterms:extent>
              <dcterms:format>
                <rdf:Description>
                  <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
                  <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/epub+zip</rdf:value>
                </rdf:Description>
              </dcterms:format>
              <dcterms:isFormatOf rdf:resource="ebooks/12345"/>
              <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-09-21T19:22:32.115259</dcterms:modified>
            </pgterms:file>
            <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/12345.cover.medium">
              <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">10856</dcterms:extent>
              <dcterms:format>
                <rdf:Description>
                  <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
                  <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
                </rdf:Description>
              </dcterms:format>
              <dcterms:isFormatOf rdf:resource="ebooks/12345"/>
              <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-09-21T19:22:34.484114</dcterms:modified>
            </pgterms:file>
            <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/12345.cover.small">
              <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1904</dcterms:extent>
              <dcterms:format>
                <rdf:Description>
                  <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
                  <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
                </rdf:Description>
              </dcterms:format>
              <dcterms:isFormatOf rdf:resource="ebooks/12345"/>
              <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-09-21T19:22:34.379124</dcterms:modified>
            </pgterms:file>
          ')
        end

        it "expects the correct number of entries returned" do
          expect(rdf.covers.count).to be 3
        end
        it "expects those to be used" do
          expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/ebooks/12345.cover.medium'
          expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/ebooks/12345.cover.small'
        end
        it "expects any other images to be listed after the official ones" do
          expect(rdf.covers[2]).to eql 'http://www.gutenberg.org/files/12345/12345-h/images/cover.jpg'
        end
      end

      describe "HTML ebook cover image" do
        let(:xml) do
          rdf_document('
            <pgterms:ebook rdf:about="ebooks/12345">
              <pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/12345/12345-rst/images/cover.jpg</pgterms:marc901>
              <pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/12345/12345-h/images/cover.jpg</pgterms:marc901>
              <pgterms:marc901>http://www.gutenberg.org/files/12345/12345-h/images/cover.jpg</pgterms:marc901>
            </pgterms:ebook>
          ')
        end

        it "expects only unique entries" do
          expect(rdf.covers.count).to be 2
        end
        it "should convert File URIs to the Gutenberg URL" do
          expect(rdf.covers.first).to match 'http://www.gutenberg.org'
        end
        it "expects the covers to be listed in the correct order" do
          expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/files/12345/12345-h/images/cover.jpg'
          expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/files/12345/12345-rst/images/cover.jpg'
        end
      end
    end

    describe "#ebooks" do
      let(:xml) do
        rdf_document('
          <pgterms:ebook rdf:about="ebooks/98765">
            <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/98765.txt.utf-8"/>
            <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/98765.zip"/>
          </pgterms:ebook>
          <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
            <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
            <dcterms:format>
              <rdf:Description>
                <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
                <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=utf-8</rdf:value>
              </rdf:Description>
            </dcterms:format>
            <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
            <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
          </pgterms:file>
          <pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
            <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
            <dcterms:format>
              <rdf:Description>
                <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
                <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
                <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
              </rdf:Description>
            </dcterms:format>
            <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
            <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
          </pgterms:file>
        ')
      end

      it "expects the correct number of entries" do
        expect(rdf.ebooks.count).to be 2
      end
      it "expects an entry Hash to have the correct keys" do
        expect(rdf.ebooks.first).to have_key :uri
        expect(rdf.ebooks.first).to have_key :mime_type
        expect(rdf.ebooks.first).to have_key :encoding
        expect(rdf.ebooks.first).to have_key :modified
      end
      it "expects the modified value to be a DateTime" do
        expect(rdf.ebooks.first[:modified].class).to be DateTime
      end

      it "should return the URL" do
        expect(rdf.ebooks.first[:uri]).to eql 'http://www.gutenberg.org/ebooks/98765.txt.utf-8'
      end
      it "should return the mime_type" do
        expect(rdf.ebooks.first[:mime_type]).to eql 'text/plain'
      end
      it "should return the encoding" do
        expect(rdf.ebooks.first[:encoding]).to eql 'utf-8'
      end
      it "should return the modified datetime" do
        expect(rdf.ebooks.first[:modified].to_s).to eql '2010-02-16T08:29:52-07:00'
      end

      context "when there are two mime-types" do
        let(:xml) do
          rdf_document('
            <pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
              <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
              <dcterms:format>
                <rdf:Description>
                  <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
                  <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
                  <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
                </rdf:Description>
              </dcterms:format>
              <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
              <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
            </pgterms:file>
          ')
        end

        it "should use just the first one" do
          expect(rdf.ebooks.first[:mime_type]).to eql 'application/zip'
        end
        it "expects the encoding to be an empty string" do
          expect(rdf.ebooks.first[:encoding]).to eql ''
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
require 'spec_helper'

module GutenbergRdf
  describe ".parse" do
    # In-memory stand-in for an RDF file on disk; File.new is stubbed to
    # return it, so nothing is read from the filesystem.
    let(:file) do
      StringIO.new('<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"><pgterms:ebook rdf:about="ebooks/98765"/></rdf:RDF>')
    end

    it "expects an Rdf object" do
      File.stub(:new).and_return(file)

      parsed = GutenbergRdf.parse(file)

      expect(parsed.class).to be Rdf
      expect(parsed.id).to eql '98765'
    end
  end
end
@@ -0,0 +1,9 @@
require 'gutenberg_rdf'

# Suite-wide RSpec settings, loaded by every spec file via
# `require 'spec_helper'`.
RSpec.configure do |rspec|
  # Use RSpec's built-in mocking framework.
  rspec.mock_with :rspec

  # Allow bare symbols (e.g. :focus) as metadata set to true.
  rspec.treat_symbols_as_metadata_keys_with_true_values = true

  # Run only examples tagged :focus, falling back to the whole suite when
  # the filter matches nothing.
  rspec.filter_run :focus
  rspec.run_all_when_everything_filtered = true

  # Run examples in random order.
  rspec.order = 'random'
end
metadata ADDED
@@ -0,0 +1,120 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gutenberg_rdf
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Mike Cook
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-10-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 1.6.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 1.6.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1.3'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.3'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 2.14.1
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: 2.14.1
69
+ description: A Ruby wrapper providing a nice API for the Project Gutenberg RDF catalog
70
+ files. See the README for more information.
71
+ email:
72
+ - m@mikecook.co.uk
73
+ executables: []
74
+ extensions: []
75
+ extra_rdoc_files: []
76
+ files:
77
+ - .gitignore
78
+ - .rspec
79
+ - Gemfile
80
+ - LICENSE.txt
81
+ - README.md
82
+ - Rakefile
83
+ - gutenberg_rdf.gemspec
84
+ - lib/gutenberg_rdf.rb
85
+ - lib/gutenberg_rdf/rdf.rb
86
+ - lib/gutenberg_rdf/rdf/agent.rb
87
+ - lib/gutenberg_rdf/version.rb
88
+ - spec/gutenberg_rdf/rdf/agent_spec.rb
89
+ - spec/gutenberg_rdf/rdf_spec.rb
90
+ - spec/gutenberg_rdf_spec.rb
91
+ - spec/spec_helper.rb
92
+ homepage: ''
93
+ licenses:
94
+ - MIT
95
+ metadata: {}
96
+ post_install_message:
97
+ rdoc_options: []
98
+ require_paths:
99
+ - lib
100
+ required_ruby_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - '>='
103
+ - !ruby/object:Gem::Version
104
+ version: 2.0.0
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ requirements: []
111
+ rubyforge_project:
112
+ rubygems_version: 2.0.6
113
+ signing_key:
114
+ specification_version: 4
115
+ summary: A Ruby wrapper for the Project Gutenberg RDF catalog files.
116
+ test_files:
117
+ - spec/gutenberg_rdf/rdf/agent_spec.rb
118
+ - spec/gutenberg_rdf/rdf_spec.rb
119
+ - spec/gutenberg_rdf_spec.rb
120
+ - spec/spec_helper.rb