RubyGems - gutenberg_rdf - Versions diffs - 0.0.2 - Mend

gutenberg_rdf 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

checksums.yaml +7 -0
data/.gitignore +20 -0
data/.rspec +2 -0
data/Gemfile +4 -0
data/LICENSE.txt +22 -0
data/README.md +76 -0
data/Rakefile +1 -0
data/gutenberg_rdf.gemspec +28 -0
data/lib/gutenberg_rdf/rdf/agent.rb +58 -0
data/lib/gutenberg_rdf/rdf.rb +125 -0
data/lib/gutenberg_rdf/version.rb +3 -0
data/lib/gutenberg_rdf.rb +13 -0
data/spec/gutenberg_rdf/rdf/agent_spec.rb +112 -0
data/spec/gutenberg_rdf/rdf_spec.rb +374 -0
data/spec/gutenberg_rdf_spec.rb +16 -0
data/spec/spec_helper.rb +9 -0
metadata +120 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: bbef70a3ac7d39b2bb3c3e0945ba6d722ffa1d2b
+  data.tar.gz: 98cd1b9df8ad4ff1507cea7a2ade4686aee9302b
+SHA512:
+  metadata.gz: 0a2174ab980a295ec48ee2f8791b8bb8d12de541f8159d257f9e2e2ff57d0ad373e60ce40188e9a4007d5be0a54a98e3631532dee07820b609222112382b77e5
+  data.tar.gz: 2ee305ee3749ba4cfa6239af9acc49e6eea65f4d8ab461eccab474bc4bbdf47df433791b7b398b0a60eaac132255aa3f85123bca3a5e3f3814fa2881d0288deb

data/.gitignore ADDED Viewed

@@ -0,0 +1,20 @@
+*.gem
+*.rbc
+.bundle
+.config
+.yardoc
+Gemfile.lock
+InstalledFiles
+_yardoc
+coverage
+doc/
+lib/bundler/man
+pkg
+rdoc
+spec/reports
+test/tmp
+test/version_tmp
+tmp
+.ruby-*
+.DS_Store

data/.rspec ADDED Viewed

@@ -0,0 +1,2 @@
+--color
+--format progress

data/Gemfile ADDED Viewed

@@ -0,0 +1,4 @@
+source 'https://rubygems.org'
+# Specify your gem's dependencies in gutenberg_rdf.gemspec
+gemspec

data/LICENSE.txt ADDED Viewed

@@ -0,0 +1,22 @@
+Copyright (c) 2013 Mike Cook
+MIT License
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.md ADDED Viewed

@@ -0,0 +1,76 @@
+# Gutenberg RDF
+Gutenberg RDF is a Ruby wrapper for the Project Gutenberg RDF catalog book files,
+providing a nice API to all the metadata contained within.
+## Requirements
+*  Ruby 2.0 - this is so we get UTF-8 by default
+*  Nokogiri - for parsing the RDF
+## Installation
+Add this line to your application's Gemfile:
+    gem 'gutenberg_rdf'
+And then execute:
+    $ bundle
+Or install it yourself as:
+    $ gem install gutenberg_rdf
+## Usage
+    require 'gutenberg_rdf'
+    xml = Nokogiri::XML(File.new('/path/to/pg2746.rdf'))
+    book = GutenbergRdf::Rdf.new(xml)
+    puts book.id
+    #=> "2746"
+    puts book.type
+    #=> "Text"
+    puts book.title
+    #=> "Urbain Grandier"
+    puts book.subtitle
+    #=> "Celebrated Crimes"
+    puts book.authors.first.fullname
+    #=> "Alexandre Dumas"
+    puts book.subjects.first
+    #=> "Crime"
+    puts book.published
+    #=> "2004-09-22"
+    puts book.publisher
+    #=> "Project Gutenberg"
+    puts book.rights
+    #=> "Public domain in the USA."
+    puts book.language
+    #=> "en"
+    puts book.covers.first
+    #=> "http://www.gutenberg.org/ebooks/2746.cover.medium"
+    puts book.ebooks[3][:uri]
+    #=> "http://www.gutenberg.org/ebooks/2746.epub.images"
+## Contributing
+1. Fork it
+2. Create your feature branch (`git checkout -b my-new-feature`)
+3. Commit your changes (`git commit -am 'Add some feature'`)
+4. Push to the branch (`git push origin my-new-feature`)
+5. Create new Pull Request

data/Rakefile ADDED Viewed

@@ -0,0 +1 @@
+require "bundler/gem_tasks"

data/gutenberg_rdf.gemspec ADDED Viewed

@@ -0,0 +1,28 @@
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'gutenberg_rdf/version'
+Gem::Specification.new do |spec|
+  spec.name          = "gutenberg_rdf"
+  spec.version       = GutenbergRdf::VERSION
+  spec.authors       = ["Mike Cook"]
+  spec.email         = ["m@mikecook.co.uk"]
+  spec.summary       = %q{A Ruby wrapper for the Project Gutenberg RDF catalog files.}
+  spec.description   = %q{A Ruby wrapper providing a nice API for the Project Gutenberg RDF catalog files. See the README for more information.}
+  spec.homepage      = ""
+  spec.license       = "MIT"
+  spec.files         = `git ls-files`.split($/)
+  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
+  spec.require_paths = ["lib"]
+  spec.required_ruby_version = ">= 2.0.0" # so we have UTF-8 by default
+  spec.add_dependency "nokogiri", "~> 1.6.0"
+  spec.add_development_dependency "bundler", "~> 1.3"
+  spec.add_development_dependency "rake"
+  spec.add_development_dependency "rspec", "~> 2.14.1"
+end

data/lib/gutenberg_rdf/rdf/agent.rb ADDED Viewed

@@ -0,0 +1,58 @@
+module GutenbergRdf
+  class Rdf
+    class Agent
+      attr_reader :xml
+      def initialize(xml)
+        @xml = xml
+      end
+      def id
+        xml.at_xpath('pgterms:agent').attribute('about').content.match(/\A\d\d\d\d\/agents\/(\d+)\z/)[1]
+      end
+      def fullname
+        [firstname, lastname].join(' ')
+      end
+      def lastname
+        @lastname ||= name_parts[:last]
+      end
+      def firstname
+        @firstname ||= name_parts[:first]
+      end
+      def birthdate
+        xml.at_xpath('pgterms:agent/pgterms:birthdate').text
+      end
+      def deathdate
+        xml.at_xpath('pgterms:agent/pgterms:deathdate').text
+      end
+      def webpage
+        xml.at_xpath('pgterms:agent/pgterms:webpage').attribute('resource').content
+      end
+      def aliases
+        entries = Array.new
+        xml.xpath('//pgterms:alias').each do |name|
+          entries << name.text
+        end
+        entries
+      end
+    private
+      def name_parts
+        parts = xml.xpath('//pgterms:name').text.split(/, */)
+        last  = parts.shift
+        first = parts.reverse.join(' ')
+        {first: first, last: last}
+      end
+    end
+  end
+end

data/lib/gutenberg_rdf/rdf.rb ADDED Viewed

@@ -0,0 +1,125 @@
+require 'date'
+module GutenbergRdf
+  class Rdf
+    attr_reader :xml
+    def initialize(xml)
+      @xml = xml.at_xpath('rdf:RDF')
+    end
+    def id
+      xml.at_xpath('pgterms:ebook').attribute('about').content.match(/\Aebooks\/(.+)\z/)[1]
+    end
+    def type
+      xml.at_xpath('pgterms:ebook/dcterms:type/rdf:Description/rdf:value').text
+    end
+    def title
+      titles.first
+    end
+    def subtitle
+      titles[1..-1].join(' - ')
+    end
+    def authors
+      @authors ||= extract_authors
+    end
+    def subjects
+      entries = Array.new
+      xml.xpath('pgterms:ebook//dcterms:subject').each do |entry|
+        next unless entry.at_xpath('rdf:Description/dcam:memberOf').attribute('resource').text.match(/LCSH\z/)
+        entry.xpath('rdf:Description//rdf:value').each do |value|
+          entries << value.text
+        end
+      end
+      entries
+    end
+    def published
+      xml.at_xpath('pgterms:ebook/dcterms:issued').text
+    end
+    def publisher
+      xml.at_xpath('pgterms:ebook/dcterms:publisher').text
+    end
+    def language
+      xml.at_xpath('pgterms:ebook/dcterms:language').text
+    end
+    def rights
+      xml.at_xpath('pgterms:ebook/dcterms:rights').text
+    end
+    def covers
+      official_cover_images.concat(other_cover_images).sort.uniq
+    end
+    def ebooks
+      files = Array.new
+      xml.xpath('//pgterms:file').each do |file|
+        uri = file.attribute('about').content
+        datatypes = separate_mimetype_and_encoding(file.at_xpath('dcterms:format/rdf:Description/rdf:value').text)
+        modified = DateTime.parse(file.at_xpath('dcterms:modified').text + '-07:00')
+        files << {uri: uri, mime_type: datatypes[:mimetype], encoding: datatypes[:encoding], modified: modified}
+      end
+      files
+    end
+  private
+    def titles
+      @titles ||= split_title_and_subtitle
+    end
+    def split_title_and_subtitle
+      # Note this gsub is replacing UTF-8 hyphens with normal ASCII ones
+      t = xml.at_xpath('pgterms:ebook/dcterms:title').text.gsub(/—/, '-')
+      title_array = t.split(/\n/)
+      title_array = title_array.first.split(/:/) if title_array.count == 1
+      title_array = title_array.first.split(/;/) if title_array.count == 1
+      title_array.each(&:strip!)
+    end
+    def extract_authors
+      entries = Array.new
+      xml.xpath('//pgterms:agent').each do |agent|
+        entries << Agent.new(agent)
+      end
+      entries
+    end
+    def official_cover_images
+      entries = Array.new
+      xml.xpath('//pgterms:file').each do |file|
+        url = file.attribute('about').content
+        entries << url if file.xpath('dcterms:format/rdf:Description//rdf:value').detect { |v| v.text.match(/image/) }
+      end
+      entries
+    end
+    def other_cover_images
+      entries = Array.new
+      xml.xpath('pgterms:ebook//pgterms:marc901').each do |node|
+        cover = node.text
+        cover.sub!(/\Afile:\/\/\/public\/vhost\/g\/gutenberg\/html/, 'http://www.gutenberg.org')
+        entries << cover
+      end
+      entries
+    end
+    def separate_mimetype_and_encoding(string)
+      parts = string.split(/; */)
+      m = parts.shift
+      e = parts.join(';').sub('charset=', '')
+      {mimetype: m, encoding: e}
+    end
+  end
+end

data/lib/gutenberg_rdf/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module GutenbergRdf
+  VERSION = "0.0.2"
+end

data/lib/gutenberg_rdf.rb ADDED Viewed

@@ -0,0 +1,13 @@
+require 'nokogiri'
+require "gutenberg_rdf/rdf"
+require "gutenberg_rdf/rdf/agent"
+require "gutenberg_rdf/version"
+module GutenbergRdf
+  def self.parse(path)
+    Rdf.new(Nokogiri::XML(File.new(path)))
+  end
+end

data/spec/gutenberg_rdf/rdf/agent_spec.rb ADDED Viewed

@@ -0,0 +1,112 @@
+require 'spec_helper'
+module GutenbergRdf
+  class Rdf
+    describe Agent do
+      let(:agent) do
+        xml = '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+                <pgterms:agent rdf:about="2009/agents/402">
+                  <pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
+                  <pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
+                  <pgterms:name>Doe, Jon James</pgterms:name>
+                  <pgterms:alias>Doe, Jon</pgterms:alias>
+                  <pgterms:alias>Doe, J. J.</pgterms:alias>
+                  <pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Jon_James_Doe"/>
+                </pgterms:agent>
+              </rdf:RDF>'
+        rdf = Nokogiri::XML(xml)
+        Agent.new(rdf.at_xpath('rdf:RDF'))
+      end
+      it "expects an agent ID" do
+        expect(agent.id).to eql '402'
+      end
+      it "expects the last name" do
+        expect(agent.lastname).to eql 'Doe'
+      end
+      it "expects the first name(s)" do
+        expect(agent.firstname).to eql 'Jon James'
+      end
+      it "expects the full name" do
+        expect(agent.fullname).to eql 'Jon James Doe'
+      end
+      it "expects a birth date" do
+        expect(agent.birthdate).to eql '1830'
+      end
+      it "expects a death date" do
+        expect(agent.deathdate).to eql '1905'
+      end
+      it "expects a webpage" do
+        expect(agent.webpage).to eql 'http://en.wikipedia.org/wiki/Jon_James_Doe'
+      end
+      it "expects any alias names" do
+        expect(agent.aliases[0]).to eql 'Doe, Jon'
+        expect(agent.aliases[1]).to eql 'Doe, J. J.'
+      end
+      context "when only a single name is given" do
+        let(:agent) do
+          xml = '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+                   <pgterms:agent rdf:about="2009/agents/402">
+                     <pgterms:name>Dato</pgterms:name>
+                   </pgterms:agent>
+                 </rdf:RDF>'
+          rdf = Nokogiri::XML(xml)
+          Agent.new(rdf.at_xpath('rdf:RDF'))
+        end
+        it "expects it to be assigned to the last name" do
+          expect(agent.lastname).to eql 'Dato'
+        end
+        it "expects firstname to be an empty string" do
+          expect(agent.firstname).to eql ''
+        end
+      end
+      context "when the name has a suffix" do
+        let(:agent) do
+          xml = '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+                   <pgterms:agent rdf:about="2009/agents/402">
+                     <pgterms:name>Doe, Jon, Sir</pgterms:name>
+                   </pgterms:agent>
+                 </rdf:RDF>'
+          rdf = Nokogiri::XML(xml)
+          Agent.new(rdf.at_xpath('rdf:RDF'))
+        end
+        it "expects the correct name order" do
+          expect(agent.firstname).to eql 'Sir Jon'
+          expect(agent.lastname).to eql 'Doe'
+        end
+      end
+      context "when full name is given in (brackets)" do
+        let(:agent) do
+          xml = '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+                   <pgterms:agent agent:about="2009/agents/402">
+                     <pgterms:name>Doe, J. J. (Jon James)</pgterms:name>
+                   </pgterms:agent>
+                 </rdf:RDF>'
+          rdf = Nokogiri::XML(xml)
+          Agent.new(rdf.at_xpath('rdf:RDF'))
+        end
+        it "expects initials to replaced by name in brackets" do
+          pending "Not yet implemented"
+          expect(agent.firstname).to eql 'Jon James'
+          expect(agent.lastname).to eql 'Doe'
+        end
+        it "expects the name (excluding name in brackets) to be added to the aliases"
+        it "should not have duplicate aliases"
+      end
+    end
+  end
+end

data/spec/gutenberg_rdf/rdf_spec.rb ADDED Viewed

@@ -0,0 +1,374 @@
+require 'spec_helper'
+module GutenbergRdf
+  describe Rdf do
+    let(:xml) do
+      '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+           <pgterms:ebook rdf:about="ebooks/98765">
+             <dcterms:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2006-09-28</dcterms:issued>
+             <dcterms:language rdf:datatype="http://purl.org/dc/terms/RFC4646">en</dcterms:language>
+             <dcterms:publisher>Project Gutenberg</dcterms:publisher>
+             <dcterms:rights>Public domain in the USA.</dcterms:rights>
+           </pgterms:ebook>
+       </rdf:RDF>'
+    end
+    let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
+    it "expects an id" do
+      expect(rdf.id).to eql "98765"
+    end
+    it "expects a published date" do
+      expect(rdf.published).to eql "2006-09-28"
+    end
+    it "expects a publisher" do
+      expect(rdf.publisher).to eql "Project Gutenberg"
+    end
+    it "expects a language" do
+      expect(rdf.language).to eql "en"
+    end
+    it "expects the rights" do
+      expect(rdf.rights).to eql "Public domain in the USA."
+    end
+    describe "#type" do
+      let(:xml) do
+        '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+           <pgterms:ebook rdf:about="ebooks/98765">
+             <dcterms:type>
+               <rdf:Description>
+                 <dcam:memberOf rdf:resource="http://purl.org/dc/terms/DCMIType"/>
+                 <rdf:value>Text</rdf:value>
+               </rdf:Description>
+             </dcterms:type>
+           </pgterms:ebook>
+         </rdf:RDF>'
+      end
+      let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
+      it "expect the type of entity" do
+        expect(rdf.type).to eql 'Text'
+      end
+    end
+    describe "Titles" do
+      let(:xml) do
+        '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+           <pgterms:ebook rdf:about="ebooks/98765">
+             <dcterms:title>A Great Title</dcterms:title>
+           </pgterms:ebook>
+         </rdf:RDF>'
+      end
+      let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
+      it "expects a title" do
+        expect(rdf.title).to eql 'A Great Title'
+      end
+      it "expects subtitle to be empty" do
+        expect(rdf.subtitle).to eql ''
+      end
+      context "with a title and subtitle, on separate lines" do
+        let(:xml) do
+          '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+            <pgterms:ebook rdf:about="ebooks/98765">
+              <dcterms:title>A Great Multi-Title
+                Or, a Subtitle</dcterms:title>
+            </pgterms:ebook>
+          </rdf:RDF>'
+        end
+        let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
+        it "expects the title to be the first line" do
+          expect(rdf.title).to eql 'A Great Multi-Title'
+        end
+        it "expects the subtitle to be the second line" do
+          expect(rdf.subtitle).to eql 'Or, a Subtitle'
+        end
+      end
+      context "when title:subtitle are separated by a colon" do
+        let(:xml) do
+          '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+            <pgterms:ebook rdf:about="ebooks/98765">
+              <dcterms:title>A Great Multi-Title: And a Subtitle</dcterms:title>
+            </pgterms:ebook>
+          </rdf:RDF>'
+        end
+        let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
+        it "expects a title" do
+          expect(rdf.title).to eql 'A Great Multi-Title'
+        end
+        it "expects a subtitle" do
+          expect(rdf.subtitle).to eql 'And a Subtitle'
+        end
+      end
+      context "when title; and subtitle are separated by a semi-colon" do
+        let(:xml) do
+          '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+            <pgterms:ebook rdf:about="ebooks/98765">
+              <dcterms:title>A Great Multi-Title; Or, a Subtitle</dcterms:title>
+            </pgterms:ebook>
+          </rdf:RDF>'
+        end
+        let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
+        it "expects a title" do
+          expect(rdf.title).to eql 'A Great Multi-Title'
+        end
+        it "expects a subtitle" do
+          expect(rdf.subtitle).to eql 'Or, a Subtitle'
+        end
+        context "...except when subtitles already exists" do
+          let(:xml) do
+            '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+            <pgterms:ebook rdf:about="ebooks/98765">
+              <dcterms:title>A Great Multi-Title; and some other text
+                Then a Subtitle on a newline</dcterms:title>
+            </pgterms:ebook>
+          </rdf:RDF>'
+          end
+          let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
+          it "expects a title" do
+            expect(rdf.title).to eql 'A Great Multi-Title; and some other text'
+          end
+          it "expects a subtitle" do
+            expect(rdf.subtitle).to eql 'Then a Subtitle on a newline'
+          end
+        end
+      end
+    end
+    describe "#authors" do
+      let(:xml) do
+        '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+          <pgterms:agent rdf:about="2009/agents/402">
+            <pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
+            <pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
+            <pgterms:name>Dodge, Mary Mapes</pgterms:name>
+            <pgterms:alias>Dodge, Mary</pgterms:alias>
+            <pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Mary_Mapes_Dodge"/>
+          </pgterms:agent>
+          <pgterms:agent rdf:about="2009/agents/116">
+            <pgterms:alias>Verschillende</pgterms:alias>
+            <pgterms:name>Various</pgterms:name>
+          </pgterms:agent>
+        </rdf:RDF>'
+      end
+      let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
+      it "expects a Array" do
+        expect(rdf.authors.class).to be Array
+      end
+      it "expects correct number to be returned" do
+        expect(rdf.authors.count).to be 2
+      end
+      it "expects an author object" do
+        expect(rdf.authors.first.class).to be Rdf::Agent
+      end
+    end
+    describe "#subjects" do
+      let(:xml) do
+        %q{<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+           <pgterms:ebook rdf:about="ebooks/98765">
+             <dcterms:subject>
+               <rdf:Description>
+                 <dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCSH"/>
+                 <rdf:value>Children's literature -- Periodicals</rdf:value>
+                 <rdf:value>Children's periodicals, American</rdf:value>
+               </rdf:Description>
+             </dcterms:subject>
+             <dcterms:subject>
+               <rdf:Description>
+                 <dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCC"/>
+                 <rdf:value>PZ</rdf:value>
+               </rdf:Description>
+             </dcterms:subject>
+           </pgterms:ebook>
+        </rdf:RDF>}
+      end
+      let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
+      it "expects correct number to be returned" do
+        expect(rdf.subjects.count).to be 2
+      end
+      it "expects the correct data" do
+        expect(rdf.subjects.first).to eql "Children's literature -- Periodicals"
+        expect(rdf.subjects.last).to eql "Children's periodicals, American"
+      end
+    end
+    describe "#covers" do
+      describe "official PG covers" do
+        let(:xml) do
+          '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+             <pgterms:ebook rdf:about="ebooks/12345">
+               <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/12345.epub.noimages"/>
+               <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/12345.cover.medium"/>
+               <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/12345.cover.small"/>
+               <pgterms:marc901>http://www.gutenberg.org/files/12345/12345-h/images/cover.jpg</pgterms:marc901>
+             </pgterms:ebook>
+             <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/12345.epub.noimages">
+               <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">92652</dcterms:extent>
+               <dcterms:format>
+                 <rdf:Description>
+                   <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
+                   <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/epub+zip</rdf:value>
+                 </rdf:Description>
+               </dcterms:format>
+               <dcterms:isFormatOf rdf:resource="ebooks/12345"/>
+               <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-09-21T19:22:32.115259</dcterms:modified>
+             </pgterms:file>
+             <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/12345.cover.medium">
+               <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">10856</dcterms:extent>
+               <dcterms:format>
+                 <rdf:Description>
+                   <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
+                   <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
+                 </rdf:Description>
+               </dcterms:format>
+               <dcterms:isFormatOf rdf:resource="ebooks/12345"/>
+               <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-09-21T19:22:34.484114</dcterms:modified>
+             </pgterms:file>
+             <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/12345.cover.small">
+               <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1904</dcterms:extent>
+               <dcterms:format>
+                 <rdf:Description>
+                   <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
+                   <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">image/jpeg</rdf:value>
+                 </rdf:Description>
+               </dcterms:format>
+               <dcterms:isFormatOf rdf:resource="ebooks/12345"/>
+               <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-09-21T19:22:34.379124</dcterms:modified>
+             </pgterms:file>
+           </rdf:RDF>'
+        end
+        let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
+        it "expects the correct number of entries returned" do
+          expect(rdf.covers.count).to be 3
+        end
+        it "expects those to be used" do
+          expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/ebooks/12345.cover.medium'
+          expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/ebooks/12345.cover.small'
+        end
+        it "expects any other images to be listed after the official ones" do
+          expect(rdf.covers[2]).to eql 'http://www.gutenberg.org/files/12345/12345-h/images/cover.jpg'
+        end
+      end
+      describe "HTML ebook cover image" do
+        let(:xml) do
+          '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+             <pgterms:ebook rdf:about="ebooks/12345">
+               <pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/12345/12345-rst/images/cover.jpg</pgterms:marc901>
+               <pgterms:marc901>file:///public/vhost/g/gutenberg/html/files/12345/12345-h/images/cover.jpg</pgterms:marc901>
+               <pgterms:marc901>http://www.gutenberg.org/files/12345/12345-h/images/cover.jpg</pgterms:marc901>
+             </pgterms:ebook>
+           </rdf:RDF>'
+        end
+        let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
+        it "expects only unique entries" do
+          expect(rdf.covers.count).to be 2
+        end
+        it "should convert File URIs to the Gutenberg URL" do
+          expect(rdf.covers.first).to match 'http://www.gutenberg.org'
+        end
+        it "expects the covers to be listed in the correct order" do
+          expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/files/12345/12345-h/images/cover.jpg'
+          expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/files/12345/12345-rst/images/cover.jpg'
+        end
+      end
+    end
+    describe "#ebook" do
+      let(:xml) do
+        '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+          <pgterms:ebook rdf:about="ebooks/98765">
+            <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/98765.txt.utf-8"/>
+            <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/98765.zip"/>
+          </pgterms:ebook>
+          <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
+            <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
+            <dcterms:format>
+              <rdf:Description>
+                <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
+                <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=utf-8</rdf:value>
+              </rdf:Description>
+            </dcterms:format>
+            <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
+            <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
+          </pgterms:file>
+          <pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
+            <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
+            <dcterms:format>
+              <rdf:Description>
+                <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
+                <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
+                <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
+              </rdf:Description>
+            </dcterms:format>
+            <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
+            <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
+          </pgterms:file>
+        </rdf:RDF>'
+      end
+      let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
+      it "expects the correct number of entries" do
+        expect(rdf.ebooks.count).to be 2
+      end
+      it "expects an entry Hash to have the correct keys" do
+        expect(rdf.ebooks.first).to have_key :uri
+        expect(rdf.ebooks.first).to have_key :mime_type
+        expect(rdf.ebooks.first).to have_key :encoding
+        expect(rdf.ebooks.first).to have_key :modified
+      end
+      it "expcts the modified value to be a DateTime" do
+        expect(rdf.ebooks.first[:modified].class).to be DateTime
+      end
+      it "should return the URL" do
+        expect(rdf.ebooks.first[:uri]).to eql 'http://www.gutenberg.org/ebooks/98765.txt.utf-8'
+      end
+      it "should return the mime_type" do
+        expect(rdf.ebooks.first[:mime_type]).to eql 'text/plain'
+      end
+      it "should return the encoding" do
+        expect(rdf.ebooks.first[:encoding]).to eql 'utf-8'
+      end
+      it "should return the modified datetime" do
+        expect(rdf.ebooks.first[:modified].to_s).to eql '2010-02-16T08:29:52-07:00'
+      end
+      context "when there are two mime-types" do
+        let(:xml) do
+          '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+             <pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
+               <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
+               <dcterms:format>
+                 <rdf:Description>
+                   <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
+                   <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
+                   <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
+                 </rdf:Description>
+               </dcterms:format>
+               <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
+               <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
+             </pgterms:file>
+          </rdf:RDF>'
+        end
+        let(:rdf) { Rdf.new(Nokogiri::XML(xml)) }
+        it "should use just the first one" do
+          expect(rdf.ebooks.first[:mime_type]).to eql 'application/zip'
+        end
+        it "expects the encoding to be an empty string" do
+          expect(rdf.ebooks.first[:encoding]).to eql ''
+        end
+      end
+    end
+  end
+end

data/spec/gutenberg_rdf_spec.rb ADDED Viewed

@@ -0,0 +1,16 @@
+require 'spec_helper'
+module GutenbergRdf
+  describe ".parse" do
+    let(:file) { StringIO.new('<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"><pgterms:ebook rdf:about="ebooks/98765"/></rdf:RDF>') }
+    it "expects an Rdf object" do
+      File.stub(:new).and_return(file)
+      book = GutenbergRdf.parse(file)
+      expect(book.class).to be Rdf
+      expect(book.id).to eql '98765'
+    end
+  end
+end

data/spec/spec_helper.rb ADDED Viewed

@@ -0,0 +1,9 @@
+require 'gutenberg_rdf'
+RSpec.configure do |config|
+  config.mock_with :rspec
+  config.treat_symbols_as_metadata_keys_with_true_values = true
+  config.run_all_when_everything_filtered = true
+  config.filter_run :focus
+  config.order = 'random'
+end

metadata ADDED Viewed

@@ -0,0 +1,120 @@
+--- !ruby/object:Gem::Specification
+name: gutenberg_rdf
+version: !ruby/object:Gem::Version
+  version: 0.0.2
+platform: ruby
+authors:
+- Mike Cook
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2013-10-18 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 1.6.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 1.6.0
+- !ruby/object:Gem::Dependency
+  name: bundler
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.3'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.3'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: rspec
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 2.14.1
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 2.14.1
+description: A Ruby wrapper providing a nice API for the Project Gutenberg RDF catalog
+  files. See the README for more information.
+email:
+- m@mikecook.co.uk
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- .rspec
+- Gemfile
+- LICENSE.txt
+- README.md
+- Rakefile
+- gutenberg_rdf.gemspec
+- lib/gutenberg_rdf.rb
+- lib/gutenberg_rdf/rdf.rb
+- lib/gutenberg_rdf/rdf/agent.rb
+- lib/gutenberg_rdf/version.rb
+- spec/gutenberg_rdf/rdf/agent_spec.rb
+- spec/gutenberg_rdf/rdf_spec.rb
+- spec/gutenberg_rdf_spec.rb
+- spec/spec_helper.rb
+homepage: ''
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: 2.0.0
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.0.6
+signing_key:
+specification_version: 4
+summary: A Ruby wrapper for the Project Gutenberg RDF catalog files.
+test_files:
+- spec/gutenberg_rdf/rdf/agent_spec.rb
+- spec/gutenberg_rdf/rdf_spec.rb
+- spec/gutenberg_rdf_spec.rb
+- spec/spec_helper.rb