gutenberg_rdf 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cde8f277e56fe12183ccc5386414bb2161d8fed3
4
- data.tar.gz: c5ece359a2fa5e80addb608e5441440e9a46f2cb
3
+ metadata.gz: 541a22ec14bf66080b5795db4d5d86ee62db04a2
4
+ data.tar.gz: 7c115cfce05f53427bba5363087b435a0bf1ddfc
5
5
  SHA512:
6
- metadata.gz: 92fe622ad63fac10e422b6f4597d14d0fd7e933d8efbe82c68f8f103fe4112a529746eceaadcbe3457432fd9bb298c95018284292b9727d9a0ea0cd06c5ca314
7
- data.tar.gz: 45324adf7e17df7e5faeaf1b2b599d8d73bd056d8e3ee82205950e6e1409db114d838c8fb6ef11eba9df1330d0d08a5364e2467ab07095cca50765a2bbd505f0
6
+ metadata.gz: a68d926ca0bc5dd8904e29dd191ecd5b78b96f9d34bd101ca3ff1f5d84ac0f423a7d6b14bb3565237013da27cc2e7d0f255206113b4fdd399bea2e49fdfa40e6
7
+ data.tar.gz: 7b338342891dfe1b89ffc1f23ee5e7739e6dc1a7a07d982217208eb4cc6ff4e64a5a3619fff27fce11d7bd6008621f1a92644d6e8b427abe8b2a8d4683834536
data/README.md CHANGED
@@ -61,9 +61,14 @@ Or install it yourself as:
61
61
  puts book.covers.first
62
62
  #=> "http://www.gutenberg.org/ebooks/2746.cover.medium"
63
63
 
64
- puts book.ebooks[3][:uri]
65
- #=> "http://www.gutenberg.org/ebooks/2746.epub.images"
64
+ puts book.ebooks[1].uri
65
+ #=> "http://www.gutenberg.org/files/2746/2746-h/2746-h.htm"
66
66
 
67
+ puts book.ebooks[1].media_type
68
+ #=> "text/html"
69
+
70
+ puts book.ebooks[1].encoding
71
+ #=> "us-ascii"
67
72
 
68
73
  ## Contributing
69
74
 
@@ -0,0 +1,37 @@
# DateTime lives in the 'date' standard library; require it here so this file
# does not rely on another file having loaded it first.
require 'date'

module GutenbergRdf
  class Rdf
    # Wraps one <pgterms:file> element of a Project Gutenberg RDF record and
    # exposes its URI, media type, character encoding and modification time.
    class Media
      # Offset appended to timestamps that do not carry a zone of their own.
      DEFAULT_UTC_OFFSET = '-07:00'.freeze

      # Matches a timestamp whose time part already ends in "Z" or in a
      # numeric offset such as "+01:00" / "-0700".
      ZONE_SUFFIX = /T.*(?:Z|[+-]\d{2}:?\d{2})\z/

      # XPath (relative to the file element) of the media-type value.
      FORMAT_VALUE_PATH = 'dcterms:format/rdf:Description/rdf:value'.freeze

      # XPath (relative to the file element) of the modification timestamp.
      MODIFIED_PATH = 'dcterms:modified'.freeze

      # @return [REXML::Element] the wrapped <pgterms:file> element
      attr_reader :xml

      # @param xml [REXML::Element] a <pgterms:file> element
      def initialize(xml)
        @xml = xml
      end

      # @return [String, nil] value of the element's "about" attribute
      def uri
        xml.attributes['about']
      end

      # @return [String, nil] the part of the format value before the first
      #   ";" (e.g. "text/plain"), or nil when the element has no format value
      def media_type
        datatype[:type]
      end

      # @return [String] the remainder of the format value with "charset="
      #   removed (e.g. "utf-8"); an empty string when none is given
      def encoding
        datatype[:encoding]
      end

      # @return [DateTime, nil] the parsed dcterms:modified timestamp, or nil
      #   when the element has no (or an empty) dcterms:modified child
      def modified
        stamp = element_text(MODIFIED_PATH)
        return nil if stamp.nil? || stamp.empty?

        # Only fall back to the default offset when the timestamp is zone-less;
        # appending it to an already-zoned value would corrupt the string.
        stamp += DEFAULT_UTC_OFFSET unless stamp =~ ZONE_SUFFIX
        DateTime.parse(stamp)
      end

      private

      # Splits the first format value into media type and encoding. When the
      # element lists several values, only the first one is used. The result
      # is memoized so the XPath lookup runs once per instance.
      def datatype
        @datatype ||= begin
          parts = element_text(FORMAT_VALUE_PATH).to_s.split(/; */)
          type = parts.shift
          {type: type, encoding: parts.join(';').sub('charset=', '')}
        end
      end

      # Returns the stripped text of the first element matching +path+, or
      # nil when there is no such element or it has no text.
      def element_text(path)
        node = xml.elements[path]
        text = node && node.text
        text && text.strip
      end
    end
  end
end
@@ -62,10 +62,7 @@ module GutenbergRdf
62
62
  def ebooks
63
63
  files = Array.new
64
64
  xml.elements.each('pgterms:file') do |file|
65
- uri = file.attributes['about']
66
- datatypes = separate_mimetype_and_encoding(file.elements['dcterms:format/rdf:Description/rdf:value'].text)
67
- modified = DateTime.parse(file.elements['dcterms:modified'].text + '-07:00')
68
- files << {uri: uri, mime_type: datatypes[:mimetype], encoding: datatypes[:encoding], modified: modified}
65
+ files << Media.new(file)
69
66
  end
70
67
  files
71
68
  end
@@ -120,12 +117,5 @@ module GutenbergRdf
120
117
  entries
121
118
  end
122
119
 
123
- def separate_mimetype_and_encoding(string)
124
- parts = string.split(/; */)
125
- m = parts.shift
126
- e = parts.join(';').sub('charset=', '')
127
- {mimetype: m, encoding: e}
128
- end
129
-
130
120
  end
131
121
  end
@@ -1,3 +1,3 @@
1
1
  module GutenbergRdf
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.1"
3
3
  end
data/lib/gutenberg_rdf.rb CHANGED
@@ -2,6 +2,7 @@ require 'rexml/document'
2
2
 
3
3
  require "gutenberg_rdf/rdf"
4
4
  require "gutenberg_rdf/rdf/agent"
5
+ require "gutenberg_rdf/rdf/media"
5
6
  require "gutenberg_rdf/version"
6
7
 
7
8
  module GutenbergRdf
@@ -0,0 +1,69 @@
require 'spec_helper'

module GutenbergRdf
  class Rdf
    # Specs for Rdf::Media: each example parses a small RDF fixture, wraps its
    # <pgterms:file> element in a Media instance and checks one reader.
    describe Media do

      # Fixture with a single format value that carries a charset.
      let(:xml) do
        '<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
           <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
             <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
             <dcterms:format>
               <rdf:Description>
                 <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
                 <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=utf-8</rdf:value>
               </rdf:Description>
             </dcterms:format>
             <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
             <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
           </pgterms:file>
         </rdf:RDF>'
      end

      # Built from whichever `xml` is in scope, so nested contexts only need
      # to override the fixture.
      let(:media) { Media.new(REXML::Document.new(xml).elements['rdf:RDF/pgterms:file']) }

      it "expects the file URI" do
        expect(media.uri).to eql 'http://www.gutenberg.org/ebooks/98765.txt.utf-8'
      end

      it "expects the file media type" do
        expect(media.media_type).to eql 'text/plain'
      end

      it "expects the file encoding" do
        expect(media.encoding).to eql 'utf-8'
      end

      it "expects modified to be a DateTime" do
        expect(media.modified).to be_an_instance_of(DateTime)
      end

      it "expects the modified datetime" do
        expect(media.modified.to_s).to eql '2010-02-16T08:29:52-07:00'
      end

      context "when there are two media types" do
        # Fixture listing two format values; the first has no charset.
        let(:xml) do
          '<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
             <pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
               <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
               <dcterms:format>
                 <rdf:Description>
                   <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
                   <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
                   <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
                 </rdf:Description>
               </dcterms:format>
               <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
               <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
             </pgterms:file>
           </rdf:RDF>'
        end

        it "expects the first entry to be used" do
          expect(media.media_type).to eql 'application/zip'
        end

        it "expects the encoding to be an empty string" do
          expect(media.encoding).to eql ''
        end
      end

    end
  end
end
@@ -320,56 +320,9 @@ module GutenbergRdf
320
320
  it "expects the correct number of entries" do
321
321
  expect(rdf.ebooks.count).to be 2
322
322
  end
323
- it "expects an entry Hash to have the correct keys" do
324
- expect(rdf.ebooks.first).to have_key :uri
325
- expect(rdf.ebooks.first).to have_key :mime_type
326
- expect(rdf.ebooks.first).to have_key :encoding
327
- expect(rdf.ebooks.first).to have_key :modified
328
- end
329
- it "expcts the modified value to be a DateTime" do
330
- expect(rdf.ebooks.first[:modified].class).to be DateTime
331
- end
332
-
333
- it "should return the URL" do
334
- expect(rdf.ebooks.first[:uri]).to eql 'http://www.gutenberg.org/ebooks/98765.txt.utf-8'
335
- end
336
- it "should return the mime_type" do
337
- expect(rdf.ebooks.first[:mime_type]).to eql 'text/plain'
338
- end
339
- it "should return the encoding" do
340
- expect(rdf.ebooks.first[:encoding]).to eql 'utf-8'
341
- end
342
- it "should return the modified datetime" do
343
- expect(rdf.ebooks.first[:modified].to_s).to eql '2010-02-16T08:29:52-07:00'
344
- end
345
-
346
- context "when there are two mime-types" do
347
- let(:xml) do
348
- '<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
349
- <pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
350
- <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
351
- <dcterms:format>
352
- <rdf:Description>
353
- <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
354
- <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
355
- <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
356
- </rdf:Description>
357
- </dcterms:format>
358
- <dcterms:isFormatOf rdf:resource="ebooks/98765"/>
359
- <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
360
- </pgterms:file>
361
- </rdf:RDF>'
362
- end
363
- let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
364
-
365
- it "should use just the first one" do
366
- expect(rdf.ebooks.first[:mime_type]).to eql 'application/zip'
367
- end
368
- it "expects the encoding to be an empty string" do
369
- expect(rdf.ebooks.first[:encoding]).to eql ''
370
- end
323
+ it "expects an entry to be a Media class" do
324
+ expect(rdf.ebooks.first.class).to be Rdf::Media
371
325
  end
372
326
  end
373
-
374
327
  end
375
328
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gutenberg_rdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Cook
@@ -42,8 +42,10 @@ files:
42
42
  - lib/gutenberg_rdf.rb
43
43
  - lib/gutenberg_rdf/rdf.rb
44
44
  - lib/gutenberg_rdf/rdf/agent.rb
45
+ - lib/gutenberg_rdf/rdf/media.rb
45
46
  - lib/gutenberg_rdf/version.rb
46
47
  - spec/gutenberg_rdf/rdf/agent_spec.rb
48
+ - spec/gutenberg_rdf/rdf/media_spec.rb
47
49
  - spec/gutenberg_rdf/rdf_spec.rb
48
50
  - spec/gutenberg_rdf_spec.rb
49
51
  - spec/spec_helper.rb
@@ -73,6 +75,7 @@ specification_version: 4
73
75
  summary: A Ruby wrapper for the Project Gutenberg RDF catalog files.
74
76
  test_files:
75
77
  - spec/gutenberg_rdf/rdf/agent_spec.rb
78
+ - spec/gutenberg_rdf/rdf/media_spec.rb
76
79
  - spec/gutenberg_rdf/rdf_spec.rb
77
80
  - spec/gutenberg_rdf_spec.rb
78
81
  - spec/spec_helper.rb