gutenberg_rdf 0.1.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +7 -2
- data/lib/gutenberg_rdf/rdf/media.rb +37 -0
- data/lib/gutenberg_rdf/rdf.rb +1 -11
- data/lib/gutenberg_rdf/version.rb +1 -1
- data/lib/gutenberg_rdf.rb +1 -0
- data/spec/gutenberg_rdf/rdf/media_spec.rb +69 -0
- data/spec/gutenberg_rdf/rdf_spec.rb +2 -49
- metadata +4 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 541a22ec14bf66080b5795db4d5d86ee62db04a2
|
4
|
+
data.tar.gz: 7c115cfce05f53427bba5363087b435a0bf1ddfc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a68d926ca0bc5dd8904e29dd191ecd5b78b96f9d34bd101ca3ff1f5d84ac0f423a7d6b14bb3565237013da27cc2e7d0f255206113b4fdd399bea2e49fdfa40e6
|
7
|
+
data.tar.gz: 7b338342891dfe1b89ffc1f23ee5e7739e6dc1a7a07d982217208eb4cc6ff4e64a5a3619fff27fce11d7bd6008621f1a92644d6e8b427abe8b2a8d4683834536
|
data/README.md
CHANGED
@@ -61,9 +61,14 @@ Or install it yourself as:
|
|
61
61
|
puts book.covers.first
|
62
62
|
#=> "http://www.gutenberg.org/ebooks/2746.cover.medium"
|
63
63
|
|
64
|
-
puts book.ebooks[
|
65
|
-
#=> "http://www.gutenberg.org/
|
64
|
+
puts book.ebooks[1].uri
|
65
|
+
#=> "http://www.gutenberg.org/files/2746/2746-h/2746-h.htm"
|
66
66
|
|
67
|
+
puts book.ebooks[1].media_type
|
68
|
+
#=> "text/html"
|
69
|
+
|
70
|
+
puts book.ebooks[1].encoding
|
71
|
+
#=> "us-ascii"
|
67
72
|
|
68
73
|
## Contributing
|
69
74
|
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module GutenbergRdf
|
2
|
+
class Rdf
|
3
|
+
class Media
|
4
|
+
attr_reader :xml
|
5
|
+
|
6
|
+
def initialize(xml)
|
7
|
+
@xml = xml
|
8
|
+
end
|
9
|
+
|
10
|
+
def uri
|
11
|
+
xml.attributes['about']
|
12
|
+
end
|
13
|
+
|
14
|
+
def media_type
|
15
|
+
datatype[:type]
|
16
|
+
end
|
17
|
+
|
18
|
+
def encoding
|
19
|
+
datatype[:encoding]
|
20
|
+
end
|
21
|
+
|
22
|
+
def modified
|
23
|
+
DateTime.parse(xml.elements['dcterms:modified'].text + '-07:00')
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def datatype
|
29
|
+
parts = xml.elements['dcterms:format/rdf:Description/rdf:value'].text.split(/; */)
|
30
|
+
t = parts.shift
|
31
|
+
e = parts.join(';').sub('charset=', '')
|
32
|
+
{type: t, encoding: e}
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
data/lib/gutenberg_rdf/rdf.rb
CHANGED
@@ -62,10 +62,7 @@ module GutenbergRdf
|
|
62
62
|
def ebooks
|
63
63
|
files = Array.new
|
64
64
|
xml.elements.each('pgterms:file') do |file|
|
65
|
-
|
66
|
-
datatypes = separate_mimetype_and_encoding(file.elements['dcterms:format/rdf:Description/rdf:value'].text)
|
67
|
-
modified = DateTime.parse(file.elements['dcterms:modified'].text + '-07:00')
|
68
|
-
files << {uri: uri, mime_type: datatypes[:mimetype], encoding: datatypes[:encoding], modified: modified}
|
65
|
+
files << Media.new(file)
|
69
66
|
end
|
70
67
|
files
|
71
68
|
end
|
@@ -120,12 +117,5 @@ module GutenbergRdf
|
|
120
117
|
entries
|
121
118
|
end
|
122
119
|
|
123
|
-
def separate_mimetype_and_encoding(string)
|
124
|
-
parts = string.split(/; */)
|
125
|
-
m = parts.shift
|
126
|
-
e = parts.join(';').sub('charset=', '')
|
127
|
-
{mimetype: m, encoding: e}
|
128
|
-
end
|
129
|
-
|
130
120
|
end
|
131
121
|
end
|
data/lib/gutenberg_rdf.rb
CHANGED
@@ -0,0 +1,69 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module GutenbergRdf
|
4
|
+
class Rdf
|
5
|
+
describe Media do
|
6
|
+
|
7
|
+
let(:xml) do
|
8
|
+
'<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
9
|
+
<pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
|
10
|
+
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
|
11
|
+
<dcterms:format>
|
12
|
+
<rdf:Description>
|
13
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
14
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=utf-8</rdf:value>
|
15
|
+
</rdf:Description>
|
16
|
+
</dcterms:format>
|
17
|
+
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
18
|
+
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
|
19
|
+
</pgterms:file>
|
20
|
+
</rdf:RDF>'
|
21
|
+
end
|
22
|
+
let(:media) { Media.new(REXML::Document.new(xml).elements['rdf:RDF/pgterms:file']) }
|
23
|
+
|
24
|
+
it "expects the file URI" do
|
25
|
+
expect(media.uri).to eql 'http://www.gutenberg.org/ebooks/98765.txt.utf-8'
|
26
|
+
end
|
27
|
+
it "expects the file media type" do
|
28
|
+
expect(media.media_type).to eql 'text/plain'
|
29
|
+
end
|
30
|
+
it "expects the file encoding" do
|
31
|
+
expect(media.encoding).to eql 'utf-8'
|
32
|
+
end
|
33
|
+
it "expects modified to be a DateTime" do
|
34
|
+
expect(media.modified.class).to be DateTime
|
35
|
+
end
|
36
|
+
it "should return the modified datetime" do
|
37
|
+
expect(media.modified.to_s).to eql '2010-02-16T08:29:52-07:00'
|
38
|
+
end
|
39
|
+
|
40
|
+
context "when there are two media types" do
|
41
|
+
let(:xml) do
|
42
|
+
'<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
43
|
+
<pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
|
44
|
+
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
|
45
|
+
<dcterms:format>
|
46
|
+
<rdf:Description>
|
47
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
48
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
|
49
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
|
50
|
+
</rdf:Description>
|
51
|
+
</dcterms:format>
|
52
|
+
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
53
|
+
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
|
54
|
+
</pgterms:file>
|
55
|
+
</rdf:RDF>'
|
56
|
+
end
|
57
|
+
let(:media) { Media.new(REXML::Document.new(xml).elements['rdf:RDF/pgterms:file']) }
|
58
|
+
|
59
|
+
it "expects the first entry to be used" do
|
60
|
+
expect(media.media_type).to eql 'application/zip'
|
61
|
+
end
|
62
|
+
it "expects the encoding to be an empty string" do
|
63
|
+
expect(media.encoding).to eql ''
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
@@ -320,56 +320,9 @@ module GutenbergRdf
|
|
320
320
|
it "expects the correct number of entries" do
|
321
321
|
expect(rdf.ebooks.count).to be 2
|
322
322
|
end
|
323
|
-
it "expects an entry
|
324
|
-
expect(rdf.ebooks.first).to
|
325
|
-
expect(rdf.ebooks.first).to have_key :mime_type
|
326
|
-
expect(rdf.ebooks.first).to have_key :encoding
|
327
|
-
expect(rdf.ebooks.first).to have_key :modified
|
328
|
-
end
|
329
|
-
it "expcts the modified value to be a DateTime" do
|
330
|
-
expect(rdf.ebooks.first[:modified].class).to be DateTime
|
331
|
-
end
|
332
|
-
|
333
|
-
it "should return the URL" do
|
334
|
-
expect(rdf.ebooks.first[:uri]).to eql 'http://www.gutenberg.org/ebooks/98765.txt.utf-8'
|
335
|
-
end
|
336
|
-
it "should return the mime_type" do
|
337
|
-
expect(rdf.ebooks.first[:mime_type]).to eql 'text/plain'
|
338
|
-
end
|
339
|
-
it "should return the encoding" do
|
340
|
-
expect(rdf.ebooks.first[:encoding]).to eql 'utf-8'
|
341
|
-
end
|
342
|
-
it "should return the modified datetime" do
|
343
|
-
expect(rdf.ebooks.first[:modified].to_s).to eql '2010-02-16T08:29:52-07:00'
|
344
|
-
end
|
345
|
-
|
346
|
-
context "when there are two mime-types" do
|
347
|
-
let(:xml) do
|
348
|
-
'<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
349
|
-
<pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
|
350
|
-
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
|
351
|
-
<dcterms:format>
|
352
|
-
<rdf:Description>
|
353
|
-
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
354
|
-
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
|
355
|
-
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
|
356
|
-
</rdf:Description>
|
357
|
-
</dcterms:format>
|
358
|
-
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
359
|
-
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
|
360
|
-
</pgterms:file>
|
361
|
-
</rdf:RDF>'
|
362
|
-
end
|
363
|
-
let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
|
364
|
-
|
365
|
-
it "should use just the first one" do
|
366
|
-
expect(rdf.ebooks.first[:mime_type]).to eql 'application/zip'
|
367
|
-
end
|
368
|
-
it "expects the encoding to be an empty string" do
|
369
|
-
expect(rdf.ebooks.first[:encoding]).to eql ''
|
370
|
-
end
|
323
|
+
it "expects an entry to be a Media class" do
|
324
|
+
expect(rdf.ebooks.first.class).to be Rdf::Media
|
371
325
|
end
|
372
326
|
end
|
373
|
-
|
374
327
|
end
|
375
328
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gutenberg_rdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Cook
|
@@ -42,8 +42,10 @@ files:
|
|
42
42
|
- lib/gutenberg_rdf.rb
|
43
43
|
- lib/gutenberg_rdf/rdf.rb
|
44
44
|
- lib/gutenberg_rdf/rdf/agent.rb
|
45
|
+
- lib/gutenberg_rdf/rdf/media.rb
|
45
46
|
- lib/gutenberg_rdf/version.rb
|
46
47
|
- spec/gutenberg_rdf/rdf/agent_spec.rb
|
48
|
+
- spec/gutenberg_rdf/rdf/media_spec.rb
|
47
49
|
- spec/gutenberg_rdf/rdf_spec.rb
|
48
50
|
- spec/gutenberg_rdf_spec.rb
|
49
51
|
- spec/spec_helper.rb
|
@@ -73,6 +75,7 @@ specification_version: 4
|
|
73
75
|
summary: A Ruby wrapper for the Project Gutenberg RDF catalog files.
|
74
76
|
test_files:
|
75
77
|
- spec/gutenberg_rdf/rdf/agent_spec.rb
|
78
|
+
- spec/gutenberg_rdf/rdf/media_spec.rb
|
76
79
|
- spec/gutenberg_rdf/rdf_spec.rb
|
77
80
|
- spec/gutenberg_rdf_spec.rb
|
78
81
|
- spec/spec_helper.rb
|