gutenberg_rdf 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +7 -2
- data/lib/gutenberg_rdf/rdf/media.rb +37 -0
- data/lib/gutenberg_rdf/rdf.rb +1 -11
- data/lib/gutenberg_rdf/version.rb +1 -1
- data/lib/gutenberg_rdf.rb +1 -0
- data/spec/gutenberg_rdf/rdf/media_spec.rb +69 -0
- data/spec/gutenberg_rdf/rdf_spec.rb +2 -49
- metadata +4 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 541a22ec14bf66080b5795db4d5d86ee62db04a2
|
4
|
+
data.tar.gz: 7c115cfce05f53427bba5363087b435a0bf1ddfc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a68d926ca0bc5dd8904e29dd191ecd5b78b96f9d34bd101ca3ff1f5d84ac0f423a7d6b14bb3565237013da27cc2e7d0f255206113b4fdd399bea2e49fdfa40e6
|
7
|
+
data.tar.gz: 7b338342891dfe1b89ffc1f23ee5e7739e6dc1a7a07d982217208eb4cc6ff4e64a5a3619fff27fce11d7bd6008621f1a92644d6e8b427abe8b2a8d4683834536
|
data/README.md
CHANGED
@@ -61,9 +61,14 @@ Or install it yourself as:
|
|
61
61
|
puts book.covers.first
|
62
62
|
#=> "http://www.gutenberg.org/ebooks/2746.cover.medium"
|
63
63
|
|
64
|
-
puts book.ebooks[
|
65
|
-
#=> "http://www.gutenberg.org/
|
64
|
+
puts book.ebooks[1].uri
|
65
|
+
#=> "http://www.gutenberg.org/files/2746/2746-h/2746-h.htm"
|
66
66
|
|
67
|
+
puts book.ebooks[1].media_type
|
68
|
+
#=> "text/html"
|
69
|
+
|
70
|
+
puts book.ebooks[1].encoding
|
71
|
+
#=> "us-ascii"
|
67
72
|
|
68
73
|
## Contributing
|
69
74
|
|
data/lib/gutenberg_rdf/rdf/media.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
module GutenbergRdf
|
2
|
+
class Rdf
|
3
|
+
class Media
|
4
|
+
attr_reader :xml
|
5
|
+
|
6
|
+
def initialize(xml)
|
7
|
+
@xml = xml
|
8
|
+
end
|
9
|
+
|
10
|
+
def uri
|
11
|
+
xml.attributes['about']
|
12
|
+
end
|
13
|
+
|
14
|
+
def media_type
|
15
|
+
datatype[:type]
|
16
|
+
end
|
17
|
+
|
18
|
+
def encoding
|
19
|
+
datatype[:encoding]
|
20
|
+
end
|
21
|
+
|
22
|
+
def modified
|
23
|
+
DateTime.parse(xml.elements['dcterms:modified'].text + '-07:00')
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def datatype
|
29
|
+
parts = xml.elements['dcterms:format/rdf:Description/rdf:value'].text.split(/; */)
|
30
|
+
t = parts.shift
|
31
|
+
e = parts.join(';').sub('charset=', '')
|
32
|
+
{type: t, encoding: e}
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
data/lib/gutenberg_rdf/rdf.rb
CHANGED
@@ -62,10 +62,7 @@ module GutenbergRdf
|
|
62
62
|
def ebooks
|
63
63
|
files = Array.new
|
64
64
|
xml.elements.each('pgterms:file') do |file|
|
65
|
-
|
66
|
-
datatypes = separate_mimetype_and_encoding(file.elements['dcterms:format/rdf:Description/rdf:value'].text)
|
67
|
-
modified = DateTime.parse(file.elements['dcterms:modified'].text + '-07:00')
|
68
|
-
files << {uri: uri, mime_type: datatypes[:mimetype], encoding: datatypes[:encoding], modified: modified}
|
65
|
+
files << Media.new(file)
|
69
66
|
end
|
70
67
|
files
|
71
68
|
end
|
@@ -120,12 +117,5 @@ module GutenbergRdf
|
|
120
117
|
entries
|
121
118
|
end
|
122
119
|
|
123
|
-
def separate_mimetype_and_encoding(string)
|
124
|
-
parts = string.split(/; */)
|
125
|
-
m = parts.shift
|
126
|
-
e = parts.join(';').sub('charset=', '')
|
127
|
-
{mimetype: m, encoding: e}
|
128
|
-
end
|
129
|
-
|
130
120
|
end
|
131
121
|
end
|
data/lib/gutenberg_rdf.rb
CHANGED
data/spec/gutenberg_rdf/rdf/media_spec.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module GutenbergRdf
|
4
|
+
class Rdf
|
5
|
+
describe Media do
|
6
|
+
|
7
|
+
let(:xml) do
|
8
|
+
'<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
9
|
+
<pgterms:file rdf:about="http://www.gutenberg.org/ebooks/98765.txt.utf-8">
|
10
|
+
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">293684</dcterms:extent>
|
11
|
+
<dcterms:format>
|
12
|
+
<rdf:Description>
|
13
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
14
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=utf-8</rdf:value>
|
15
|
+
</rdf:Description>
|
16
|
+
</dcterms:format>
|
17
|
+
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
18
|
+
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2010-02-16T08:29:52.373092</dcterms:modified>
|
19
|
+
</pgterms:file>
|
20
|
+
</rdf:RDF>'
|
21
|
+
end
|
22
|
+
let(:media) { Media.new(REXML::Document.new(xml).elements['rdf:RDF/pgterms:file']) }
|
23
|
+
|
24
|
+
it "expects the file URI" do
|
25
|
+
expect(media.uri).to eql 'http://www.gutenberg.org/ebooks/98765.txt.utf-8'
|
26
|
+
end
|
27
|
+
it "expects the file media type" do
|
28
|
+
expect(media.media_type).to eql 'text/plain'
|
29
|
+
end
|
30
|
+
it "expects the file encoding" do
|
31
|
+
expect(media.encoding).to eql 'utf-8'
|
32
|
+
end
|
33
|
+
it "expects modified to be a DateTime" do
|
34
|
+
expect(media.modified.class).to be DateTime
|
35
|
+
end
|
36
|
+
it "should return the modified datetime" do
|
37
|
+
expect(media.modified.to_s).to eql '2010-02-16T08:29:52-07:00'
|
38
|
+
end
|
39
|
+
|
40
|
+
context "when there are two media types" do
|
41
|
+
let(:xml) do
|
42
|
+
'<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
43
|
+
<pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
|
44
|
+
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
|
45
|
+
<dcterms:format>
|
46
|
+
<rdf:Description>
|
47
|
+
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
48
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
|
49
|
+
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
|
50
|
+
</rdf:Description>
|
51
|
+
</dcterms:format>
|
52
|
+
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
53
|
+
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
|
54
|
+
</pgterms:file>
|
55
|
+
</rdf:RDF>'
|
56
|
+
end
|
57
|
+
let(:media) { Media.new(REXML::Document.new(xml).elements['rdf:RDF/pgterms:file']) }
|
58
|
+
|
59
|
+
it "expects the first entry to be used" do
|
60
|
+
expect(media.media_type).to eql 'application/zip'
|
61
|
+
end
|
62
|
+
it "expects the encoding to be an empty string" do
|
63
|
+
expect(media.encoding).to eql ''
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
data/spec/gutenberg_rdf/rdf_spec.rb
CHANGED
@@ -320,56 +320,9 @@ module GutenbergRdf
|
|
320
320
|
it "expects the correct number of entries" do
|
321
321
|
expect(rdf.ebooks.count).to be 2
|
322
322
|
end
|
323
|
-
it "expects an entry
|
324
|
-
expect(rdf.ebooks.first).to
|
325
|
-
expect(rdf.ebooks.first).to have_key :mime_type
|
326
|
-
expect(rdf.ebooks.first).to have_key :encoding
|
327
|
-
expect(rdf.ebooks.first).to have_key :modified
|
328
|
-
end
|
329
|
-
it "expcts the modified value to be a DateTime" do
|
330
|
-
expect(rdf.ebooks.first[:modified].class).to be DateTime
|
331
|
-
end
|
332
|
-
|
333
|
-
it "should return the URL" do
|
334
|
-
expect(rdf.ebooks.first[:uri]).to eql 'http://www.gutenberg.org/ebooks/98765.txt.utf-8'
|
335
|
-
end
|
336
|
-
it "should return the mime_type" do
|
337
|
-
expect(rdf.ebooks.first[:mime_type]).to eql 'text/plain'
|
338
|
-
end
|
339
|
-
it "should return the encoding" do
|
340
|
-
expect(rdf.ebooks.first[:encoding]).to eql 'utf-8'
|
341
|
-
end
|
342
|
-
it "should return the modified datetime" do
|
343
|
-
expect(rdf.ebooks.first[:modified].to_s).to eql '2010-02-16T08:29:52-07:00'
|
344
|
-
end
|
345
|
-
|
346
|
-
context "when there are two mime-types" do
|
347
|
-
let(:xml) do
|
348
|
-
'<rdf:RDF xmlns:dcam="http://purl.org/dc/dcam/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
349
|
-
<pgterms:file rdf:about="http://www.gutenberg.org/files/98765/98765.zip">
|
350
|
-
<dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">116685</dcterms:extent>
|
351
|
-
<dcterms:format>
|
352
|
-
<rdf:Description>
|
353
|
-
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
|
354
|
-
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
|
355
|
-
<rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
|
356
|
-
</rdf:Description>
|
357
|
-
</dcterms:format>
|
358
|
-
<dcterms:isFormatOf rdf:resource="ebooks/98765"/>
|
359
|
-
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2006-09-28T12:37:26</dcterms:modified>
|
360
|
-
</pgterms:file>
|
361
|
-
</rdf:RDF>'
|
362
|
-
end
|
363
|
-
let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
|
364
|
-
|
365
|
-
it "should use just the first one" do
|
366
|
-
expect(rdf.ebooks.first[:mime_type]).to eql 'application/zip'
|
367
|
-
end
|
368
|
-
it "expects the encoding to be an empty string" do
|
369
|
-
expect(rdf.ebooks.first[:encoding]).to eql ''
|
370
|
-
end
|
323
|
+
it "expects an entry to be a Media class" do
|
324
|
+
expect(rdf.ebooks.first.class).to be Rdf::Media
|
371
325
|
end
|
372
326
|
end
|
373
|
-
|
374
327
|
end
|
375
328
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gutenberg_rdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Cook
|
@@ -42,8 +42,10 @@ files:
|
|
42
42
|
- lib/gutenberg_rdf.rb
|
43
43
|
- lib/gutenberg_rdf/rdf.rb
|
44
44
|
- lib/gutenberg_rdf/rdf/agent.rb
|
45
|
+
- lib/gutenberg_rdf/rdf/media.rb
|
45
46
|
- lib/gutenberg_rdf/version.rb
|
46
47
|
- spec/gutenberg_rdf/rdf/agent_spec.rb
|
48
|
+
- spec/gutenberg_rdf/rdf/media_spec.rb
|
47
49
|
- spec/gutenberg_rdf/rdf_spec.rb
|
48
50
|
- spec/gutenberg_rdf_spec.rb
|
49
51
|
- spec/spec_helper.rb
|
@@ -73,6 +75,7 @@ specification_version: 4
|
|
73
75
|
summary: A Ruby wrapper for the Project Gutenberg RDF catalog files.
|
74
76
|
test_files:
|
75
77
|
- spec/gutenberg_rdf/rdf/agent_spec.rb
|
78
|
+
- spec/gutenberg_rdf/rdf/media_spec.rb
|
76
79
|
- spec/gutenberg_rdf/rdf_spec.rb
|
77
80
|
- spec/gutenberg_rdf_spec.rb
|
78
81
|
- spec/spec_helper.rb
|