gutenberg_rdf 0.2.3 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/gutenberg_rdf.gemspec +2 -2
- data/lib/gutenberg_rdf/rdf.rb +20 -2
- data/lib/gutenberg_rdf/rdf/agent.rb +9 -0
- data/lib/gutenberg_rdf/version.rb +1 -1
- data/spec/gutenberg_rdf/rdf/agent_spec.rb +38 -25
- data/spec/gutenberg_rdf/rdf_spec.rb +29 -2
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5eb92b0007804d96f27544f3e4248ed20d9a6a33
|
4
|
+
data.tar.gz: 4d58bb7b868157dfe461a7a0633ed75eb7933af2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 74a4672ad2629c43853abcd4d15f6c033572ad0f71b0eb5177fa1c84dac697a35a775e0e1ba660fea2920b1edf69066aa2b11429fb31b36d5539c44f3d99fcfb
|
7
|
+
data.tar.gz: bcf6c1bc87aa2e6692ae7db25ae69d2a137467dba227e0702eacc02d8345e901dda2de9f6f5e7fedf32bba98ee47fc639f8008d9dcd55d2757644dccbc6321e7
|
data/gutenberg_rdf.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.email = ["m@mikecook.co.uk"]
|
10
10
|
spec.summary = %q{A Ruby wrapper for the Project Gutenberg RDF catalog files.}
|
11
11
|
spec.description = %q{A Ruby wrapper for the Project Gutenberg RDF catalog files (requires Ruby 2).}
|
12
|
-
spec.homepage = ""
|
12
|
+
spec.homepage = "https://github.com/mrcook/gutenberg_rdf"
|
13
13
|
spec.license = "MIT"
|
14
14
|
|
15
15
|
spec.files = `git ls-files`.split($/)
|
@@ -19,5 +19,5 @@ Gem::Specification.new do |spec|
|
|
19
19
|
|
20
20
|
spec.required_ruby_version = ">= 2.0.0" # so we have UTF-8 by default
|
21
21
|
|
22
|
-
spec.add_development_dependency "rspec", "~> 2.14
|
22
|
+
spec.add_development_dependency "rspec", "~> 2.14"
|
23
23
|
end
|
data/lib/gutenberg_rdf/rdf.rb
CHANGED
@@ -80,14 +80,32 @@ module GutenbergRdf
|
|
80
80
|
title_array = t.split(/\n/)
|
81
81
|
title_array = title_array.first.split(/:/) if title_array.count == 1
|
82
82
|
title_array = title_array.first.split(/;/) if title_array.count == 1
|
83
|
+
title_array = title_array.first.split(/, or,/) if title_array.count == 1
|
83
84
|
|
84
|
-
title_array.
|
85
|
+
title_array.map(&:strip)
|
86
|
+
end
|
87
|
+
|
88
|
+
def roles
|
89
|
+
@roles ||= extract_roles
|
90
|
+
end
|
91
|
+
|
92
|
+
def extract_roles
|
93
|
+
entries = Hash.new
|
94
|
+
xml.elements.each('pgterms:ebook/dcterms:creator') do |entry|
|
95
|
+
entries["#{entry.attributes['rdf:resource'].sub('2009/agents/', '')}"] = 'aut'
|
96
|
+
end
|
97
|
+
xml.elements.each('pgterms:ebook/marcrel:*') do |entry|
|
98
|
+
entries["#{entry.attributes['rdf:resource'].sub('2009/agents/', '')}"] = entry.name
|
99
|
+
end
|
100
|
+
entries
|
85
101
|
end
|
86
102
|
|
87
103
|
def extract_authors
|
88
104
|
entries = Array.new
|
89
105
|
xml.elements.each('pgterms:agent') do |agent|
|
90
|
-
|
106
|
+
entry = Agent.new(agent)
|
107
|
+
entry.assign_role(roles)
|
108
|
+
entries << entry
|
91
109
|
end
|
92
110
|
entries
|
93
111
|
end
|
data/lib/gutenberg_rdf/rdf/agent.rb
CHANGED
@@ -2,6 +2,7 @@ module GutenbergRdf
|
|
2
2
|
class Rdf
|
3
3
|
class Agent
|
4
4
|
attr_reader :xml
|
5
|
+
attr_accessor :role
|
5
6
|
|
6
7
|
def initialize(xml)
|
7
8
|
@xml = xml
|
@@ -15,6 +16,14 @@ module GutenbergRdf
|
|
15
16
|
xml.attributes['about'].match(/\A\d\d\d\d\/agents\/(\d+)\z/)[1]
|
16
17
|
end
|
17
18
|
|
19
|
+
def role
|
20
|
+
@role ||= 'oth'
|
21
|
+
end
|
22
|
+
|
23
|
+
def assign_role(roles)
|
24
|
+
self.role = roles["#{id}"]
|
25
|
+
end
|
26
|
+
|
18
27
|
def fullname
|
19
28
|
[firstname, lastname].reject(&:empty?).join(' ')
|
20
29
|
end
|
data/spec/gutenberg_rdf/rdf/agent_spec.rb
CHANGED
@@ -5,53 +5,66 @@ module GutenbergRdf
|
|
5
5
|
describe Agent do
|
6
6
|
let(:agent) do
|
7
7
|
xml = '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
8
|
+
<pgterms:agent rdf:about="2009/agents/402">
|
9
|
+
<pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
|
10
|
+
<pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
|
11
|
+
<pgterms:name>Doe, Jon James</pgterms:name>
|
12
|
+
<pgterms:alias>Doe, Jon</pgterms:alias>
|
13
|
+
<pgterms:alias>Doe, J. J.</pgterms:alias>
|
14
|
+
<pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Jon_James_Doe"/>
|
15
|
+
</pgterms:agent>
|
16
|
+
</rdf:RDF>'
|
17
17
|
Agent.new(REXML::Document.new(xml).root.elements['pgterms:agent'])
|
18
18
|
end
|
19
19
|
|
20
20
|
it "expects an agent ID" do
|
21
|
-
expect(agent.id).to
|
21
|
+
expect(agent.id).to eq '402'
|
22
|
+
end
|
23
|
+
|
24
|
+
it "sets a default role" do
|
25
|
+
expect(agent.role).to eq 'oth'
|
26
|
+
end
|
27
|
+
|
28
|
+
describe "Assigning Roles" do
|
29
|
+
it "assigns the correct value to .role" do
|
30
|
+
roles = {'402' => 'aut', '116' => 'ctb'}
|
31
|
+
agent.assign_role(roles)
|
32
|
+
|
33
|
+
expect(agent.role).to eq 'aut'
|
34
|
+
end
|
22
35
|
end
|
23
36
|
|
24
37
|
it "expects the last name" do
|
25
|
-
expect(agent.lastname).to
|
38
|
+
expect(agent.lastname).to eq 'Doe'
|
26
39
|
end
|
27
40
|
|
28
41
|
it "expects the first name(s)" do
|
29
|
-
expect(agent.firstname).to
|
42
|
+
expect(agent.firstname).to eq 'Jon James'
|
30
43
|
end
|
31
44
|
|
32
45
|
it "expects the full name" do
|
33
|
-
expect(agent.fullname).to
|
46
|
+
expect(agent.fullname).to eq 'Jon James Doe'
|
34
47
|
end
|
35
48
|
|
36
49
|
it "returns the #fullname when to_s is called" do
|
37
|
-
expect(agent.to_s).to
|
50
|
+
expect(agent.to_s).to eq 'Jon James Doe'
|
38
51
|
end
|
39
52
|
|
40
53
|
it "expects a birth date" do
|
41
|
-
expect(agent.birthdate).to
|
54
|
+
expect(agent.birthdate).to eq '1830'
|
42
55
|
end
|
43
56
|
|
44
57
|
it "expects a death date" do
|
45
|
-
expect(agent.deathdate).to
|
58
|
+
expect(agent.deathdate).to eq '1905'
|
46
59
|
end
|
47
60
|
|
48
61
|
it "expects a webpage" do
|
49
|
-
expect(agent.webpage).to
|
62
|
+
expect(agent.webpage).to eq 'http://en.wikipedia.org/wiki/Jon_James_Doe'
|
50
63
|
end
|
51
64
|
|
52
65
|
it "expects any alias names" do
|
53
|
-
expect(agent.aliases[0]).to
|
54
|
-
expect(agent.aliases[1]).to
|
66
|
+
expect(agent.aliases[0]).to eq 'Doe, Jon'
|
67
|
+
expect(agent.aliases[1]).to eq 'Doe, J. J.'
|
55
68
|
end
|
56
69
|
|
57
70
|
context "when only a single name is given" do
|
@@ -65,10 +78,10 @@ module GutenbergRdf
|
|
65
78
|
end
|
66
79
|
|
67
80
|
it "expects it to be assigned to the last name" do
|
68
|
-
expect(agent.lastname).to
|
81
|
+
expect(agent.lastname).to eq 'Dato'
|
69
82
|
end
|
70
83
|
it "expects firstname to be an empty string" do
|
71
|
-
expect(agent.firstname).to
|
84
|
+
expect(agent.firstname).to eq ''
|
72
85
|
end
|
73
86
|
end
|
74
87
|
|
@@ -83,8 +96,8 @@ module GutenbergRdf
|
|
83
96
|
end
|
84
97
|
|
85
98
|
it "expects the correct name order" do
|
86
|
-
expect(agent.firstname).to
|
87
|
-
expect(agent.lastname).to
|
99
|
+
expect(agent.firstname).to eq 'Sir Jon'
|
100
|
+
expect(agent.lastname).to eq 'Doe'
|
88
101
|
end
|
89
102
|
end
|
90
103
|
|
@@ -100,8 +113,8 @@ module GutenbergRdf
|
|
100
113
|
|
101
114
|
it "expects initials to replaced by name in brackets" do
|
102
115
|
pending "Not yet implemented"
|
103
|
-
expect(agent.firstname).to
|
104
|
-
expect(agent.lastname).to
|
116
|
+
expect(agent.firstname).to eq 'Jon James'
|
117
|
+
expect(agent.lastname).to eq 'Doe'
|
105
118
|
end
|
106
119
|
it "expects the name (excluding name in brackets) to be added to the aliases"
|
107
120
|
it "should not have duplicate aliases"
|
data/spec/gutenberg_rdf/rdf_spec.rb
CHANGED
@@ -86,6 +86,24 @@ module GutenbergRdf
|
|
86
86
|
end
|
87
87
|
end
|
88
88
|
|
89
|
+
context "with; title, or, subtitle (we need to split on the 'or')" do
|
90
|
+
let(:xml) do
|
91
|
+
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
92
|
+
<pgterms:ebook rdf:about="ebooks/98765">
|
93
|
+
<dcterms:title>A Great Multi-Title, or, a Subtitle</dcterms:title>
|
94
|
+
</pgterms:ebook>
|
95
|
+
</rdf:RDF>'
|
96
|
+
end
|
97
|
+
let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
|
98
|
+
|
99
|
+
it "expects the title to be the first line" do
|
100
|
+
expect(rdf.title).to eql 'A Great Multi-Title'
|
101
|
+
end
|
102
|
+
it "expects the subtitle to be the second line" do
|
103
|
+
expect(rdf.subtitle).to eql 'a Subtitle'
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
89
107
|
context "when title:subtitle are separated by a colon" do
|
90
108
|
let(:xml) do
|
91
109
|
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
@@ -142,7 +160,11 @@ module GutenbergRdf
|
|
142
160
|
|
143
161
|
describe "#authors" do
|
144
162
|
let(:xml) do
|
145
|
-
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
163
|
+
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:marcrel="http://id.loc.gov/vocabulary/relators">
|
164
|
+
<pgterms:ebook rdf:about="ebooks/99999999">
|
165
|
+
<marcrel:ctb rdf:resource="2009/agents/402"/>
|
166
|
+
<dcterms:creator rdf:resource="2009/agents/116"/>
|
167
|
+
</pgterms:ebook>
|
146
168
|
<pgterms:agent rdf:about="2009/agents/402">
|
147
169
|
<pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
|
148
170
|
<pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
|
@@ -166,7 +188,12 @@ module GutenbergRdf
|
|
166
188
|
end
|
167
189
|
it "has the correct author names" do
|
168
190
|
expect(rdf.authors.first.fullname).to eq 'Mary Mapes Dodge'
|
169
|
-
|
191
|
+
end
|
192
|
+
it "expects the author to have an aut role" do
|
193
|
+
expect(rdf.authors.last.role).to eq 'aut'
|
194
|
+
end
|
195
|
+
it "expects other agents to have the correct role" do
|
196
|
+
expect(rdf.authors.first.role).to eq 'ctb'
|
170
197
|
end
|
171
198
|
end
|
172
199
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gutenberg_rdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Cook
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-02-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 2.14
|
19
|
+
version: '2.14'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 2.14
|
26
|
+
version: '2.14'
|
27
27
|
description: A Ruby wrapper for the Project Gutenberg RDF catalog files (requires
|
28
28
|
Ruby 2).
|
29
29
|
email:
|
@@ -49,7 +49,7 @@ files:
|
|
49
49
|
- spec/gutenberg_rdf/rdf_spec.rb
|
50
50
|
- spec/gutenberg_rdf_spec.rb
|
51
51
|
- spec/spec_helper.rb
|
52
|
-
homepage:
|
52
|
+
homepage: https://github.com/mrcook/gutenberg_rdf
|
53
53
|
licenses:
|
54
54
|
- MIT
|
55
55
|
metadata: {}
|