gutenberg_rdf 0.2.3 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/gutenberg_rdf.gemspec +2 -2
- data/lib/gutenberg_rdf/rdf.rb +20 -2
- data/lib/gutenberg_rdf/rdf/agent.rb +9 -0
- data/lib/gutenberg_rdf/version.rb +1 -1
- data/spec/gutenberg_rdf/rdf/agent_spec.rb +38 -25
- data/spec/gutenberg_rdf/rdf_spec.rb +29 -2
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5eb92b0007804d96f27544f3e4248ed20d9a6a33
|
4
|
+
data.tar.gz: 4d58bb7b868157dfe461a7a0633ed75eb7933af2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 74a4672ad2629c43853abcd4d15f6c033572ad0f71b0eb5177fa1c84dac697a35a775e0e1ba660fea2920b1edf69066aa2b11429fb31b36d5539c44f3d99fcfb
|
7
|
+
data.tar.gz: bcf6c1bc87aa2e6692ae7db25ae69d2a137467dba227e0702eacc02d8345e901dda2de9f6f5e7fedf32bba98ee47fc639f8008d9dcd55d2757644dccbc6321e7
|
data/gutenberg_rdf.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.email = ["m@mikecook.co.uk"]
|
10
10
|
spec.summary = %q{A Ruby wrapper for the Project Gutenberg RDF catalog files.}
|
11
11
|
spec.description = %q{A Ruby wrapper for the Project Gutenberg RDF catalog files (requires Ruby 2).}
|
12
|
-
spec.homepage = ""
|
12
|
+
spec.homepage = "https://github.com/mrcook/gutenberg_rdf"
|
13
13
|
spec.license = "MIT"
|
14
14
|
|
15
15
|
spec.files = `git ls-files`.split($/)
|
@@ -19,5 +19,5 @@ Gem::Specification.new do |spec|
|
|
19
19
|
|
20
20
|
spec.required_ruby_version = ">= 2.0.0" # so we have UTF-8 by default
|
21
21
|
|
22
|
-
spec.add_development_dependency "rspec", "~> 2.14
|
22
|
+
spec.add_development_dependency "rspec", "~> 2.14"
|
23
23
|
end
|
data/lib/gutenberg_rdf/rdf.rb
CHANGED
@@ -80,14 +80,32 @@ module GutenbergRdf
|
|
80
80
|
title_array = t.split(/\n/)
|
81
81
|
title_array = title_array.first.split(/:/) if title_array.count == 1
|
82
82
|
title_array = title_array.first.split(/;/) if title_array.count == 1
|
83
|
+
title_array = title_array.first.split(/, or,/) if title_array.count == 1
|
83
84
|
|
84
|
-
title_array.
|
85
|
+
title_array.map(&:strip)
|
86
|
+
end
|
87
|
+
|
88
|
+
def roles
|
89
|
+
@roles ||= extract_roles
|
90
|
+
end
|
91
|
+
|
92
|
+
def extract_roles
|
93
|
+
entries = Hash.new
|
94
|
+
xml.elements.each('pgterms:ebook/dcterms:creator') do |entry|
|
95
|
+
entries["#{entry.attributes['rdf:resource'].sub('2009/agents/', '')}"] = 'aut'
|
96
|
+
end
|
97
|
+
xml.elements.each('pgterms:ebook/marcrel:*') do |entry|
|
98
|
+
entries["#{entry.attributes['rdf:resource'].sub('2009/agents/', '')}"] = entry.name
|
99
|
+
end
|
100
|
+
entries
|
85
101
|
end
|
86
102
|
|
87
103
|
def extract_authors
|
88
104
|
entries = Array.new
|
89
105
|
xml.elements.each('pgterms:agent') do |agent|
|
90
|
-
|
106
|
+
entry = Agent.new(agent)
|
107
|
+
entry.assign_role(roles)
|
108
|
+
entries << entry
|
91
109
|
end
|
92
110
|
entries
|
93
111
|
end
|
@@ -2,6 +2,7 @@ module GutenbergRdf
|
|
2
2
|
class Rdf
|
3
3
|
class Agent
|
4
4
|
attr_reader :xml
|
5
|
+
attr_accessor :role
|
5
6
|
|
6
7
|
def initialize(xml)
|
7
8
|
@xml = xml
|
@@ -15,6 +16,14 @@ module GutenbergRdf
|
|
15
16
|
xml.attributes['about'].match(/\A\d\d\d\d\/agents\/(\d+)\z/)[1]
|
16
17
|
end
|
17
18
|
|
19
|
+
def role
|
20
|
+
@role ||= 'oth'
|
21
|
+
end
|
22
|
+
|
23
|
+
def assign_role(roles)
|
24
|
+
self.role = roles["#{id}"]
|
25
|
+
end
|
26
|
+
|
18
27
|
def fullname
|
19
28
|
[firstname, lastname].reject(&:empty?).join(' ')
|
20
29
|
end
|
@@ -5,53 +5,66 @@ module GutenbergRdf
|
|
5
5
|
describe Agent do
|
6
6
|
let(:agent) do
|
7
7
|
xml = '<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
8
|
+
<pgterms:agent rdf:about="2009/agents/402">
|
9
|
+
<pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
|
10
|
+
<pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
|
11
|
+
<pgterms:name>Doe, Jon James</pgterms:name>
|
12
|
+
<pgterms:alias>Doe, Jon</pgterms:alias>
|
13
|
+
<pgterms:alias>Doe, J. J.</pgterms:alias>
|
14
|
+
<pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Jon_James_Doe"/>
|
15
|
+
</pgterms:agent>
|
16
|
+
</rdf:RDF>'
|
17
17
|
Agent.new(REXML::Document.new(xml).root.elements['pgterms:agent'])
|
18
18
|
end
|
19
19
|
|
20
20
|
it "expects an agent ID" do
|
21
|
-
expect(agent.id).to
|
21
|
+
expect(agent.id).to eq '402'
|
22
|
+
end
|
23
|
+
|
24
|
+
it "sets a default role" do
|
25
|
+
expect(agent.role).to eq 'oth'
|
26
|
+
end
|
27
|
+
|
28
|
+
describe "Assigning Roles" do
|
29
|
+
it "assigns the correct value to .role" do
|
30
|
+
roles = {'402' => 'aut', '116' => 'ctb'}
|
31
|
+
agent.assign_role(roles)
|
32
|
+
|
33
|
+
expect(agent.role).to eq 'aut'
|
34
|
+
end
|
22
35
|
end
|
23
36
|
|
24
37
|
it "expects the last name" do
|
25
|
-
expect(agent.lastname).to
|
38
|
+
expect(agent.lastname).to eq 'Doe'
|
26
39
|
end
|
27
40
|
|
28
41
|
it "expects the first name(s)" do
|
29
|
-
expect(agent.firstname).to
|
42
|
+
expect(agent.firstname).to eq 'Jon James'
|
30
43
|
end
|
31
44
|
|
32
45
|
it "expects the full name" do
|
33
|
-
expect(agent.fullname).to
|
46
|
+
expect(agent.fullname).to eq 'Jon James Doe'
|
34
47
|
end
|
35
48
|
|
36
49
|
it "returns the #fullname when to_s is called" do
|
37
|
-
expect(agent.to_s).to
|
50
|
+
expect(agent.to_s).to eq 'Jon James Doe'
|
38
51
|
end
|
39
52
|
|
40
53
|
it "expects a birth date" do
|
41
|
-
expect(agent.birthdate).to
|
54
|
+
expect(agent.birthdate).to eq '1830'
|
42
55
|
end
|
43
56
|
|
44
57
|
it "expects a death date" do
|
45
|
-
expect(agent.deathdate).to
|
58
|
+
expect(agent.deathdate).to eq '1905'
|
46
59
|
end
|
47
60
|
|
48
61
|
it "expects a webpage" do
|
49
|
-
expect(agent.webpage).to
|
62
|
+
expect(agent.webpage).to eq 'http://en.wikipedia.org/wiki/Jon_James_Doe'
|
50
63
|
end
|
51
64
|
|
52
65
|
it "expects any alias names" do
|
53
|
-
expect(agent.aliases[0]).to
|
54
|
-
expect(agent.aliases[1]).to
|
66
|
+
expect(agent.aliases[0]).to eq 'Doe, Jon'
|
67
|
+
expect(agent.aliases[1]).to eq 'Doe, J. J.'
|
55
68
|
end
|
56
69
|
|
57
70
|
context "when only a single name is given" do
|
@@ -65,10 +78,10 @@ module GutenbergRdf
|
|
65
78
|
end
|
66
79
|
|
67
80
|
it "expects it to be assigned to the last name" do
|
68
|
-
expect(agent.lastname).to
|
81
|
+
expect(agent.lastname).to eq 'Dato'
|
69
82
|
end
|
70
83
|
it "expects firstname to be an empty string" do
|
71
|
-
expect(agent.firstname).to
|
84
|
+
expect(agent.firstname).to eq ''
|
72
85
|
end
|
73
86
|
end
|
74
87
|
|
@@ -83,8 +96,8 @@ module GutenbergRdf
|
|
83
96
|
end
|
84
97
|
|
85
98
|
it "expects the correct name order" do
|
86
|
-
expect(agent.firstname).to
|
87
|
-
expect(agent.lastname).to
|
99
|
+
expect(agent.firstname).to eq 'Sir Jon'
|
100
|
+
expect(agent.lastname).to eq 'Doe'
|
88
101
|
end
|
89
102
|
end
|
90
103
|
|
@@ -100,8 +113,8 @@ module GutenbergRdf
|
|
100
113
|
|
101
114
|
it "expects initials to replaced by name in brackets" do
|
102
115
|
pending "Not yet implemented"
|
103
|
-
expect(agent.firstname).to
|
104
|
-
expect(agent.lastname).to
|
116
|
+
expect(agent.firstname).to eq 'Jon James'
|
117
|
+
expect(agent.lastname).to eq 'Doe'
|
105
118
|
end
|
106
119
|
it "expects the name (excluding name in brackets) to be added to the aliases"
|
107
120
|
it "should not have duplicate aliases"
|
@@ -86,6 +86,24 @@ module GutenbergRdf
|
|
86
86
|
end
|
87
87
|
end
|
88
88
|
|
89
|
+
context "with; title, or, subtitle (we need to split on the 'or')" do
|
90
|
+
let(:xml) do
|
91
|
+
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
92
|
+
<pgterms:ebook rdf:about="ebooks/98765">
|
93
|
+
<dcterms:title>A Great Multi-Title, or, a Subtitle</dcterms:title>
|
94
|
+
</pgterms:ebook>
|
95
|
+
</rdf:RDF>'
|
96
|
+
end
|
97
|
+
let(:rdf) { Rdf.new(REXML::Document.new(xml)) }
|
98
|
+
|
99
|
+
it "expects the title to be the first line" do
|
100
|
+
expect(rdf.title).to eql 'A Great Multi-Title'
|
101
|
+
end
|
102
|
+
it "expects the subtitle to be the second line" do
|
103
|
+
expect(rdf.subtitle).to eql 'a Subtitle'
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
89
107
|
context "when title:subtitle are separated by a colon" do
|
90
108
|
let(:xml) do
|
91
109
|
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
@@ -142,7 +160,11 @@ module GutenbergRdf
|
|
142
160
|
|
143
161
|
describe "#authors" do
|
144
162
|
let(:xml) do
|
145
|
-
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
163
|
+
'<rdf:RDF xmlns:dcterms="http://purl.org/dc/terms/" xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:marcrel="http://id.loc.gov/vocabulary/relators">
|
164
|
+
<pgterms:ebook rdf:about="ebooks/99999999">
|
165
|
+
<marcrel:ctb rdf:resource="2009/agents/402"/>
|
166
|
+
<dcterms:creator rdf:resource="2009/agents/116"/>
|
167
|
+
</pgterms:ebook>
|
146
168
|
<pgterms:agent rdf:about="2009/agents/402">
|
147
169
|
<pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1830</pgterms:birthdate>
|
148
170
|
<pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1905</pgterms:deathdate>
|
@@ -166,7 +188,12 @@ module GutenbergRdf
|
|
166
188
|
end
|
167
189
|
it "has the correct author names" do
|
168
190
|
expect(rdf.authors.first.fullname).to eq 'Mary Mapes Dodge'
|
169
|
-
|
191
|
+
end
|
192
|
+
it "expects the author to have an aut role" do
|
193
|
+
expect(rdf.authors.last.role).to eq 'aut'
|
194
|
+
end
|
195
|
+
it "expects other agents to have the correct role" do
|
196
|
+
expect(rdf.authors.first.role).to eq 'ctb'
|
170
197
|
end
|
171
198
|
end
|
172
199
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gutenberg_rdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Cook
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-02-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 2.14
|
19
|
+
version: '2.14'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 2.14
|
26
|
+
version: '2.14'
|
27
27
|
description: A Ruby wrapper for the Project Gutenberg RDF catalog files (requires
|
28
28
|
Ruby 2).
|
29
29
|
email:
|
@@ -49,7 +49,7 @@ files:
|
|
49
49
|
- spec/gutenberg_rdf/rdf_spec.rb
|
50
50
|
- spec/gutenberg_rdf_spec.rb
|
51
51
|
- spec/spec_helper.rb
|
52
|
-
homepage:
|
52
|
+
homepage: https://github.com/mrcook/gutenberg_rdf
|
53
53
|
licenses:
|
54
54
|
- MIT
|
55
55
|
metadata: {}
|