marc2linkeddata 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.env_example +62 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE +202 -0
- data/README.md +234 -0
- data/bin/console +8 -0
- data/bin/loc_downloads.sh +62 -0
- data/bin/loc_import_4store.sh +24 -0
- data/bin/loc_import_allegrograph.sh +22 -0
- data/bin/loc_import_marklogic.sh +19 -0
- data/bin/readMarcAuthority +113 -0
- data/lib/marc2linkeddata/configuration.rb +146 -0
- data/lib/marc2linkeddata/isni.rb +23 -0
- data/lib/marc2linkeddata/lib_auth.rb +17 -0
- data/lib/marc2linkeddata/loc.rb +91 -0
- data/lib/marc2linkeddata/oclc_creative_work.rb +44 -0
- data/lib/marc2linkeddata/oclc_identity.rb +46 -0
- data/lib/marc2linkeddata/oclc_resource.rb +79 -0
- data/lib/marc2linkeddata/oclc_work.rb +19 -0
- data/lib/marc2linkeddata/parseMarcAuthority.rb +492 -0
- data/lib/marc2linkeddata/readMarcCatalog.rb +175 -0
- data/lib/marc2linkeddata/resource.rb +131 -0
- data/lib/marc2linkeddata/sparql.rb +55 -0
- data/lib/marc2linkeddata/viaf.rb +48 -0
- data/lib/marc2linkeddata.rb +64 -0
- data/marc2linkeddata.gemspec +40 -0
- data/spec/marc2linkeddata/configuration_spec.rb +84 -0
- data/spec/marc2linkeddata/loc_spec.rb +71 -0
- data/spec/marc2linkeddata/resource_spec.rb +53 -0
- data/spec/marc2linkeddata/viaf_spec.rb +53 -0
- data/spec/marc2linkeddata_spec.rb +39 -0
- data/spec/spec_helper.rb +92 -0
- metadata +243 -0
@@ -0,0 +1,113 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'marc2linkeddata'
|
4
|
+
|
5
|
+
# Shared configuration object for this script; the code below reads its
# `debug`, `log_file` and `logger` attributes.
CONFIG = Marc2LinkedData.configuration
|
6
|
+
|
7
|
+
# Count the MARC authority records (leader type 'z') in an open MARC file.
#
# Each iteration parses a record leader to learn the record length and type,
# then skips over the record with a relative seek. Errors are reported on
# stdout and the scan continues. The file is repositioned at offset 0 before
# returning so the caller can read the records again.
#
# @param marc_file [IO] an open MARC file that supports eof?, pos and seek
# @return [Integer] the number of records whose leader type is 'z'
def marc_auth_count(marc_file)
  auth_records = 0
  until marc_file.eof?
    record_start = marc_file.pos
    begin
      leader = Marc2LinkedData::ParseMarcAuthority.parse_leader(marc_file)
      marc_file.seek(leader[:length], IO::SEEK_CUR)
      auth_records += 1 if leader[:type] == 'z'
    rescue => e
      puts
      puts 'ERROR'
      puts e.message
      puts e.backtrace
      puts
      binding.pry if CONFIG.debug
    end
    # If the position did not move forward (e.g. a zero or invalid record
    # length, or a failure before anything was skipped) the loop could never
    # reach EOF, so give up instead of spinning forever.
    next if marc_file.pos > record_start
    puts "ERROR: no progress at byte #{record_start}, abandoning the scan."
    break
  end
  marc_file.seek(0, IO::SEEK_SET)
  auth_records
end
|
26
|
+
|
27
|
+
# Translate the authority records of one MARC file into a turtle file.
#
# The output path is the input path with a trailing '.mrc' replaced by
# '.ttl'; when the input does not end in '.mrc', '.ttl' is appended so the
# output can never be the input file itself. Only records with leader type
# 'z' are converted. '@prefix' lines are written for the first converted
# record only, and blank lines are dropped. Errors for a record are reported
# on stdout and processing continues with the next record.
#
# @param marc_filename [String] path to the MARC input file
# @return [nil]
def marc2ld(marc_filename)
  ld_filename = marc_filename.sub(/\.mrc\z/, '.ttl')
  ld_filename = "#{marc_filename}.ttl" if ld_filename == marc_filename
  puts "Translating: #{marc_filename} to #{ld_filename}"
  # Block forms close both files even when an exception escapes.
  File.open(marc_filename, 'r') do |marc_file|
    File.open(ld_filename, 'w') do |ld_file|
      # Marc2LinkedData.write_prefixes(ld_file)
      auth_count = 0
      auth_records = marc_auth_count(marc_file)
      progress = ProgressBar.create(:total => auth_records, :format => '%a %f |%b>>%i| %P%% %t')
      until marc_file.eof?
        record_start = marc_file.pos
        # Reset per record so the error report never shows a previous record.
        record = nil
        begin
          leader = Marc2LinkedData::ParseMarcAuthority.parse_leader(marc_file)
          raw = marc_file.read(leader[:length])
          if leader[:type] == 'z'
            progress.increment
            record = MARC::Reader.decode(raw)
            # ParseMarcAuthority is a lazy parser, so
            # init only assigns record to an instance var.
            auth = Marc2LinkedData::ParseMarcAuthority.new(record)
            auth_count += 1
            # auth_id = "auth:#{auth.get_id}"
            # triples = nil
            # # TODO: enable additional persistence options
            # # Use data already in redis (if enabled)
            # triples = CONFIG.redis.get(auth_id) if CONFIG.redis_read
            # if triples.nil?
            #   triples = auth.to_ttl  # generate new triples
            #   # Update redis (if enabled) for triples not read from redis
            #   CONFIG.redis.set(auth_id, triples) if CONFIG.redis_write
            # end

            triples = auth.to_ttl.lines
            binding.pry if (CONFIG.debug && triples.empty?)
            triples.delete_if { |l| l.chomp.empty? }
            # Prefix declarations are only needed once per output file.
            triples.delete_if { |l| l.start_with?('@prefix') } if auth_count > 1
            ld_file.write(triples.join)
            ld_file.flush
          end
        rescue => e
          puts
          puts 'ERROR'
          puts e.message
          puts e.backtrace
          puts record.to_s
          puts
          binding.pry if CONFIG.debug
        end
        # Without forward movement the loop could never reach EOF.
        next if marc_file.pos > record_start
        puts "ERROR: no progress at byte #{record_start}, abandoning #{marc_filename}."
        break
      end
    end
  end
  nil
end
|
79
|
+
|
80
|
+
# Command-line driver: every argument that names an existing file is
# translated with marc2ld; arguments that do not exist are reported on stderr.
# When no usable file is given, the usage text is printed instead.
marc_files = []
ARGV.each do |filename|
  path = Pathname(filename)
  if path.exist?
    marc_files.push(path)
  else
    warn "Skipping missing file: #{filename}"
  end
end
if marc_files.empty?
  puts <<HELP
#{__FILE__} marc_authority_file1.mrc [ marc_authority_file2.mrc .. marc_authority_fileN.mrc ]

Output is RDF triples in a turtle file (.ttl) for every input .mrc file.
Optional persistence services can be controlled by environment variables.

Redis Persistence - based on https://github.com/redis/redis-rb
- essential options:
export REDIS4MARC=true # enable redis persistence (default = false)
- supplementary options:
Set the REDIS_URL for a custom redis configuration.
export REDIS_URL="redis://{user}:{password}@{host}:{port}/{db}"
export REDIS_READ=true # enable redis reads (default = REDIS4MARC || false)
# faster reading of triples from pre-populated redis data
export REDIS_WRITE=true # enable redis writes (default = REDIS4MARC || false)
# recent data is updated in redis

HELP
else
  puts "Logging to: #{CONFIG.log_file}"
  marc_files.each do |path|
    CONFIG.logger.info "Processing: #{path}"
    marc2ld(path.to_s)
  end
end
|
112
|
+
|
113
|
+
|
@@ -0,0 +1,146 @@
|
|
1
|
+
|
2
|
+
module Marc2LinkedData

  # Settings for the MARC to linked-data conversion, read from environment
  # variables when an instance is created.
  #
  # Boolean options are true only when the variable is set to 'true'
  # (case-insensitive); anything else, including an unset variable, is false.
  class Configuration

    attr_accessor :debug

    attr_accessor :field_auth_loc
    attr_accessor :field_auth_isni
    attr_accessor :field_auth_oclc
    attr_accessor :field_auth_viaf

    attr_accessor :get_isni
    attr_accessor :get_loc
    attr_accessor :get_oclc
    attr_accessor :get_viaf
    attr_accessor :oclc_auth2works

    attr_accessor :local_loc_user
    attr_accessor :local_loc_pass
    attr_accessor :local_loc_url

    attr_accessor :prefixes

    attr_accessor :use_foaf
    attr_accessor :use_schema

    attr_accessor :redis4marc
    attr_accessor :redis_read
    attr_accessor :redis_write
    attr_accessor :redis

    attr_accessor :log_file
    attr_accessor :logger

    # Read every option from ENV, open the log and, when REDIS4MARC is
    # enabled, connect to redis.
    def initialize
      @debug = env_boolean('DEBUG')

      configure_logging
      configure_prefixes

      # Authority parse options
      @field_auth_loc = ENV['FIELD_AUTH_LOC']
      @field_auth_isni = ENV['FIELD_AUTH_ISNI']
      @field_auth_oclc = ENV['FIELD_AUTH_OCLC']
      @field_auth_viaf = ENV['FIELD_AUTH_VIAF']

      @get_isni = env_boolean('GET_ISNI')
      @get_loc = env_boolean('GET_LOC')
      @get_viaf = env_boolean('GET_VIAF')
      @get_oclc = env_boolean('GET_OCLC')
      @oclc_auth2works = env_boolean('OCLC_AUTH2WORKS')

      # Vocabulary options
      # foaf:Person or schema:Person or both?
      @use_foaf = env_boolean('USE_FOAF')
      @use_schema = env_boolean('USE_SCHEMA') # schema.org

      # Local triple store for LOC authority data,
      # accessed via an HTTP API with basic authentication.
      # See downloads at http://id.loc.gov/download/
      @local_loc_user = ENV['LOCAL_LOC_USER']
      @local_loc_pass = ENV['LOCAL_LOC_PASS']
      loc_host = ENV['LOCAL_LOC_HOST']
      loc_port = ENV['LOCAL_LOC_PORT']
      loc_path = ENV['LOCAL_LOC_PATH']
      @local_loc_url = "http://#{loc_host}:#{loc_port}#{loc_path}"

      # Persistence options
      @redis = nil
      @redis4marc = env_boolean('REDIS4MARC')
      if @redis4marc
        # REDIS_URL is a connection URL, not a flag, so it is kept verbatim;
        # an empty value is treated like an unset one.
        url = ENV['REDIS_URL']
        @redis_url = url.to_s.empty? ? nil : url
        # NOTE(review): the command-line help describes the default of
        # REDIS_READ/REDIS_WRITE as REDIS4MARC; here an unset variable
        # yields false - confirm which is intended.
        @redis_read = env_boolean('REDIS_READ')
        @redis_write = env_boolean('REDIS_WRITE')
        redis_config
      else
        @redis_url = nil
        @redis_read = false
        @redis_write = false
      end
      # TODO: provide options for triple stores
    end

    # Check whether an ENV variable is set to 'true' (case-insensitive).
    #
    # @param var [#to_s] name of the environment variable
    # @return [Boolean] false when the variable is unset or has another value
    def env_boolean(var)
      ENV[var.to_s].to_s.upcase == 'TRUE'
    end

    # Connect to redis when REDIS4MARC is enabled, using REDIS_URL when one
    # was given and the redis client default otherwise. The connection is
    # pinged immediately, so an unreachable server raises here.
    #
    # The gems are required lazily so they are only needed when redis
    # persistence is switched on.
    def redis_config
      return unless @redis4marc
      # https://github.com/redis/redis-rb
      # storing objects in redis:
      #redis.set "foo", [1, 2, 3].to_json
      #JSON.parse(redis.get("foo"))
      require 'hiredis'
      require 'redis'
      @redis =
        if @redis_url
          # redis url should be of the form "redis://{user}:{password}@{host}:{port}/{db}"
          Redis.new(:url => @redis_url)
        else
          # default is 'redis://127.0.0.1:6379/0'
          Redis.new
        end
      @redis.ping
    end

    private

    # Open the log named by LOG_FILE (default 'marc2ld.log'), falling back to
    # STDERR when the file cannot be opened. The file is opened with 'w+',
    # so an existing log is truncated.
    def configure_logging
      @log_file = File.absolute_path(ENV['LOG_FILE'] || 'marc2ld.log')
      log_path = File.dirname(@log_file)
      unless File.directory?(log_path)
        begin
          # try to create the log directory (one level only)
          Dir.mkdir(log_path)
        rescue SystemCallError
          # best effort: opening the log file below fails and STDERR is used
        end
      end
      begin
        log_device = File.new(@log_file, 'w+')
      rescue StandardError
        log_device = $stderr
        @log_file = 'STDERR'
      end
      @logger = Logger.new(log_device, 'monthly')
      @logger.level = @debug ? Logger::DEBUG : Logger::INFO
    end

    # Build the table of RDF prefixes: library-specific ones derived from
    # LIB_PREFIX, followed by the fixed vocabulary prefixes.
    def configure_prefixes
      @prefixes = {}
      # Library specific prefixes (use .env file or set shell ENV)
      @prefixes['lib'] = ENV['LIB_PREFIX'] || 'http://linked-data.stanford.edu/library/'
      @prefixes['lib_auth'] = "#{@prefixes['lib']}authority/"
      @prefixes['lib_cat'] = "#{@prefixes['lib']}catalog/"
      # Static Prefixes
      @prefixes['bf'] = 'http://bibframe.org/vocab/'
      @prefixes['foaf'] = 'http://xmlns.com/foaf/0.1/'
      @prefixes['isni'] = 'http://www.isni.org/isni/'
      @prefixes['loc_names'] = 'http://id.loc.gov/authorities/names/'
      @prefixes['loc_subjects'] = 'http://id.loc.gov/authorities/subjects/'
      @prefixes['rdf'] = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
      @prefixes['rdfs'] = 'http://www.w3.org/2000/01/rdf-schema#'
      @prefixes['schema'] = 'http://schema.org/'
      @prefixes['owl'] = 'http://www.w3.org/2002/07/owl#'
      @prefixes['viaf'] = 'http://viaf.org/viaf/'
    end

  end

end
|
146
|
+
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require_relative 'resource'
|
2
|
+
|
3
|
+
module Marc2LinkedData

  # Resource identified by an ISNI IRI.
  #
  # Background slides on ISNI:
  # http://www.slideshare.net/JaniferGatenby/viaf-and-isni-ifla-2014-0815
  class Isni < Resource

    PREFIX = 'http://www.isni.org/isni/'

    # RDF for this identity, retrieved from the IRI with '.rdf' appended
    # (e.g. 'http://www.isni.org/isni/0000000109311081' + '.rdf').
    # The result is kept in @rdf and reused on later calls.
    #
    # @return the value of get_rdf, or nil when there is no IRI
    def rdf
      return if @iri.nil?
      @rdf = get_rdf("#{@iri}.rdf") if @rdf.nil?
      @rdf
    end

  end

end
|
23
|
+
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require_relative 'resource'
|
2
|
+
|
3
|
+
module Marc2LinkedData

  # Resource subclass for library authority IRIs. It currently adds no
  # behaviour of its own; everything comes from Resource.
  class LibAuth < Resource

    # Disabled draft of an RDF accessor, kept for reference:
    #
    # def rdf
    #   return nil if @iri.nil?
    #   return @rdf unless @rdf.nil?
    #   uri4rdf = @iri.to_s + '.rdf'
    #   @rdf = get_rdf(uri4rdf)
    # end

  end

end
|
17
|
+
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require_relative 'resource'
|
2
|
+
|
3
|
+
module Marc2LinkedData

  # Resource identified by a Library of Congress authority IRI
  # (http://id.loc.gov/authorities/...).
  class Loc < Resource

    PREFIX = 'http://id.loc.gov/authorities/'
    PREFIX_NAMES = "#{PREFIX}names/"
    PREFIX_SUBJECTS = "#{PREFIX}subjects/"

    # Namespace of the MADS/RDF classes tested by the type predicates.
    MADS_PREFIX = 'http://www.loc.gov/mads/rdf/v1#'

    # Disabled draft of an id accessor, kept for reference:
    #
    # def id
    #   return nil if @iri.nil?
    #   @id ||= @iri.basename
    #   # Could get id from rdf, but that incurs costs for RDF retrieval and parsing etc.
    #   #oclc_id = '<identifiers:oclcnum>oca04921729</identifiers:oclcnum>'
    #   #<identifiers:lccn>no 99010609</identifiers:lccn>
    #   #<identifiers:oclcnum>oca04921729</identifiers:oclcnum>
    # end

    # RDF for this authority, retrieved from the IRI with '.rdf' appended.
    # The result is kept in @rdf and reused on later calls.
    #
    # @return the value of get_rdf, or nil when there is no IRI
    def rdf
      return nil if iri.nil?
      return @rdf unless @rdf.nil?
      uri4rdf = iri.to_s + '.rdf'
      @rdf = get_rdf(uri4rdf)
    end

    # The MADS authoritativeLabel of this IRI.
    #
    # @return [String, nil] the first label found; nil when the RDF lookup
    #   fails or yields no label
    def label
      label_predicate = '<http://www.loc.gov/mads/rdf/v1#authoritativeLabel>'
      query = SPARQL.parse("SELECT * WHERE { <#{@iri}> #{label_predicate} ?o }")
      begin
        rdf.query(query).first[:o].to_s
      rescue StandardError
        # best effort: a missing label is not an error for callers
        nil
      end
    end

    # @return [Boolean] true when the IRI is typed as a MADS Authority
    def authority?
      mads_type?('Authority')
    end

    # @return [Boolean] true when the IRI is typed as a MADS DeprecatedAuthority
    def deprecated?
      mads_type?('DeprecatedAuthority')
    end

    # @return [Boolean] true when the IRI is typed as a MADS ConferenceName
    def conference?
      mads_type?('ConferenceName')
    end

    # @return [Boolean] true when the IRI is typed as a MADS CorporateName
    def corporation?
      mads_type?('CorporateName')
    end

    # @return [Boolean] true when the IRI is typed as a MADS NameTitle
    def name_title?
      mads_type?('NameTitle')
    end

    # @return [Boolean] true when the IRI is typed as a MADS PersonalName
    def person?
      mads_type?('PersonalName')
    end

    # @return [Boolean] true when the IRI is typed as a MADS Geographic
    def place?
      mads_type?('Geographic')
    end

    # Try to get the OCLC identity IRI from the LOC ID.
    # http://oclc.org/developer/develop/web-services/worldcat-identities.en.html
    # e.g. http://www.worldcat.org/identities/lccn-n79044803/
    # e.g. http://www.worldcat.org/identities/lccn-n79044798/
    # The result is kept in @oclc_iri and reused on later calls.
    #
    # @return the value of resolve_external_auth for the identity URL
    def get_oclc_identity
      return @oclc_iri unless @oclc_iri.nil?
      oclc_url = escape_url('http://www.worldcat.org/identities/lccn-' + id + '/')
      @oclc_iri = resolve_external_auth(oclc_url)
      # TODO: OCLC might redirect and then provide a 'fast' URI for obsolete identity records.
    end

    # Try to get the VIAF IRI from the LOC sourceID.
    # LOC statement with VIAF URI, e.g.:
    # s: <http://id.loc.gov/authorities/names/n79046291>
    # p: <http://www.loc.gov/mads/rdf/v1#hasExactExternalAuthority>
    # o: <http://viaf.org/viaf/sourceID/LC%7Cn+79046291#skos:Concept> .
    # The result is kept in @viaf_iri and reused on later calls.
    #
    # @return the value of resolve_external_auth for the sourceID URL
    def get_viaf
      return @viaf_iri unless @viaf_iri.nil?
      #return nil unless rdf_valid?
      #@viaf_iri ||= rdf_find_object 'viaf'
      viaf_url = escape_url('http://viaf.org/viaf/sourceID/LC|' + id + '#skos:Concept')
      @viaf_iri = resolve_external_auth(viaf_url)
    end

    private

    # True when one of the IRI's types equals the named MADS class.
    # any? only reads the type list; it never modifies it.
    def mads_type?(name)
      type_uri = MADS_PREFIX + name
      iri_types.any? { |s| s[:o] == type_uri }
    end

    # Percent-escape a URL. URI.encode was removed in Ruby 3.0; the default
    # parser's escape is used in its place.
    def escape_url(url)
      URI::DEFAULT_PARSER.escape(url)
    end

  end

end
|
91
|
+
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require_relative 'oclc_resource'
|
2
|
+
|
3
|
+
module Marc2LinkedData

  # OCLC resource under http://www.worldcat.org/oclc/ that can be linked to
  # works through schema:exampleOfWork.
  class OclcCreativeWork < OclcResource

    PREFIX = 'http://www.worldcat.org/oclc/'

    # Find the objects of schema:exampleOfWork for this resource.
    # (assume an exampleOfWork can only ever link to one work?)
    #
    # OCLC data is inconsistent in its use of 'www.' in IRIs, and
    # OclcResource coerces @iri to include it, so several spellings of the
    # subject are tried in order until one yields results:
    #   1. @iri as it is
    #   2. without 'www.'
    #   3. with 'www.', the ID cast to an integer
    #   4. without 'www.' and with the ID cast to an integer
    #
    # @return [Array] the works from the first spelling that matched; empty
    #   when none did
    def get_works
      # Lambdas keep each spelling from being computed before it is needed.
      spellings = [
        lambda { @iri },
        lambda { @iri.to_s.gsub('www.','') },
        lambda { @iri.to_s.gsub(id, id.to_i.to_s) },
        lambda { @iri.to_s.gsub('www.','').gsub(id, id.to_i.to_s) }
      ]
      found = []
      spellings.each do |spelling|
        sparql = query_work(spelling.call)
        found = rdf.query(sparql).map { |solution| solution[:o] }
        break unless found.empty?
      end
      found
    end

    # Build the SPARQL query selecting schema:exampleOfWork objects of +uri+.
    def query_work(uri)
      SPARQL.parse("SELECT * WHERE { <#{uri}> <http://schema.org/exampleOfWork> ?o }")
    end

    # TODO: get ISBN?

  end

end
|
44
|
+
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require_relative 'resource'
|
2
|
+
|
3
|
+
module Marc2LinkedData

  # Resource identified by a WorldCat identities IRI.
  class OclcIdentity < Resource

    PREFIX = 'http://www.worldcat.org/identities/'

    # RDF for this identity, retrieved from the IRI with a trailing '/'
    # (e.g. 'http://www.worldcat.org/identities/lccn-n79044803/'; the html
    # returned contains RDFa data). The result is kept in @rdf and reused on
    # later calls.
    #
    # @return the value of get_rdf, or nil when there is no IRI
    def rdf
      return if @iri.nil?
      if @rdf.nil?
        location = @iri.to_s
        location = "#{location}/" unless location.end_with?('/')
        @rdf = get_rdf(location)
      end
      @rdf
    end

    # Disabled draft of an XML accessor, kept for reference:
    #
    # def get_xml
    #   begin
    #     return @xml unless @xml.nil?
    #     http = Net::HTTP.new @iri.host
    #     resp = http.get(@iri.path, {'Accept' => 'application/xml'})
    #     case resp.code
    #       when '301','302','303'
    #         #301 Moved Permanently; 302 Moved Temporarily; 303 See Other
    #         resp = http.get(resp['location'], {'Accept' => 'application/xml'})
    #     end
    #     if resp.code != '200'
    #       raise
    #     end
    #     @xml = resp.body
    #   rescue
    #     puts 'ERROR: Failed to request OCLC identity xml.'
    #   end
    # end

    # Subjects in this identity's RDF that are typed schema:CreativeWork.
    #
    # @return [Array] the ?oclcWork binding of every matching solution
    def creative_works
      sparql = SPARQL.parse('SELECT * WHERE { ?oclcWork a <http://schema.org/CreativeWork> }')
      rdf.query(sparql).map { |solution| solution[:oclcWork] }
    end

  end

end
|
46
|
+
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require_relative 'resource'
|
2
|
+
|
3
|
+
module Marc2LinkedData

  # Resource identified by a WorldCat IRI under http://www.worldcat.org/oclc/,
  # with accessors for the schema.org statements in its RDF.
  class OclcResource < Resource

    PREFIX = 'http://www.worldcat.org/oclc/'

    # Ensure the OCLC IRI contains 'www' in the host name before handing it
    # to Resource.
    #
    # @param uri [#to_s, nil] the OCLC IRI; a value without 'www.' is
    #   converted to a String with 'worldcat.org' replaced by
    #   'www.worldcat.org'
    def initialize(uri=nil)
      # Test the string form so the check does not depend on the argument's
      # class implementing =~.
      unless uri.to_s.include?('www.')
        uri = uri.to_s.gsub('worldcat.org','www.worldcat.org')
      end
      super(uri)
    end

    # RDF for this resource, retrieved from the IRI with '.rdf' appended
    # unless it already ends that way.
    # e.g. 'http://worldcat.org/oclc/004957186'
    # also 'http://www.worldcat.org/oclc/004957186'
    # The result is kept in @rdf and reused on later calls.
    #
    # @return the value of get_rdf, or nil when there is no IRI
    def rdf
      return nil if @iri.nil?
      return @rdf unless @rdf.nil?
      uri4rdf = @iri.to_s
      uri4rdf += '.rdf' unless uri4rdf.end_with? '.rdf'
      @rdf = get_rdf(uri4rdf)
    end

    # @return [Boolean] true when the IRI is typed schema:Book
    def book?
      schema_type?('http://schema.org/Book')
    end

    # @return [Boolean] true when +uri+ is one of the schema:creator objects
    def creator?(uri)
      creators.include? RDF::URI.new(uri)
    end

    # @return [Boolean] true when +uri+ is one of the schema:contributor objects
    def contributor?(uri)
      contributors.include? RDF::URI.new(uri)
    end

    # @return [Boolean] true when +uri+ is one of the schema:editor objects
    def editor?(uri)
      editors.include? RDF::URI.new(uri)
    end

    # @return [Boolean] true when the IRI is typed schema:MediaObject
    def media_object?
      schema_type?('http://schema.org/MediaObject')
    end

    # All schema:about statements in the RDF, for any subject.
    #
    # @return the query solutions, with ?s and ?o bindings
    def about
      q = SPARQL.parse('SELECT * WHERE { ?s <http://schema.org/about> ?o }')
      rdf.query(q)
    end

    # @return [Array] objects of schema:creator for this IRI
    def creators
      schema_objects('http://schema.org/creator')
    end

    # @return [Array] objects of schema:contributor for this IRI
    def contributors
      schema_objects('http://schema.org/contributor')
    end

    # @return [Array] objects of schema:editor for this IRI
    def editors
      schema_objects('http://schema.org/editor')
    end

    # @return [Array] objects of schema:publisher for this IRI
    def publishers
      schema_objects('http://schema.org/publisher')
    end

    # @return [Array] objects of schema:isbn for this IRI
    def isbns
      schema_objects('http://schema.org/isbn')
    end

    private

    # Objects of every statement in the RDF whose subject is this IRI and
    # whose predicate is +predicate+.
    def schema_objects(predicate)
      q = SPARQL.parse("SELECT * WHERE { <#{@iri}> <#{predicate}> ?o }")
      rdf.query(q).collect {|s| s[:o] }
    end

    # True when one of the IRI's types equals +type_uri+.
    # any? only reads the type list; it never modifies it.
    def schema_type?(type_uri)
      iri_types.any? {|s| s[:o] == type_uri }
    end

  end

end
|
79
|
+
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require_relative 'oclc_resource'
|
2
|
+
|
3
|
+
module Marc2LinkedData

  # OCLC resource for a work entity (http://worldcat.org/entity/work/id/).
  class OclcWork < OclcResource

    # OCLC is inconsistent with use of 'www' in URIs; the alternative would be
    # 'http://www.worldcat.org/entity/work/id/'
    PREFIX = 'http://worldcat.org/entity/work/id/'

    # Objects of schema:workExample for this work.
    #
    # @return [Array] the ?o binding of every matching solution
    def example_works
      sparql = SPARQL.parse("SELECT * WHERE { <#{@iri}> <http://schema.org/workExample> ?o }")
      rdf.query(sparql).map { |solution| solution[:o] }
    end

  end

end
|
19
|
+
|