marc2linkeddata 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.env_example +62 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE +202 -0
- data/README.md +234 -0
- data/bin/console +8 -0
- data/bin/loc_downloads.sh +62 -0
- data/bin/loc_import_4store.sh +24 -0
- data/bin/loc_import_allegrograph.sh +22 -0
- data/bin/loc_import_marklogic.sh +19 -0
- data/bin/readMarcAuthority +113 -0
- data/lib/marc2linkeddata/configuration.rb +146 -0
- data/lib/marc2linkeddata/isni.rb +23 -0
- data/lib/marc2linkeddata/lib_auth.rb +17 -0
- data/lib/marc2linkeddata/loc.rb +91 -0
- data/lib/marc2linkeddata/oclc_creative_work.rb +44 -0
- data/lib/marc2linkeddata/oclc_identity.rb +46 -0
- data/lib/marc2linkeddata/oclc_resource.rb +79 -0
- data/lib/marc2linkeddata/oclc_work.rb +19 -0
- data/lib/marc2linkeddata/parseMarcAuthority.rb +492 -0
- data/lib/marc2linkeddata/readMarcCatalog.rb +175 -0
- data/lib/marc2linkeddata/resource.rb +131 -0
- data/lib/marc2linkeddata/sparql.rb +55 -0
- data/lib/marc2linkeddata/viaf.rb +48 -0
- data/lib/marc2linkeddata.rb +64 -0
- data/marc2linkeddata.gemspec +40 -0
- data/spec/marc2linkeddata/configuration_spec.rb +84 -0
- data/spec/marc2linkeddata/loc_spec.rb +71 -0
- data/spec/marc2linkeddata/resource_spec.rb +53 -0
- data/spec/marc2linkeddata/viaf_spec.rb +53 -0
- data/spec/marc2linkeddata_spec.rb +39 -0
- data/spec/spec_helper.rb +92 -0
- metadata +243 -0
@@ -0,0 +1,113 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'marc2linkeddata'
|
4
|
+
|
5
|
+
CONFIG = Marc2LinkedData.configuration
|
6
|
+
|
7
|
+
# Count the authority records in an open MARC file.
#
# Walks the file one leader at a time, skipping over each record body with a
# relative seek, and tallies the records whose leader type is 'z'. Errors
# raised while handling a record are printed and the walk continues. The file
# position is reset to the start before returning.
#
# @param marc_file [IO] an open, seekable MARC file
# @return [Integer] number of records with leader type 'z'
def marc_auth_count(marc_file)
  total = 0
  until marc_file.eof?
    begin
      header = Marc2LinkedData::ParseMarcAuthority::parse_leader(marc_file)
      # Skip the record content; only the leader is needed for counting.
      marc_file.seek(header[:length], IO::SEEK_CUR)
      total += 1 if header[:type] == 'z'
    rescue => err
      puts '', 'ERROR', err.message, err.backtrace, ''
      binding.pry if CONFIG.debug
    end
  end
  # Rewind so the caller can read the records from the beginning.
  marc_file.seek(0, IO::SEEK_SET)
  total
end
|
26
|
+
|
27
|
+
# Translate the authority records of one MARC file into a turtle (.ttl) file.
#
# The output name is the input name with a trailing '.mrc' replaced by
# '.ttl'. When the input name does not end in '.mrc', '.ttl' is appended
# instead, so the output can never be the same path as the input (opening the
# input path for writing would truncate it).
#
# Only records whose leader type is 'z' are translated. Errors raised while
# handling a record are printed and processing continues with the next one.
# Both files are closed when the method returns, even if it raises.
#
# @param marc_filename [String] path to the MARC authority file
# @return [nil]
def marc2ld(marc_filename)
  ld_filename = marc_filename.sub(/\.mrc\z/, '.ttl')
  ld_filename = "#{marc_filename}.ttl" if ld_filename == marc_filename
  puts "Translating: #{marc_filename} to #{ld_filename}"
  File.open(marc_filename, 'r') do |marc_file|
    File.open(ld_filename, 'w') do |ld_file|
      # Marc2LinkedData.write_prefixes(ld_file)
      auth_records = marc_auth_count(marc_file)
      progress = ProgressBar.create(:total => auth_records, :format => '%a %f |%b>>%i| %P%% %t')
      # The @prefix lines are kept only for the first record that is actually
      # written, so a failure on an earlier record cannot drop them.
      wrote_record = false
      until marc_file.eof?
        # Reset per record so the error report never shows a stale record.
        record = nil
        begin
          leader = Marc2LinkedData::ParseMarcAuthority::parse_leader(marc_file)
          raw = marc_file.read(leader[:length])
          if leader[:type] == 'z'
            progress.increment
            record = MARC::Reader.decode(raw)
            # ParseMarcAuthority is a lazy parser, so
            # init only assigns record to an instance var.
            auth = Marc2LinkedData::ParseMarcAuthority.new(record)
            # TODO: enable additional persistence options, e.g.
            #   auth_id = "auth:#{auth.get_id}"
            #   triples = CONFIG.redis.get(auth_id) if CONFIG.redis_read
            #   if triples.nil?
            #     triples = auth.to_ttl # generate new triples
            #     CONFIG.redis.set(auth_id, triples) if CONFIG.redis_write
            #   end
            triples = auth.to_ttl.lines
            binding.pry if CONFIG.debug && triples.empty?
            triples.delete_if { |l| l.chomp.empty? }
            triples.delete_if { |l| l.start_with?('@prefix') } if wrote_record
            ld_file.write(triples.join)
            ld_file.flush
            wrote_record = true
          end
        rescue => e
          puts
          puts 'ERROR'
          puts e.message
          puts e.backtrace
          puts record.to_s
          puts
          binding.pry if CONFIG.debug
        end
      end
    end
  end
  nil
end
|
79
|
+
|
80
|
+
# Keep only the command line arguments that name existing paths.
marc_files = ARGV.map { |arg| Pathname(arg) }.select(&:exist?)

# Without any usable input, show the usage text.
if marc_files.empty?
  puts <<HELP
#{__FILE__} marc_authority_file1.mrc [ marc_authority_file2.mrc .. marc_authority_fileN.mrc ]

Output is RDF triples in a turtle file (.ttl) for every input .mrc file.
Optional persistence services can be controlled by environment variables.

Redis Persistence - based on https://github.com/redis/redis-rb
- essential options:
export REDIS4MARC=true # enable redis persistence (default = false)
- supplementary options:
Set the REDIS_URL for a custom redis configuration.
export REDIS_URL="redis://{user}:{password}@{host}:{port}/{db}"
export REDIS_READ=true # enable redis reads (default = REDIS4MARC || false)
# faster reading of triples from pre-populated redis data
export REDIS_WRITE=true # enable redis writes (default = REDIS4MARC || false)
# recent data is updated in redis

HELP
end

puts "Logging to: #{CONFIG.log_file}"
marc_files.each do |marc_path|
  CONFIG.logger.info "Processing: #{marc_path}"
  marc2ld(marc_path.to_s)
end
|
112
|
+
|
113
|
+
|
@@ -0,0 +1,146 @@
|
|
1
|
+
|
2
|
+
module Marc2LinkedData
|
3
|
+
|
4
|
+
# Settings for marc2linkeddata, read from environment variables when an
# instance is created.
#
# Environment variables read here: DEBUG, LOG_FILE, LIB_PREFIX,
# FIELD_AUTH_LOC/ISNI/OCLC/VIAF, GET_ISNI/LOC/VIAF/OCLC, OCLC_AUTH2WORKS,
# USE_FOAF, USE_SCHEMA, LOCAL_LOC_USER/PASS/HOST/PORT/PATH, REDIS4MARC,
# REDIS_URL, REDIS_READ and REDIS_WRITE.
class Configuration
  attr_accessor :debug

  attr_accessor :field_auth_loc, :field_auth_isni, :field_auth_oclc, :field_auth_viaf

  attr_accessor :get_isni, :get_loc, :get_oclc, :get_viaf, :oclc_auth2works

  attr_accessor :local_loc_user, :local_loc_pass, :local_loc_url

  attr_accessor :prefixes

  attr_accessor :use_foaf, :use_schema

  attr_accessor :redis4marc, :redis_read, :redis_write, :redis

  attr_accessor :log_file, :logger

  # Build the configuration from the current environment. Opens the log
  # file and, when REDIS4MARC is true, connects to redis.
  def initialize
    @debug = env_boolean('DEBUG')
    init_logging
    init_prefixes
    init_authority_options
    init_local_loc
    init_persistence
    # TODO: provide options for triple stores
  end

  # Check if an ENV variable is true, use false as default.
  #
  # @param var [String] name of the environment variable
  # @return [Boolean] true only when the value is 'true' (any letter case)
  def env_boolean(var)
    ENV[var].to_s.upcase == 'TRUE'
  rescue StandardError
    false
  end

  # Connect to redis when REDIS4MARC is enabled; does nothing otherwise.
  # Uses the REDIS_URL value when one was given, else the redis-rb default
  # ('redis://127.0.0.1:6379/0'). The connection is checked with a ping.
  #
  # See https://github.com/redis/redis-rb
  # storing objects in redis:
  #   redis.set "foo", [1, 2, 3].to_json
  #   JSON.parse(redis.get("foo"))
  def redis_config
    return unless @redis4marc
    # Loaded here so the redis gems are only needed when redis is enabled.
    require 'hiredis'
    require 'redis'
    # redis url should be of the form "redis://{user}:{password}@{host}:{port}/{db}"
    @redis = @redis_url ? Redis.new(:url => @redis_url) : Redis.new
    @redis.ping
  end

  private

  # Open the log file (LOG_FILE, default 'marc2ld.log') and create the
  # logger; falls back to STDERR when the file cannot be opened.
  def init_logging
    @log_file = File.absolute_path(ENV['LOG_FILE'] || 'marc2ld.log')
    log_path = File.dirname(@log_file)
    unless File.directory?(log_path)
      begin
        # Best effort only: a failure here is handled by the STDERR
        # fallback below.
        Dir.mkdir(log_path)
      rescue StandardError
        nil
      end
    end
    begin
      log_device = File.new(@log_file, 'w+')
    rescue StandardError
      log_device = $stderr
      @log_file = 'STDERR'
    end
    @logger = Logger.new(log_device, 'monthly')
    @logger.level = @debug ? Logger::DEBUG : Logger::INFO
  end

  # RDF prefixes: library specific ones (use .env file or set shell ENV)
  # followed by the static vocabularies.
  def init_prefixes
    @prefixes = {}
    @prefixes['lib'] = ENV['LIB_PREFIX'] || 'http://linked-data.stanford.edu/library/'
    @prefixes['lib_auth'] = "#{prefixes['lib']}authority/"
    @prefixes['lib_cat'] = "#{prefixes['lib']}catalog/"
    @prefixes['bf'] = 'http://bibframe.org/vocab/'
    @prefixes['foaf'] = 'http://xmlns.com/foaf/0.1/'
    @prefixes['isni'] = 'http://www.isni.org/isni/'
    @prefixes['loc_names'] = 'http://id.loc.gov/authorities/names/'
    @prefixes['loc_subjects'] = 'http://id.loc.gov/authorities/subjects/'
    @prefixes['rdf'] = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
    @prefixes['rdfs'] = 'http://www.w3.org/2000/01/rdf-schema#'
    @prefixes['schema'] = 'http://schema.org/'
    @prefixes['owl'] = 'http://www.w3.org/2002/07/owl#'
    @prefixes['viaf'] = 'http://viaf.org/viaf/'
  end

  # Authority parse options and vocabulary options
  # (foaf:Person or schema:Person or both?).
  def init_authority_options
    @field_auth_loc = ENV['FIELD_AUTH_LOC']
    @field_auth_isni = ENV['FIELD_AUTH_ISNI']
    @field_auth_oclc = ENV['FIELD_AUTH_OCLC']
    @field_auth_viaf = ENV['FIELD_AUTH_VIAF']

    @get_isni = env_boolean('GET_ISNI')
    @get_loc = env_boolean('GET_LOC')
    @get_viaf = env_boolean('GET_VIAF')
    @get_oclc = env_boolean('GET_OCLC')
    @oclc_auth2works = env_boolean('OCLC_AUTH2WORKS')

    @use_foaf = env_boolean('USE_FOAF')
    @use_schema = env_boolean('USE_SCHEMA') # schema.org
  end

  # Local triple store for LOC authority data,
  # accessed via an HTTP API with basic authentication.
  # See downloads at http://id.loc.gov/download/
  def init_local_loc
    @local_loc_user = ENV['LOCAL_LOC_USER']
    @local_loc_pass = ENV['LOCAL_LOC_PASS']
    loc_host = ENV['LOCAL_LOC_HOST']
    loc_port = ENV['LOCAL_LOC_PORT']
    loc_path = ENV['LOCAL_LOC_PATH']
    @local_loc_url = "http://#{loc_host}:#{loc_port}#{loc_path}"
  end

  # Persistence options. REDIS_URL is kept as the URL text (a blank value
  # counts as unset). REDIS_READ and REDIS_WRITE default to the REDIS4MARC
  # value when they are not set.
  def init_persistence
    @redis = nil
    @redis4marc = env_boolean('REDIS4MARC')
    if @redis4marc
      url = ENV['REDIS_URL'].to_s.strip
      @redis_url = url.empty? ? nil : url
      @redis_read = env_boolean_or('REDIS_READ', @redis4marc)
      @redis_write = env_boolean_or('REDIS_WRITE', @redis4marc)
      redis_config
    else
      @redis_url = nil
      @redis_read = false
      @redis_write = false
    end
  end

  # Like env_boolean, but returns +default+ when the variable is not set.
  def env_boolean_or(var, default)
    ENV.key?(var) ? env_boolean(var) : default
  end
end
|
144
|
+
|
145
|
+
end
|
146
|
+
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require_relative 'resource'
|
2
|
+
|
3
|
+
module Marc2LinkedData
|
4
|
+
|
5
|
+
# A Resource identified by an ISNI IRI.
#
# Interesting slide presentation about ISNI
# http://www.slideshare.net/JaniferGatenby/viaf-and-isni-ifla-2014-0815
class Isni < Resource
  PREFIX = 'http://www.isni.org/isni/'

  # RDF for this resource, requested from the IRI with '.rdf' appended,
  # e.g. 'http://www.isni.org/isni/0000000109311081'. The result is kept
  # in @rdf and reused on later calls.
  #
  # @return the value from get_rdf, or nil when there is no IRI
  def rdf
    return nil if @iri.nil?
    @rdf = get_rdf("#{@iri}.rdf") if @rdf.nil?
    @rdf
  end
end
|
21
|
+
|
22
|
+
end
|
23
|
+
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require_relative 'resource'
|
2
|
+
|
3
|
+
module Marc2LinkedData
|
4
|
+
|
5
|
+
# A Resource subclass that currently adds nothing of its own.
#
# An `rdf` reader was sketched for it but is disabled:
#
#   def rdf
#     return nil if @iri.nil?
#     return @rdf unless @rdf.nil?
#     uri4rdf = @iri.to_s + '.rdf'
#     @rdf = get_rdf(uri4rdf)
#   end
class LibAuth < Resource
end
|
15
|
+
|
16
|
+
end
|
17
|
+
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require_relative 'resource'
|
2
|
+
|
3
|
+
module Marc2LinkedData
|
4
|
+
|
5
|
+
# A Resource identified by a Library of Congress authority IRI
# (http://id.loc.gov/authorities/...).
class Loc < Resource
  PREFIX = 'http://id.loc.gov/authorities/'
  PREFIX_NAMES = "#{PREFIX}names/"
  PREFIX_SUBJECTS = "#{PREFIX}subjects/"

  # Namespace of the MADS/RDF terms used by the type predicates below.
  MADS = 'http://www.loc.gov/mads/rdf/v1#'

  # def id
  #   return nil if @iri.nil?
  #   @id ||= @iri.basename
  #   # Could get id from rdf, but that incurs costs for RDF retrieval and parsing etc.
  #   #oclc_id = '<identifiers:oclcnum>oca04921729</identifiers:oclcnum>'
  #   #<identifiers:lccn>no 99010609</identifiers:lccn>
  #   #<identifiers:oclcnum>oca04921729</identifiers:oclcnum>
  # end

  # RDF for this resource, requested from the IRI with '.rdf' appended.
  # The result is kept in @rdf and reused on later calls.
  #
  # @return the value from get_rdf, or nil when there is no IRI
  def rdf
    return nil if iri.nil?
    return @rdf unless @rdf.nil?
    uri4rdf = iri.to_s + '.rdf'
    @rdf = get_rdf(uri4rdf)
  end

  # The madsrdf:authoritativeLabel of this IRI, read from the RDF.
  #
  # @return [String, nil] the first label found, or nil when the lookup
  #   fails for any reason (no RDF, no match, ...)
  def label
    label_predicate = "<#{MADS}authoritativeLabel>"
    query = SPARQL.parse("SELECT * WHERE { <#{@iri}> #{label_predicate} ?o }")
    rdf.query(query).first[:o].to_s
  rescue StandardError
    nil
  end

  # @return [Boolean] true when the IRI has type madsrdf:Authority
  def authority?
    mads_type?('Authority')
  end

  # @return [Boolean] true when the IRI has type madsrdf:DeprecatedAuthority
  def deprecated?
    mads_type?('DeprecatedAuthority')
  end

  # @return [Boolean] true when the IRI has type madsrdf:ConferenceName
  def conference?
    mads_type?('ConferenceName')
  end

  # @return [Boolean] true when the IRI has type madsrdf:CorporateName
  def corporation?
    mads_type?('CorporateName')
  end

  # @return [Boolean] true when the IRI has type madsrdf:NameTitle
  def name_title?
    mads_type?('NameTitle')
  end

  # @return [Boolean] true when the IRI has type madsrdf:PersonalName
  def person?
    mads_type?('PersonalName')
    # iri_types.filter {|s| s[:o] =~ /PersonalName/ }.length > 0
    # obj = rdf_find_object 'PersonalName'
    # obj.nil? ? false : true
  end

  # @return [Boolean] true when the IRI has type madsrdf:Geographic
  def place?
    mads_type?('Geographic')
  end

  # Try to get OCLC URI from LOC ID
  # http://oclc.org/developer/develop/web-services/worldcat-identities.en.html
  # e.g. http://www.worldcat.org/identities/lccn-n79044803/
  # e.g. http://www.worldcat.org/identities/lccn-n79044798/
  #
  # The result is kept in @oclc_iri and reused on later calls.
  def get_oclc_identity
    return @oclc_iri unless @oclc_iri.nil?
    # URI.encode was removed in Ruby 3.0; URI::DEFAULT_PARSER.escape is the
    # same escaping and is available on old and new rubies.
    oclc_url = URI::DEFAULT_PARSER.escape('http://www.worldcat.org/identities/lccn-' + id + '/')
    @oclc_iri = resolve_external_auth(oclc_url)
    # TODO: OCLC might redirect and then provide a 'fast' URI for obsolete identity records.
  end

  # Try to get VIAF from LOC sourceID
  # LOC statement with VIAF URI, e.g.:
  # s: <http://id.loc.gov/authorities/names/n79046291>
  # p: <http://www.loc.gov/mads/rdf/v1#hasExactExternalAuthority>
  # o: <http://viaf.org/viaf/sourceID/LC%7Cn+79046291#skos:Concept> .
  #
  # The result is kept in @viaf_iri and reused on later calls.
  def get_viaf
    return @viaf_iri unless @viaf_iri.nil?
    #return nil unless rdf_valid?
    #@viaf_iri ||= rdf_find_object 'viaf'
    # See get_oclc_identity for why URI::DEFAULT_PARSER.escape is used.
    viaf_url = URI::DEFAULT_PARSER.escape('http://viaf.org/viaf/sourceID/LC|' + id + '#skos:Concept')
    @viaf_iri = resolve_external_auth(viaf_url)
  end

  private

  # True when iri_types contains the MADS/RDF class with this local name.
  def mads_type?(local_name)
    type_uri = "#{MADS}#{local_name}"
    iri_types.filter { |s| s[:o] == type_uri }.length > 0
  end
end
|
89
|
+
|
90
|
+
end
|
91
|
+
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require_relative 'oclc_resource'
|
2
|
+
|
3
|
+
module Marc2LinkedData
|
4
|
+
|
5
|
+
# An OCLC resource under http://www.worldcat.org/oclc/ that can be linked
# to works through schema:exampleOfWork.
class OclcCreativeWork < OclcResource
  PREFIX = 'http://www.worldcat.org/oclc/'

  # The objects of schema:exampleOfWork for this resource.
  # (assume an exampleOfWork can only ever link to one work?)
  #
  # OCLC data is inconsistent in use of 'www.' in IRI and in how the ID is
  # written, so up to four spellings of the subject are tried in order,
  # stopping at the first one that yields results:
  #   1. @iri as is (the OclcResource coerces @iri so it includes 'www.')
  #   2. without the 'www.'
  #   3. with the 'www.', the ID cast to an integer
  #   4. without the 'www.' AND the ID cast to an integer
  #
  # @return [Array] the works found; empty when none of the spellings match
  def get_works
    # Lambdas keep each spelling from being computed until it is needed.
    subjects = [
      -> { @iri },
      -> { @iri.to_s.gsub('www.','') },
      -> { @iri.to_s.gsub(id, id.to_i.to_s) },
      -> { @iri.to_s.gsub('www.','').gsub(id, id.to_i.to_s) }
    ]
    found = []
    subjects.each do |subject|
      found = rdf.query(query_work(subject.call)).map { |solution| solution[:o] }
      break unless found.empty?
    end
    found
  end

  # Parsed SPARQL query selecting the schema:exampleOfWork objects of +uri+.
  def query_work(uri)
    SPARQL.parse("SELECT * WHERE { <#{uri}> <http://schema.org/exampleOfWork> ?o }")
  end

  # TODO: get ISBN?
end
|
42
|
+
|
43
|
+
end
|
44
|
+
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require_relative 'resource'
|
2
|
+
|
3
|
+
module Marc2LinkedData
|
4
|
+
|
5
|
+
# A Resource identified by a WorldCat identities IRI.
class OclcIdentity < Resource
  PREFIX = 'http://www.worldcat.org/identities/'

  # RDF for this identity, requested from the IRI with a trailing '/',
  # e.g. 'http://www.worldcat.org/identities/lccn-n79044803/'
  # (the html returned contains RDFa data). The result is kept in @rdf and
  # reused on later calls.
  #
  # @return the value from get_rdf, or nil when there is no IRI
  def rdf
    return nil if @iri.nil?
    if @rdf.nil?
      location = @iri.to_s
      location = "#{location}/" unless location.end_with?('/')
      @rdf = get_rdf(location)
    end
    @rdf
  end

  # def get_xml
  #   begin
  #     return @xml unless @xml.nil?
  #     http = Net::HTTP.new @iri.host
  #     resp = http.get(@iri.path, {'Accept' => 'application/xml'})
  #     case resp.code
  #       when '301','302','303'
  #         #301 Moved Permanently; 302 Moved Temporarily; 303 See Other
  #         resp = http.get(resp['location'], {'Accept' => 'application/xml'})
  #     end
  #     if resp.code != '200'
  #       raise
  #     end
  #     @xml = resp.body
  #   rescue
  #     puts 'ERROR: Failed to request OCLC identity xml.'
  #   end
  # end

  # Every subject in the RDF that is typed as a schema:CreativeWork.
  #
  # @return [Array] the ?oclcWork binding of each query solution
  def creative_works
    works_query = SPARQL.parse('SELECT * WHERE { ?oclcWork a <http://schema.org/CreativeWork> }')
    rdf.query(works_query).map { |solution| solution[:oclcWork] }
  end
end
|
44
|
+
|
45
|
+
end
|
46
|
+
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require_relative 'resource'
|
2
|
+
|
3
|
+
module Marc2LinkedData
|
4
|
+
|
5
|
+
# A Resource identified by a WorldCat IRI, with readers for several
# schema.org properties of that IRI.
class OclcResource < Resource
  PREFIX = 'http://www.worldcat.org/oclc/'

  # Ensure the OCLC IRI contains 'www' in the host name.
  #
  # A nil uri is passed to Resource untouched (it used to be turned into an
  # empty string), and the 'www.' check is made on the string form so it
  # does not depend on the argument's own `=~`.
  #
  # @param uri [#to_s, nil] the OCLC IRI
  def initialize(uri=nil)
    unless uri.nil? || uri.to_s =~ /www\./
      uri = uri.to_s.gsub('worldcat.org','www.worldcat.org')
    end
    super(uri)
  end

  # RDF for this resource, requested from the IRI with '.rdf' appended
  # when it is not already there,
  # e.g. 'http://worldcat.org/oclc/004957186'
  # also 'http://www.worldcat.org/oclc/004957186'
  # The result is kept in @rdf and reused on later calls.
  #
  # @return the value from get_rdf, or nil when there is no IRI
  def rdf
    return nil if @iri.nil?
    return @rdf unless @rdf.nil?
    uri4rdf = @iri.to_s
    uri4rdf += '.rdf' unless uri4rdf.end_with? '.rdf'
    @rdf = get_rdf(uri4rdf)
  end

  # @return [Boolean] true when the IRI has type schema:Book
  def book?
    iri_types.filter {|s| s[:o] == 'http://schema.org/Book' }.length > 0
  end

  # @return [Boolean] true when +uri+ is one of the creators
  def creator?(uri)
    creators.include? RDF::URI.new(uri)
  end

  # @return [Boolean] true when +uri+ is one of the contributors
  def contributor?(uri)
    contributors.include? RDF::URI.new(uri)
  end

  # @return [Boolean] true when +uri+ is one of the editors
  def editor?(uri)
    editors.include? RDF::URI.new(uri)
  end

  # @return [Boolean] true when the IRI has type schema:MediaObject
  def media_object?
    iri_types.filter {|s| s[:o] == 'http://schema.org/MediaObject' }.length > 0
  end

  # All schema:about statements in the RDF (for any subject).
  #
  # @return the query result, with ?s and ?o bound
  def about
    q = SPARQL.parse('SELECT * WHERE { ?s <http://schema.org/about> ?o }')
    rdf.query(q)
  end

  # @return [Array] objects of schema:creator for this IRI
  def creators
    schema_objects('creator')
  end

  # @return [Array] objects of schema:contributor for this IRI
  def contributors
    schema_objects('contributor')
  end

  # @return [Array] objects of schema:editor for this IRI
  def editors
    schema_objects('editor')
  end

  # @return [Array] objects of schema:publisher for this IRI
  def publishers
    schema_objects('publisher')
  end

  # @return [Array] objects of schema:isbn for this IRI
  def isbns
    schema_objects('isbn')
  end

  private

  # Objects of the schema.org +property+ whose subject is this IRI.
  def schema_objects(property)
    q = SPARQL.parse("SELECT * WHERE { <#{@iri}> <http://schema.org/#{property}> ?o }")
    rdf.query(q).collect {|s| s[:o] }
  end
end
|
77
|
+
|
78
|
+
end
|
79
|
+
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require_relative 'oclc_resource'
|
2
|
+
|
3
|
+
module Marc2LinkedData
|
4
|
+
|
5
|
+
# An OCLC work entity, linked to its examples through schema:workExample.
class OclcWork < OclcResource
  # OCLC is inconsistent with use of 'www' in URIs
  #PREFIX = 'http://www.worldcat.org/entity/work/id/'
  PREFIX = 'http://worldcat.org/entity/work/id/'

  # The objects of schema:workExample for this work.
  #
  # @return [Array] the ?o binding of each query solution
  def example_works
    examples_query = SPARQL.parse("SELECT * WHERE { <#{@iri}> <http://schema.org/workExample> ?o }")
    rdf.query(examples_query).map { |solution| solution[:o] }
  end
end
|
17
|
+
|
18
|
+
end
|
19
|
+
|