marc2linkeddata 0.0.7 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.env_example +52 -27
- data/README.md +33 -2
- data/bin/marcAuthority2LD +131 -40
- data/bin/run_test_data.sh +68 -0
- data/lib/includes.rb +44 -0
- data/lib/marc2linkeddata.rb +1 -29
- data/lib/marc2linkeddata/cap.rb +15 -0
- data/lib/marc2linkeddata/cap_db.rb +44 -0
- data/lib/marc2linkeddata/configuration.rb +5 -0
- data/lib/marc2linkeddata/loc.rb +5 -1
- data/lib/marc2linkeddata/parseMarcAuthority.rb +333 -89
- data/lib/marc2linkeddata/sparql.rb +4 -37
- data/lib/marc2linkeddata/sparql_dbpedia.rb +22 -0
- data/lib/marc2linkeddata/sparql_local_loc.rb +29 -0
- data/lib/marc2linkeddata/sparql_pubmed.rb +2 -4
- data/log/.gitignore +4 -0
- data/marc2linkeddata.gemspec +11 -2
- metadata +67 -3
data/lib/includes.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'dotenv'
|
2
|
+
Dotenv.load
|
3
|
+
|
4
|
+
require 'addressable/uri'
|
5
|
+
require 'json'
|
6
|
+
require 'rest_client'
|
7
|
+
RestClient.proxy = ENV['http_proxy'] unless ENV['http_proxy'].nil?
|
8
|
+
require 'thread'
|
9
|
+
|
10
|
+
require 'marc'
|
11
|
+
require 'linkeddata'
|
12
|
+
require 'rdf/4store'
|
13
|
+
require 'rdf/mongo'
|
14
|
+
|
15
|
+
require 'pry'
|
16
|
+
require 'pry-doc'
|
17
|
+
require 'ruby-progressbar'
|
18
|
+
|
19
|
+
require_relative 'marc2linkeddata/configuration'
|
20
|
+
|
21
|
+
require_relative 'marc2linkeddata/resource'
|
22
|
+
require_relative 'marc2linkeddata/isni'
|
23
|
+
require_relative 'marc2linkeddata/lib_auth'
|
24
|
+
require_relative 'marc2linkeddata/loc'
|
25
|
+
require_relative 'marc2linkeddata/viaf'
|
26
|
+
|
27
|
+
if ENV['SUL_CAP_ENABLED'].to_s.upcase == 'TRUE'
|
28
|
+
require_relative 'marc2linkeddata/cap'
|
29
|
+
end
|
30
|
+
|
31
|
+
require_relative 'marc2linkeddata/oclc_resource'
|
32
|
+
require_relative 'marc2linkeddata/oclc_identity'
|
33
|
+
require_relative 'marc2linkeddata/oclc_creative_work'
|
34
|
+
require_relative 'marc2linkeddata/oclc_work'
|
35
|
+
|
36
|
+
require_relative 'marc2linkeddata/sparql'
|
37
|
+
require_relative 'marc2linkeddata/sparql_dbpedia'
|
38
|
+
require_relative 'marc2linkeddata/sparql_pubmed'
|
39
|
+
|
40
|
+
require_relative 'marc2linkeddata/parseMarcAuthority'
|
41
|
+
#require_relative 'marc2linkeddata/parseMarcCatalog'
|
42
|
+
|
43
|
+
|
44
|
+
|
data/lib/marc2linkeddata.rb
CHANGED
@@ -1,33 +1,5 @@
|
|
1
|
-
require 'dotenv'
|
2
|
-
Dotenv.load
|
3
|
-
|
4
|
-
require 'addressable/uri'
|
5
|
-
require 'json'
|
6
|
-
require 'linkeddata'
|
7
|
-
require 'marc'
|
8
|
-
require 'rdf/4store'
|
9
|
-
require 'ruby-progressbar'
|
10
|
-
|
11
|
-
require 'pry'
|
12
|
-
require 'pry-doc'
|
13
|
-
|
14
|
-
require_relative 'marc2linkeddata/configuration'
|
15
|
-
|
16
|
-
require_relative 'marc2linkeddata/resource'
|
17
|
-
require_relative 'marc2linkeddata/isni'
|
18
|
-
require_relative 'marc2linkeddata/lib_auth'
|
19
|
-
require_relative 'marc2linkeddata/loc'
|
20
|
-
require_relative 'marc2linkeddata/viaf'
|
21
|
-
|
22
|
-
require_relative 'marc2linkeddata/oclc_resource'
|
23
|
-
require_relative 'marc2linkeddata/oclc_identity'
|
24
|
-
require_relative 'marc2linkeddata/oclc_creative_work'
|
25
|
-
require_relative 'marc2linkeddata/oclc_work'
|
26
|
-
|
27
|
-
require_relative 'marc2linkeddata/parseMarcAuthority'
|
28
|
-
#require_relative 'marc2linkeddata/parseMarcCatalog'
|
29
|
-
require_relative 'marc2linkeddata/sparql'
|
30
1
|
|
2
|
+
require_relative 'includes'
|
31
3
|
|
32
4
|
module Marc2LinkedData
|
33
5
|
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'mysql'
|
3
|
+
require 'sequel'
|
4
|
+
|
5
|
+
module Marc2LinkedData
|
6
|
+
|
7
|
+
# An interface to an SQL database using Sequel
|
8
|
+
# @see http://sequel.jeremyevans.net/documentation.html Sequel RDoc
|
9
|
+
# @see http://sequel.jeremyevans.net/rdoc/files/README_rdoc.html Sequel README
|
10
|
+
# @see http://sequel.jeremyevans.net/rdoc/files/doc/code_order_rdoc.html Sequel code order
|
11
|
+
class CapDb
|
12
|
+
|
13
|
+
@@log = Logger.new('log/cap_db.log')
|
14
|
+
|
15
|
+
attr_accessor :db
|
16
|
+
attr_accessor :db_config
|
17
|
+
|
18
|
+
def self.log_model_info(m)
|
19
|
+
@@log.info "table: #{m.table_name}, columns: #{m.columns}, pk: #{m.primary_key}"
|
20
|
+
end
|
21
|
+
|
22
|
+
def initialize
|
23
|
+
@db_config = {}
|
24
|
+
@db_config['host'] = ENV['SUL_CAP_DB_HOST'] || 'localhost'
|
25
|
+
@db_config['port'] = ENV['SUL_CAP_DB_PORT'] || '3306'
|
26
|
+
@db_config['user'] = ENV['SUL_CAP_DB_USER'] || 'capUser'
|
27
|
+
@db_config['password'] = ENV['SUL_CAP_DB_PASSWORD'] || 'capPass'
|
28
|
+
@db_config['database'] = ENV['SUL_CAP_DB_DATABASE'] || 'cap'
|
29
|
+
options = @db_config.merge(
|
30
|
+
{
|
31
|
+
:encoding => 'utf8',
|
32
|
+
:max_connections => 10,
|
33
|
+
:logger => @@log
|
34
|
+
})
|
35
|
+
@db = Sequel.mysql(options)
|
36
|
+
@db.extension(:pagination)
|
37
|
+
# Ensure the connection is good on startup, raises exceptions on failure
|
38
|
+
puts "#{@db} connected: #{@db.test_connection}"
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
44
|
+
|
@@ -5,6 +5,9 @@ module Marc2LinkedData
|
|
5
5
|
|
6
6
|
attr_accessor :debug
|
7
7
|
|
8
|
+
attr_accessor :threads
|
9
|
+
attr_accessor :thread_limit
|
10
|
+
|
8
11
|
attr_accessor :field_auth_loc
|
9
12
|
attr_accessor :field_auth_isni
|
10
13
|
attr_accessor :field_auth_oclc
|
@@ -35,6 +38,8 @@ module Marc2LinkedData
|
|
35
38
|
|
36
39
|
def initialize
|
37
40
|
@debug = env_boolean('DEBUG')
|
41
|
+
@threads = env_boolean('THREADS')
|
42
|
+
# Default to 25 threads when THREAD_LIMIT is unset; nil.to_i is 0, which is truthy, so `.to_i || 25` never fell back.
@thread_limit = (ENV['THREAD_LIMIT'] || 25).to_i
|
38
43
|
|
39
44
|
# logging
|
40
45
|
log_file = ENV['LOG_FILE'] || 'marc2ld.log'
|
data/lib/marc2linkeddata/loc.rb
CHANGED
@@ -57,10 +57,14 @@ module Marc2LinkedData
|
|
57
57
|
# obj.nil? ? false : true
|
58
58
|
end
|
59
59
|
|
60
|
-
def
|
60
|
+
def geographic?
|
61
61
|
iri_types.filter {|s| s[:o] == 'http://www.loc.gov/mads/rdf/v1#Geographic' }.length > 0
|
62
62
|
end
|
63
63
|
|
64
|
+
def uniform_title?
|
65
|
+
iri_types.filter {|s| s[:o] == 'http://www.loc.gov/mads/rdf/v1#Title' }.length > 0
|
66
|
+
end
|
67
|
+
|
64
68
|
def get_oclc_identity
|
65
69
|
# Try to get OCLC URI from LOC ID
|
66
70
|
# http://oclc.org/developer/develop/web-services/worldcat-identities.en.html
|
@@ -143,7 +143,7 @@ module Marc2LinkedData
|
|
143
143
|
# VIAF RSS feed for changes, e.g. http://viaf.org/viaf/181829329.rss
|
144
144
|
field = get_fields(@@config.field_auth_viaf).first
|
145
145
|
viaf_iri = get_iri(field, 'viaf.org')
|
146
|
-
# If VIAF is not already in the MARC record, try to get from LOC.
|
146
|
+
# If VIAF is not already in the MARC record, try to get it from LOC.
|
147
147
|
if viaf_iri.nil? && @@config.get_viaf
|
148
148
|
viaf_iri = @loc.get_viaf rescue nil
|
149
149
|
@@config.logger.debug 'Failed to resolve VIAF URI' if viaf_iri.nil?
|
@@ -185,6 +185,10 @@ module Marc2LinkedData
|
|
185
185
|
}
|
186
186
|
end
|
187
187
|
|
188
|
+
|
189
|
+
# BLOCK ----------------------------------------------------
|
190
|
+
# Parse fields
|
191
|
+
|
188
192
|
def parse_008
|
189
193
|
# http://www.loc.gov/marc/authority/concise/ad008.html
|
190
194
|
field = get_fields('008').first
|
@@ -236,99 +240,288 @@ module Marc2LinkedData
|
|
236
240
|
}
|
237
241
|
end
|
238
242
|
|
239
|
-
def
|
243
|
+
def field100
|
240
244
|
# http://www.loc.gov/marc/authority/concise/ad100.html
|
245
|
+
# [#<MARC::Subfield:0x007f009d6a74e0 @code="a", @value="Abe, Eiichi,">,
|
246
|
+
# #<MARC::Subfield:0x007f009d6a7440 @code="d", @value="1927-">,
|
247
|
+
# #<MARC::Subfield:0x007f009d6a73a0 @code="t", @value="Hoppu dais\xC5\xAB.">,
|
248
|
+
# #<MARC::Subfield:0x007f009d6a7300 @code="l", @value="English">],
|
249
|
+
# @tag="100">
|
241
250
|
begin
|
242
|
-
# 100 is a personal name
|
251
|
+
# 100 is a personal name or name-title
|
252
|
+
return @field100 unless @field100.nil?
|
243
253
|
field = get_fields('100').first
|
244
254
|
# field = @record.fields.select {|f| f if f.tag == '100' }.first
|
245
255
|
name = field.subfields.select {|f| f.code == 'a' }.first.value rescue ''
|
246
|
-
|
247
|
-
|
248
|
-
'
|
256
|
+
date = field.subfields.select {|f| f.code == 'd' }.first.value rescue ''
|
257
|
+
title = field.subfields.select {|f| f.code == 't' }.first.value rescue ''
|
258
|
+
lang = field.subfields.select {|f| f.code == 'l' }.first.value rescue ''
|
259
|
+
@field100 = {
|
260
|
+
:name => name.force_encoding('UTF-8'),
|
261
|
+
:date => date,
|
262
|
+
:title => title.force_encoding('UTF-8'),
|
263
|
+
:lang => lang,
|
264
|
+
:error => nil
|
265
|
+
}
|
266
|
+
rescue => e
|
267
|
+
@@config.logger.debug "Failed to parse field 100 for #{get_id}: #{e.message}"
|
268
|
+
@field100 = {
|
269
|
+
:name => nil,
|
270
|
+
:date => nil,
|
271
|
+
:title => nil,
|
272
|
+
:lang => nil,
|
273
|
+
:error => 'ERROR_PERSON_NAME' #e.message
|
274
|
+
}
|
249
275
|
end
|
250
276
|
end
|
251
277
|
|
252
|
-
def
|
278
|
+
def field110
|
253
279
|
# http://www.loc.gov/marc/authority/concise/ad110.html
|
254
280
|
begin
|
255
281
|
# 110 is a corporate name
|
282
|
+
return @field110 unless @field110.nil?
|
256
283
|
field = get_fields('110').first
|
257
284
|
a = field.subfields.collect {|f| f.value if f.code == 'a' }.compact rescue []
|
258
285
|
b = field.subfields.collect {|f| f.value if f.code == 'b' }.compact rescue []
|
259
286
|
c = field.subfields.collect {|f| f.value if f.code == 'c' }.compact rescue []
|
260
287
|
name = [a,b,c].flatten.join(' : ')
|
261
|
-
|
262
|
-
|
263
|
-
|
288
|
+
@field110 = {
|
289
|
+
:name => name.force_encoding('UTF-8'),
|
290
|
+
:error => nil
|
291
|
+
}
|
292
|
+
rescue => e
|
293
|
+
@@config.logger.debug "Failed to parse field 110 for #{get_id}: #{e.message}"
|
294
|
+
@field110 = {
|
295
|
+
:name => nil,
|
296
|
+
:error => 'ERROR_CORPORATE_NAME' #e.message
|
297
|
+
}
|
264
298
|
end
|
265
299
|
end
|
266
300
|
|
267
|
-
def
|
301
|
+
def field111
|
268
302
|
# http://www.loc.gov/marc/authority/concise/ad111.html
|
303
|
+
# #<MARC::Subfield:0x007f43a50fd1e8 @code="a", @value="Joseph Priestley Symposium">,
|
304
|
+
# #<MARC::Subfield:0x007f43a50fd148 @code="d", @value="(1974 :">,
|
305
|
+
# #<MARC::Subfield:0x007f43a50fd0a8 @code="c", @value="Wilkes-Barre, Pa.)">],
|
306
|
+
# @tag="111">,
|
269
307
|
begin
|
270
308
|
# 111 is a meeting name
|
309
|
+
return @field111 unless @field111.nil?
|
271
310
|
field = get_fields('111').first
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
311
|
+
name = field.subfields.select {|f| f.code == 'a' }.first.value rescue ''
|
312
|
+
date = field.subfields.select {|f| f.code == 'd' }.first.value rescue ''
|
313
|
+
city = field.subfields.select {|f| f.code == 'c' }.first.value rescue ''
|
314
|
+
@field111 = {
|
315
|
+
:name => name.force_encoding('UTF-8'),
|
316
|
+
:date => date,
|
317
|
+
:city => city.force_encoding('UTF-8'),
|
318
|
+
:error => nil
|
319
|
+
}
|
320
|
+
rescue => e
|
321
|
+
@@config.logger.debug "Failed to parse field 111 for #{get_id}: #{e.message}"
|
322
|
+
@field111 = {
|
323
|
+
:name => nil,
|
324
|
+
:date => nil,
|
325
|
+
:city => nil,
|
326
|
+
:error => 'ERROR_MEETING_NAME'
|
327
|
+
}
|
328
|
+
end
|
329
|
+
end
|
330
|
+
|
331
|
+
def field130
|
332
|
+
# http://www.loc.gov/marc/authority/concise/ad130.html
|
333
|
+
# e.g. http://id.loc.gov/authorities/names/n79119331
|
334
|
+
# #<MARC::DataField:0x007f7f6bffe708
|
335
|
+
# @indicator1=" ",
|
336
|
+
# @indicator2="0",
|
337
|
+
# @subfields=[#<MARC::Subfield:0x007f7f6bffe208 @code="a", @value="Fair maid of the Exchange">],
|
338
|
+
# @tag="130">,
|
339
|
+
# plus a lot of 400 fields
|
340
|
+
begin
|
341
|
+
# 130 is a uniform title
|
342
|
+
return @field130 unless @field130.nil?
|
343
|
+
field = get_fields('130').first
|
344
|
+
# Take the first $a subfield wherever it occurs; collect{...}.first returned nil when $a was not the first subfield.
title = field.subfields.select {|f| f.code == 'a' }.first.value rescue ''
|
345
|
+
@field130 = {
|
346
|
+
:title => title.force_encoding('UTF-8'),
|
347
|
+
:error => nil
|
348
|
+
}
|
349
|
+
rescue => e
|
350
|
+
@@config.logger.debug "Failed to parse field 130 for #{get_id}: #{e.message}"
|
351
|
+
@field130 = {
|
352
|
+
:title => nil,
|
353
|
+
:error => 'ERROR_UNIFORM_TITLE'
|
354
|
+
}
|
282
355
|
end
|
283
356
|
end
|
284
357
|
|
285
|
-
def
|
358
|
+
def field151
|
286
359
|
# http://www.loc.gov/marc/authority/concise/ad151.html
|
360
|
+
# e.g. http://id.loc.gov/authorities/names/n79045127
|
287
361
|
begin
|
288
362
|
# 151 is a geographic name
|
363
|
+
return @field151 unless @field151.nil?
|
289
364
|
field = get_fields('151').first
|
290
365
|
# Take the first $a subfield wherever it occurs; collect{...}.first returned nil when $a was not the first subfield.
name = field.subfields.select {|f| f.code == 'a' }.first.value rescue ''
|
291
|
-
|
292
|
-
|
293
|
-
|
366
|
+
@field151 = {
|
367
|
+
:name => name.force_encoding('UTF-8'),
|
368
|
+
:error => nil
|
369
|
+
}
|
370
|
+
rescue => e
|
371
|
+
@@config.logger.debug "Failed to parse field 151 for #{get_id}: #{e.message}"
|
372
|
+
@field151 = {
|
373
|
+
:name => nil,
|
374
|
+
:error => 'ERROR_PLACE_NAME'
|
375
|
+
}
|
294
376
|
end
|
295
377
|
end
|
296
378
|
|
297
|
-
|
379
|
+
|
380
|
+
# BLOCK ----------------------------------------------------
|
381
|
+
# Authority record types
|
382
|
+
|
383
|
+
# TODO: other authority types?
|
384
|
+
# The MARC data differentiates them according to the tag number.
|
385
|
+
# Methods below ordered by field number.
|
386
|
+
|
387
|
+
# X00 - Personal Name
|
388
|
+
def person?
|
389
|
+
field = field100
|
390
|
+
field[:error].nil? && (! field[:name].empty?) && field[:title].empty?
|
391
|
+
end
|
392
|
+
|
393
|
+
# X00 - Name-Title
|
394
|
+
def name_title?
|
395
|
+
# e.g. http://id.loc.gov/authorities/names/n79044934
|
396
|
+
# if get_id == 'n79044934'.upcase
|
397
|
+
# binding.pry if @@config.debug
|
398
|
+
# end
|
399
|
+
field = field100
|
400
|
+
field[:error].nil? && (! field[:name].empty?) && (! field[:title].empty?)
|
401
|
+
end
|
402
|
+
|
403
|
+
# X10 - Corporate Name
|
404
|
+
def corporation?
|
405
|
+
# A missing 110 field parses to an empty name with no error, so also require a non-empty name (as person? does).
field110[:error].nil? && (! field110[:name].to_s.empty?)
|
406
|
+
end
|
407
|
+
|
408
|
+
# X11 - Meeting Name
|
409
|
+
def conference?
|
410
|
+
# e.g. http://id.loc.gov/authorities/names/n79044866
|
411
|
+
# A missing 111 field parses to an empty name with no error, so also require a non-empty name (as person? does).
field111[:error].nil? && (! field111[:name].to_s.empty?)
|
412
|
+
end
|
413
|
+
|
414
|
+
# X30 - Uniform Title
|
415
|
+
def uniform_title?
|
416
|
+
# A missing 130 field parses to an empty title with no error, so also require a non-empty title.
field130[:error].nil? && (! field130[:title].to_s.empty?)
|
417
|
+
end
|
418
|
+
|
419
|
+
# X51 - Jurisdiction / Geographic Name
|
420
|
+
# - http://www.loc.gov/mads/rdf/v1#Geographic
|
421
|
+
def geographic?
|
422
|
+
# e.g. http://id.loc.gov/authorities/names/n79046135.html
|
423
|
+
# A missing 151 field parses to an empty name with no error, so also require a non-empty name.
field151[:error].nil? && (! field151[:name].to_s.empty?)
|
424
|
+
end
|
425
|
+
|
426
|
+
# BLOCK ----------------------------------------------------
|
427
|
+
# Parse authority record
|
428
|
+
|
429
|
+
def parse_auth_details
|
430
|
+
if @loc.iri.to_s =~ /name/
|
431
|
+
if @@config.get_loc
|
432
|
+
# Retrieve and use LOC RDF
|
433
|
+
parse_auth_name_rdf
|
434
|
+
else
|
435
|
+
# Use only the MARC record, without RDF retrieval
|
436
|
+
parse_auth_name
|
437
|
+
end
|
438
|
+
elsif @loc.iri.to_s =~ /subjects/
|
439
|
+
# TODO: what to do with subjects?
|
440
|
+
binding.pry if @@config.debug
|
441
|
+
# parse_auth_subject_rdf
|
442
|
+
else
|
443
|
+
# What is this?
|
444
|
+
binding.pry if @@config.debug
|
445
|
+
end
|
446
|
+
end
|
447
|
+
|
448
|
+
|
449
|
+
# BLOCK ----------------------------------------------------
|
450
|
+
# Parse authority record without RDF
|
451
|
+
|
452
|
+
def parse_auth_name
|
298
453
|
#
|
299
454
|
# Create triples for various kinds of LOC authority.
|
300
|
-
# At present, this relies on LOC RDF to differentiate
|
301
|
-
# types of authorities. It should be possible to do this
|
302
|
-
# from the MARC directly, if @@config.get_loc is false.
|
303
|
-
#
|
304
|
-
# The MARC data differentiates them according to the tag number.
|
305
|
-
# The term 'name' refers to:
|
306
|
-
# X00 - Personal Name
|
307
|
-
# X10 - Corporate Name
|
308
|
-
# X11 - Meeting Name
|
309
|
-
# X30 - Uniform Title
|
310
|
-
# X51 - Jurisdiction / Geographic Name
|
311
455
|
#
|
312
|
-
@@config.logger.warn "LOC URL: #{@loc.iri} DEPRECATED" if @loc.deprecated?
|
313
456
|
name = ''
|
314
|
-
if
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
name
|
320
|
-
|
321
|
-
|
322
|
-
|
457
|
+
if person?
|
458
|
+
name = field100[:name]
|
459
|
+
graph_type_person(@lib.rdf_uri)
|
460
|
+
|
461
|
+
# TODO: find another way to get first and last names without VIAF
|
462
|
+
# # VIAF extracts first and last name, try to use them. Note
|
463
|
+
# # that VIAF uses schema:name, schema:givenName, and schema:familyName.
|
464
|
+
# if @@config.get_viaf && ! @viaf.nil?
|
465
|
+
# @viaf.family_names.each do |n|
|
466
|
+
# # ln = URI.encode(n)
|
467
|
+
# # TODO: try to get a language type, if VIAF provide it.
|
468
|
+
# # name = RDF::Literal.new(n, :language => :en)
|
469
|
+
# ln = RDF::Literal.new(n)
|
470
|
+
# @graph.insert RDF::Statement(@lib.rdf_uri, RDF::FOAF.familyName, ln) if @@config.use_foaf
|
471
|
+
# @graph.insert RDF::Statement(@lib.rdf_uri, RDF::SCHEMA.familyName, ln) if @@config.use_schema
|
472
|
+
# end
|
473
|
+
# @viaf.given_names.each do |n|
|
474
|
+
# # fn = URI.encode(n)
|
475
|
+
# # TODO: try to get a language type, if VIAF provide it.
|
476
|
+
# # name = RDF::Literal.new(n, :language => :en)
|
477
|
+
# fn = RDF::Literal.new(n)
|
478
|
+
# @graph.insert RDF::Statement(@lib.rdf_uri, RDF::FOAF.firstName, fn) if @@config.use_foaf
|
479
|
+
# @graph.insert RDF::Statement(@lib.rdf_uri, RDF::SCHEMA.givenName, fn) if @@config.use_schema
|
480
|
+
# end
|
481
|
+
# end
|
482
|
+
elsif name_title?
|
323
483
|
# e.g. http://id.loc.gov/authorities/names/n79044934
|
324
|
-
#
|
325
|
-
|
484
|
+
# http://viaf.org/viaf/182251325/rdf.xml
|
485
|
+
name = field100[:name]
|
486
|
+
graph_insert_type(@lib.rdf_uri, RDF::URI.new('http://www.loc.gov/mads/rdf/v1#NameTitle'))
|
487
|
+
elsif corporation?
|
488
|
+
name = field110[:name]
|
489
|
+
graph_type_organization(@lib.rdf_uri)
|
490
|
+
elsif conference?
|
491
|
+
# e.g. http://id.loc.gov/authorities/names/n79044866
|
492
|
+
# Join the non-empty 111 parts with a space, e.g. "Joseph Priestley Symposium (1974 : Wilkes-Barre, Pa.)"
name = [field111[:name],field111[:date],field111[:city]].compact.reject(&:empty?).join(' ')
|
493
|
+
# rdf:type needs the schema.org class Event; schema:event (lower case) is a property.
graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.Event)
|
494
|
+
elsif uniform_title?
|
495
|
+
name = field130[:title] # use 'name' for code below, although it's a title
|
496
|
+
graph_insert_type(@lib.rdf_uri, RDF::URI.new('http://www.loc.gov/mads/rdf/v1#Title'))
|
497
|
+
graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.title)
|
498
|
+
elsif geographic?
|
499
|
+
name = field151[:name] # use 'name' for code below, although it's a place
|
500
|
+
graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.Place)
|
501
|
+
else
|
502
|
+
# TODO: find out what type this is.
|
326
503
|
binding.pry if @@config.debug
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
504
|
+
name = ''
|
505
|
+
graph_type_agent(@lib.rdf_uri)
|
506
|
+
end
|
507
|
+
if name != ''
|
508
|
+
name = RDF::Literal.new(name)
|
509
|
+
graph_insert_name(@lib.rdf_uri, name)
|
510
|
+
end
|
511
|
+
end
|
512
|
+
|
513
|
+
|
514
|
+
# BLOCK ----------------------------------------------------
|
515
|
+
# Parse authority record using RDF
|
516
|
+
|
517
|
+
# Create triples for various kinds of LOC authority.
|
518
|
+
# This method relies on RDF data retrieval.
|
519
|
+
def parse_auth_name_rdf
|
520
|
+
@@config.logger.warn "#{@loc.iri} DEPRECATED" if @loc.deprecated?
|
521
|
+
name = ''
|
522
|
+
if @loc.person?
|
523
|
+
name = @loc.label || field100[:name]
|
524
|
+
graph_type_person(@lib.rdf_uri)
|
332
525
|
# VIAF extracts first and last name, try to use them. Note
|
333
526
|
# that VIAF uses schema:name, schema:givenName, and schema:familyName.
|
334
527
|
if @@config.get_viaf && ! @viaf.nil?
|
@@ -349,29 +542,41 @@ module Marc2LinkedData
|
|
349
542
|
@graph.insert RDF::Statement(@lib.rdf_uri, RDF::SCHEMA.givenName, fn) if @@config.use_schema
|
350
543
|
end
|
351
544
|
end
|
352
|
-
elsif @loc.
|
545
|
+
elsif @loc.name_title?
|
546
|
+
# e.g. http://id.loc.gov/authorities/names/n79044934
|
547
|
+
# http://viaf.org/viaf/182251325/rdf.xml
|
548
|
+
name = @loc.label || field100[:name]
|
549
|
+
graph_insert_type(@lib.rdf_uri, RDF::URI.new('http://www.loc.gov/mads/rdf/v1#NameTitle'))
|
550
|
+
elsif @loc.corporation?
|
551
|
+
name = @loc.label || field110[:name]
|
552
|
+
graph_type_organization(@lib.rdf_uri)
|
553
|
+
elsif @loc.conference?
|
554
|
+
# e.g. http://id.loc.gov/authorities/names/n79044866
|
555
|
+
# Fall back to the MARC 111 parts joined with a space; compact guards against the nil values of a failed parse.
name = @loc.label || [field111[:name],field111[:date],field111[:city]].compact.reject(&:empty?).join(' ')
|
556
|
+
# rdf:type needs the schema.org class Event; schema:event (lower case) is a property.
graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.Event)
|
557
|
+
elsif @loc.geographic?
|
353
558
|
# e.g. http://id.loc.gov/authorities/names/n79045127
|
354
|
-
name = @loc.label ||
|
355
|
-
|
559
|
+
name = @loc.label || field151[:name]
|
560
|
+
graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.Place)
|
561
|
+
elsif @loc.uniform_title?
|
562
|
+
name = field130[:title] # use 'name' for code below, although it's a title
|
563
|
+
graph_insert_type(@lib.rdf_uri, RDF::URI.new('http://www.loc.gov/mads/rdf/v1#Title'))
|
564
|
+
graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.title)
|
356
565
|
else
|
357
566
|
# TODO: find out what type this is.
|
358
567
|
binding.pry if @@config.debug
|
359
568
|
name = @loc.label || ''
|
360
|
-
|
361
|
-
@graph.insert RDF::Statement(@lib.rdf_uri, RDF.type, RDF::FOAF.Agent) if @@config.use_foaf
|
362
|
-
@graph.insert RDF::Statement(@lib.rdf_uri, RDF.type, RDF::SCHEMA.Thing) if @@config.use_schema
|
569
|
+
graph_type_agent(@lib.rdf_uri)
|
363
570
|
end
|
364
571
|
if name != ''
|
365
|
-
# name_encoding = URI.encode(name)
|
366
572
|
name = RDF::Literal.new(name)
|
367
|
-
|
368
|
-
@graph.insert RDF::Statement(@lib.rdf_uri, RDF::SCHEMA.name, name) if @@config.use_schema
|
573
|
+
graph_insert_name(@lib.rdf_uri, name)
|
369
574
|
end
|
370
575
|
end
|
371
576
|
|
372
|
-
|
373
|
-
|
374
|
-
|
577
|
+
|
578
|
+
|
579
|
+
def parse_auth_subject_rdf
|
375
580
|
# The term 'subject' refers to:
|
376
581
|
# X30 - Uniform Titles
|
377
582
|
# X48 - Chronological Terms
|
@@ -400,7 +605,7 @@ module Marc2LinkedData
|
|
400
605
|
# Try to get additional data from OCLC, using the RDFa
|
401
606
|
# available in the OCLC identities pages.
|
402
607
|
oclc_auth = OclcIdentity.new oclc_iri
|
403
|
-
|
608
|
+
graph_insert_sameAs(@loc.rdf_uri, oclc_auth.rdf_uri)
|
404
609
|
oclc_auth.creative_works.each do |creative_work_uri|
|
405
610
|
# Notes on work-around for OCLC data inconsistency:
|
406
611
|
# RDFa for http://www.worldcat.org/identities/lccn-n79044798 contains:
|
@@ -411,17 +616,17 @@ module Marc2LinkedData
|
|
411
616
|
#creative_work_iri = creative_work.to_s.gsub('worldcat.org','www.worldcat.org')
|
412
617
|
#creative_work_iri = creative_work_iri.gsub('wwwwww','www') # in case it gets added already by OCLC
|
413
618
|
creative_work = OclcCreativeWork.new creative_work_uri
|
414
|
-
|
619
|
+
graph_insert_seeAlso(oclc_auth.rdf_uri, creative_work.rdf_uri)
|
415
620
|
if @@config.oclc_auth2works
|
416
621
|
# Try to use VIAF to relate auth to work as creator, contributor, editor, etc.
|
417
622
|
# Note that this requires additional RDF retrieval for each work (slower processing).
|
418
623
|
unless @viaf.nil?
|
419
624
|
if creative_work.creator? @viaf.iri
|
420
|
-
|
625
|
+
graph_insert_creator(creative_work.rdf_uri, oclc_auth.rdf_uri)
|
421
626
|
elsif creative_work.contributor? @viaf.iri
|
422
|
-
|
627
|
+
graph_insert_contributor(creative_work.rdf_uri, oclc_auth.rdf_uri)
|
423
628
|
elsif creative_work.editor? @viaf.iri
|
424
|
-
|
629
|
+
graph_insert_editor(creative_work.rdf_uri, oclc_auth.rdf_uri)
|
425
630
|
end
|
426
631
|
end
|
427
632
|
# TODO: Is auth the subject of the work (as in biography) or both (as in autobiography)?
|
@@ -430,7 +635,7 @@ module Marc2LinkedData
|
|
430
635
|
# Try to find the generic work entity for this example work.
|
431
636
|
creative_work.get_works.each do |oclc_work_uri|
|
432
637
|
oclc_work = OclcWork.new oclc_work_uri
|
433
|
-
|
638
|
+
graph_insert_exampleOfWork(creative_work.rdf_uri, oclc_work.rdf_uri)
|
434
639
|
end
|
435
640
|
end
|
436
641
|
end
|
@@ -439,6 +644,9 @@ module Marc2LinkedData
|
|
439
644
|
|
440
645
|
# TODO: use an institutional 'affiliation' entry, maybe 373? (optional field)
|
441
646
|
|
647
|
+
# BLOCK ----------------------------------------------------
|
648
|
+
# Graph methods
|
649
|
+
|
442
650
|
def to_ttl
|
443
651
|
graph.to_ttl
|
444
652
|
end
|
@@ -463,27 +671,63 @@ module Marc2LinkedData
|
|
463
671
|
|
464
672
|
# Get LOC control number and add catalog permalink? e.g.
|
465
673
|
# http://lccn.loc.gov/n79046291
|
674
|
+
graph_insert_sameAs(@lib.rdf_uri, @loc.rdf_uri)
|
675
|
+
graph_insert_sameAs(@lib.rdf_uri, @viaf.rdf_uri) unless @viaf.nil?
|
676
|
+
graph_insert_sameAs(@lib.rdf_uri, @isni.rdf_uri) unless @isni.nil?
|
677
|
+
parse_auth_details
|
678
|
+
# Optional elaboration of authority data with OCLC identity and works.
|
679
|
+
get_oclc_links if @@config.get_oclc
|
680
|
+
# @@config.logger.info "Extracted #{@loc.id}"
|
681
|
+
@graph
|
682
|
+
end
|
466
683
|
|
467
|
-
|
468
|
-
@graph.insert RDF::Statement(
|
469
|
-
|
470
|
-
|
684
|
+
def graph_insert(uriS, uriP, uriO)
|
685
|
+
@graph.insert RDF::Statement(uriS, uriP, uriO)
|
686
|
+
end
|
687
|
+
def graph_insert_sameAs(uriS, uriO)
|
688
|
+
graph_insert(uriS, RDF::OWL.sameAs, uriO)
|
689
|
+
end
|
690
|
+
def graph_insert_seeAlso(uriS, uriO)
|
691
|
+
graph_insert(uriS, RDF::RDFS.seeAlso, uriO)
|
692
|
+
end
|
693
|
+
def graph_insert_exampleOfWork(uriS, uriO)
|
694
|
+
graph_insert(uriS, RDF::SCHEMA.exampleOfWork, uriO)
|
695
|
+
end
|
696
|
+
def graph_insert_creator(uriS, uriO)
|
697
|
+
graph_insert(uriS, RDF::SCHEMA.creator, uriO)
|
698
|
+
end
|
699
|
+
def graph_insert_contributor(uriS, uriO)
|
700
|
+
graph_insert(uriS, RDF::SCHEMA.contributor, uriO)
|
701
|
+
end
|
702
|
+
def graph_insert_editor(uriS, uriO)
|
703
|
+
graph_insert(uriS, RDF::SCHEMA.editor, uriO)
|
704
|
+
end
|
705
|
+
def graph_insert_type(uriS, uriO)
|
706
|
+
graph_insert(uriS, RDF.type, uriO)
|
707
|
+
end
|
471
708
|
|
472
|
-
|
473
|
-
|
709
|
+
# ----
|
710
|
+
# Methods that can use FOAF or SCHEMA or both (or neither?)
|
474
711
|
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
binding.pry if @@config.debug
|
481
|
-
end
|
482
|
-
# Optional elaboration of authority data with OCLC identity and works.
|
483
|
-
get_oclc_links if @@config.get_oclc
|
712
|
+
def graph_type_agent(uriS)
|
713
|
+
# Note: schema.org has no immediate parent for Person or Organization
|
714
|
+
graph_insert_type(uriS, RDF::FOAF.Agent) if @@config.use_foaf
|
715
|
+
graph_insert_type(uriS, RDF::SCHEMA.Thing) if @@config.use_schema
|
716
|
+
end
|
484
717
|
|
485
|
-
|
486
|
-
|
718
|
+
def graph_insert_name(uriS, name)
|
719
|
+
graph_insert(uriS, RDF::FOAF.name, name) if @@config.use_foaf
|
720
|
+
graph_insert(uriS, RDF::SCHEMA.name, name) if @@config.use_schema
|
721
|
+
end
|
722
|
+
|
723
|
+
def graph_type_organization(uriS)
|
724
|
+
graph_insert_type(uriS, RDF::FOAF.Organization) if @@config.use_foaf
|
725
|
+
graph_insert_type(uriS, RDF::SCHEMA.Organization) if @@config.use_schema
|
726
|
+
end
|
727
|
+
|
728
|
+
def graph_type_person(uriS)
|
729
|
+
graph_insert_type(uriS, RDF::FOAF.Person) if @@config.use_foaf
|
730
|
+
graph_insert_type(uriS, RDF::SCHEMA.Person) if @@config.use_schema
|
487
731
|
end
|
488
732
|
end
|
489
733
|
|