marc2linkeddata 0.0.7 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.env_example +52 -27
- data/README.md +33 -2
- data/bin/marcAuthority2LD +131 -40
- data/bin/run_test_data.sh +68 -0
- data/lib/includes.rb +44 -0
- data/lib/marc2linkeddata.rb +1 -29
- data/lib/marc2linkeddata/cap.rb +15 -0
- data/lib/marc2linkeddata/cap_db.rb +44 -0
- data/lib/marc2linkeddata/configuration.rb +5 -0
- data/lib/marc2linkeddata/loc.rb +5 -1
- data/lib/marc2linkeddata/parseMarcAuthority.rb +333 -89
- data/lib/marc2linkeddata/sparql.rb +4 -37
- data/lib/marc2linkeddata/sparql_dbpedia.rb +22 -0
- data/lib/marc2linkeddata/sparql_local_loc.rb +29 -0
- data/lib/marc2linkeddata/sparql_pubmed.rb +2 -4
- data/log/.gitignore +4 -0
- data/marc2linkeddata.gemspec +11 -2
- metadata +67 -3
data/lib/includes.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'dotenv'
|
2
|
+
Dotenv.load
|
3
|
+
|
4
|
+
require 'addressable/uri'
|
5
|
+
require 'json'
|
6
|
+
require 'rest_client'
|
7
|
+
RestClient.proxy = ENV['http_proxy'] unless ENV['http_proxy'].nil?
|
8
|
+
require 'thread'
|
9
|
+
|
10
|
+
require 'marc'
|
11
|
+
require 'linkeddata'
|
12
|
+
require 'rdf/4store'
|
13
|
+
require 'rdf/mongo'
|
14
|
+
|
15
|
+
require 'pry'
|
16
|
+
require 'pry-doc'
|
17
|
+
require 'ruby-progressbar'
|
18
|
+
|
19
|
+
require_relative 'marc2linkeddata/configuration'
|
20
|
+
|
21
|
+
require_relative 'marc2linkeddata/resource'
|
22
|
+
require_relative 'marc2linkeddata/isni'
|
23
|
+
require_relative 'marc2linkeddata/lib_auth'
|
24
|
+
require_relative 'marc2linkeddata/loc'
|
25
|
+
require_relative 'marc2linkeddata/viaf'
|
26
|
+
|
27
|
+
if ENV['SUL_CAP_ENABLED'].to_s.upcase == 'TRUE'
|
28
|
+
require_relative 'marc2linkeddata/cap'
|
29
|
+
end
|
30
|
+
|
31
|
+
require_relative 'marc2linkeddata/oclc_resource'
|
32
|
+
require_relative 'marc2linkeddata/oclc_identity'
|
33
|
+
require_relative 'marc2linkeddata/oclc_creative_work'
|
34
|
+
require_relative 'marc2linkeddata/oclc_work'
|
35
|
+
|
36
|
+
require_relative 'marc2linkeddata/sparql'
|
37
|
+
require_relative 'marc2linkeddata/sparql_dbpedia'
|
38
|
+
require_relative 'marc2linkeddata/sparql_pubmed'
|
39
|
+
|
40
|
+
require_relative 'marc2linkeddata/parseMarcAuthority'
|
41
|
+
#require_relative 'marc2linkeddata/parseMarcCatalog'
|
42
|
+
|
43
|
+
|
44
|
+
|
data/lib/marc2linkeddata.rb
CHANGED
@@ -1,33 +1,5 @@
|
|
1
|
-
require 'dotenv'
|
2
|
-
Dotenv.load
|
3
|
-
|
4
|
-
require 'addressable/uri'
|
5
|
-
require 'json'
|
6
|
-
require 'linkeddata'
|
7
|
-
require 'marc'
|
8
|
-
require 'rdf/4store'
|
9
|
-
require 'ruby-progressbar'
|
10
|
-
|
11
|
-
require 'pry'
|
12
|
-
require 'pry-doc'
|
13
|
-
|
14
|
-
require_relative 'marc2linkeddata/configuration'
|
15
|
-
|
16
|
-
require_relative 'marc2linkeddata/resource'
|
17
|
-
require_relative 'marc2linkeddata/isni'
|
18
|
-
require_relative 'marc2linkeddata/lib_auth'
|
19
|
-
require_relative 'marc2linkeddata/loc'
|
20
|
-
require_relative 'marc2linkeddata/viaf'
|
21
|
-
|
22
|
-
require_relative 'marc2linkeddata/oclc_resource'
|
23
|
-
require_relative 'marc2linkeddata/oclc_identity'
|
24
|
-
require_relative 'marc2linkeddata/oclc_creative_work'
|
25
|
-
require_relative 'marc2linkeddata/oclc_work'
|
26
|
-
|
27
|
-
require_relative 'marc2linkeddata/parseMarcAuthority'
|
28
|
-
#require_relative 'marc2linkeddata/parseMarcCatalog'
|
29
|
-
require_relative 'marc2linkeddata/sparql'
|
30
1
|
|
2
|
+
require_relative 'includes'
|
31
3
|
|
32
4
|
module Marc2LinkedData
|
33
5
|
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'mysql'
|
3
|
+
require 'sequel'
|
4
|
+
|
5
|
+
module Marc2LinkedData

  # Thin wrapper around a Sequel connection to the CAP MySQL database.
  # Connection settings come from SUL_CAP_DB_* environment variables, each
  # with a hard-coded fallback.
  # @see http://sequel.jeremyevans.net/documentation.html Sequel RDoc
  # @see http://sequel.jeremyevans.net/rdoc/files/README_rdoc.html Sequel README
  # @see http://sequel.jeremyevans.net/rdoc/files/doc/code_order_rdoc.html Sequel code order
  class CapDb

    # Shared logger; also handed to Sequel as its :logger option.
    @@log = Logger.new('log/cap_db.log')

    attr_accessor :db, :db_config

    # Log the table name, columns and primary key of a model.
    # @param m an object responding to table_name, columns and primary_key
    def self.log_model_info(m)
      summary = "table: #{m.table_name}, columns: #{m.columns}, pk: #{m.primary_key}"
      @@log.info summary
    end

    # Build the connection settings, open the Sequel MySQL connection and
    # load the pagination extension.
    def initialize
      @db_config = {
        'host'     => ENV['SUL_CAP_DB_HOST']     || 'localhost',
        'port'     => ENV['SUL_CAP_DB_PORT']     || '3306',
        'user'     => ENV['SUL_CAP_DB_USER']     || 'capUser',
        'password' => ENV['SUL_CAP_DB_PASSWORD'] || 'capPass',
        'database' => ENV['SUL_CAP_DB_DATABASE'] || 'cap'
      }
      sequel_options = {
        :encoding => 'utf8',
        :max_connections => 10,
        :logger => @@log
      }
      @db = Sequel.mysql(@db_config.merge(sequel_options))
      @db.extension(:pagination)
      # Ensure the connection is good on startup, raises exceptions on failure
      puts "#{@db} connected: #{@db.test_connection}"
    end

  end

end
|
44
|
+
|
@@ -5,6 +5,9 @@ module Marc2LinkedData
|
|
5
5
|
|
6
6
|
attr_accessor :debug
|
7
7
|
|
8
|
+
attr_accessor :threads
|
9
|
+
attr_accessor :thread_limit
|
10
|
+
|
8
11
|
attr_accessor :field_auth_loc
|
9
12
|
attr_accessor :field_auth_isni
|
10
13
|
attr_accessor :field_auth_oclc
|
@@ -35,6 +38,8 @@ module Marc2LinkedData
|
|
35
38
|
|
36
39
|
def initialize
|
37
40
|
@debug = env_boolean('DEBUG')
|
41
|
+
@threads = env_boolean('THREADS')
|
42
|
+
@thread_limit = ENV['THREAD_LIMIT'].to_i || 25
|
38
43
|
|
39
44
|
# logging
|
40
45
|
log_file = ENV['LOG_FILE'] || 'marc2ld.log'
|
data/lib/marc2linkeddata/loc.rb
CHANGED
@@ -57,10 +57,14 @@ module Marc2LinkedData
|
|
57
57
|
# obj.nil? ? false : true
|
58
58
|
end
|
59
59
|
|
60
|
-
def
|
60
|
+
# True when at least one entry of iri_types has mads:Geographic as its :o value.
def geographic?
  matches = iri_types.filter do |solution|
    solution[:o] == 'http://www.loc.gov/mads/rdf/v1#Geographic'
  end
  matches.length > 0
end
|
63
63
|
|
64
|
+
# True when at least one entry of iri_types has mads:Title as its :o value.
def uniform_title?
  matches = iri_types.filter do |solution|
    solution[:o] == 'http://www.loc.gov/mads/rdf/v1#Title'
  end
  matches.length > 0
end
|
67
|
+
|
64
68
|
def get_oclc_identity
|
65
69
|
# Try to get OCLC URI from LOC ID
|
66
70
|
# http://oclc.org/developer/develop/web-services/worldcat-identities.en.html
|
@@ -143,7 +143,7 @@ module Marc2LinkedData
|
|
143
143
|
# VIAF RSS feed for changes, e.g. http://viaf.org/viaf/181829329.rss
|
144
144
|
field = get_fields(@@config.field_auth_viaf).first
|
145
145
|
viaf_iri = get_iri(field, 'viaf.org')
|
146
|
-
# If VIAF is not already in the MARC record, try to get from LOC.
|
146
|
+
# If VIAF is not already in the MARC record, try to get it from LOC.
|
147
147
|
if viaf_iri.nil? && @@config.get_viaf
|
148
148
|
viaf_iri = @loc.get_viaf rescue nil
|
149
149
|
@@config.logger.debug 'Failed to resolve VIAF URI' if viaf_iri.nil?
|
@@ -185,6 +185,10 @@ module Marc2LinkedData
|
|
185
185
|
}
|
186
186
|
end
|
187
187
|
|
188
|
+
|
189
|
+
# BLOCK ----------------------------------------------------
|
190
|
+
# Parse fields
|
191
|
+
|
188
192
|
def parse_008
|
189
193
|
# http://www.loc.gov/marc/authority/concise/ad008.html
|
190
194
|
field = get_fields('008').first
|
@@ -236,99 +240,288 @@ module Marc2LinkedData
|
|
236
240
|
}
|
237
241
|
end
|
238
242
|
|
239
|
-
def
|
243
|
+
# Parse MARC field 100 (personal name or name-title heading).
# http://www.loc.gov/marc/authority/concise/ad100.html
#
# Example subfields:
#   a => "Abe, Eiichi,"   d => "1927-"
#   t => "Hoppu dais\xC5\xAB."   l => "English"
#
# The result is memoized in @field100. A missing field or subfield yields
# an empty string for that key.
# @return [Hash] keys :name, :date, :title, :lang, :error
def field100
  return @field100 unless @field100.nil?
  field = get_fields('100').first
  # Value of the first subfield with the given code, '' if anything fails.
  first_value = lambda do |code|
    field.subfields.find { |sf| sf.code == code }.value rescue ''
  end
  name  = first_value.call('a')
  date  = first_value.call('d')
  title = first_value.call('t')
  lang  = first_value.call('l')
  @field100 = {
    :name => name.force_encoding('UTF-8'),
    :date => date,
    :title => title.force_encoding('UTF-8'),
    :lang => lang,
    :error => nil
  }
rescue => e
  @@config.logger.debug "Failed to parse field 100 for #{get_id}: #{e.message}"
  @field100 = { :name => nil, :date => nil, :title => nil, :lang => nil, :error => 'ERROR_PERSON_NAME' }
end
|
251
277
|
|
252
|
-
def
|
278
|
+
# Parse MARC field 110 (corporate name heading).
# http://www.loc.gov/marc/authority/concise/ad110.html
#
# All $a values, then all $b, then all $c are joined with ' : '.
# The result is memoized in @field110.
# @return [Hash] keys :name, :error
def field110
  return @field110 unless @field110.nil?
  field = get_fields('110').first
  values_by_code = %w(a b c).map do |code|
    field.subfields.map { |sf| sf.value if sf.code == code }.compact rescue []
  end
  name = values_by_code.flatten.join(' : ')
  @field110 = {
    :name => name.force_encoding('UTF-8'),
    :error => nil
  }
rescue => e
  @@config.logger.debug "Failed to parse field 110 for #{get_id}: #{e.message}"
  @field110 = { :name => nil, :error => 'ERROR_CORPORATE_NAME' }
end
|
266
300
|
|
267
|
-
def
|
301
|
+
# Parse MARC field 111 (meeting name heading).
# http://www.loc.gov/marc/authority/concise/ad111.html
#
# Example subfields:
#   a => "Joseph Priestley Symposium"   d => "(1974 :"   c => "Wilkes-Barre, Pa.)"
#
# The result is memoized in @field111. A missing field or subfield yields
# an empty string for that key.
# @return [Hash] keys :name, :date, :city, :error
def field111
  return @field111 unless @field111.nil?
  field = get_fields('111').first
  # Value of the first subfield with the given code, '' if anything fails.
  first_value = lambda do |code|
    field.subfields.find { |sf| sf.code == code }.value rescue ''
  end
  name = first_value.call('a')
  date = first_value.call('d')
  city = first_value.call('c')
  @field111 = {
    :name => name.force_encoding('UTF-8'),
    :date => date,
    :city => city.force_encoding('UTF-8'),
    :error => nil
  }
rescue => e
  @@config.logger.debug "Failed to parse field 111 for #{get_id}: #{e.message}"
  @field111 = { :name => nil, :date => nil, :city => nil, :error => 'ERROR_MEETING_NAME' }
end
|
330
|
+
|
331
|
+
# Parse MARC field 130 (uniform title heading).
# http://www.loc.gov/marc/authority/concise/ad130.html
# e.g. http://id.loc.gov/authorities/names/n79119331
#   @subfields=[a => "Fair maid of the Exchange"], @tag="130"
#   (plus a lot of 400 fields)
#
# The title is the value of the first $a subfield, wherever it sits in the
# field. Other subfields (e.g. a $6 linkage) may precede $a, so the first
# subfield cannot be assumed to be $a. A missing field or missing $a gives
# an empty title with no error.
#
# The result is memoized in @field130.
# @return [Hash] keys :title (String, or nil on error) and :error (nil or 'ERROR_UNIFORM_TITLE')
def field130
  return @field130 unless @field130.nil?
  field = get_fields('130').first
  subfield_a = field && field.subfields.find { |sf| sf.code == 'a' }
  title = subfield_a ? subfield_a.value.to_s : ''
  @field130 = {
    :title => title.force_encoding('UTF-8'),
    :error => nil
  }
rescue => e
  @@config.logger.debug "Failed to parse field 130 for #{get_id}: #{e.message}"
  @field130 = {
    :title => nil,
    :error => 'ERROR_UNIFORM_TITLE'
  }
end
|
284
357
|
|
285
|
-
def
|
358
|
+
# Parse MARC field 151 (geographic name heading).
# http://www.loc.gov/marc/authority/concise/ad151.html
# e.g. http://id.loc.gov/authorities/names/n79045127
#
# The name is the value of the first $a subfield, wherever it sits in the
# field. Other subfields (e.g. a $6 linkage) may precede $a, so the first
# subfield cannot be assumed to be $a. A missing field or missing $a gives
# an empty name with no error.
#
# The result is memoized in @field151.
# @return [Hash] keys :name (String, or nil on error) and :error (nil or 'ERROR_PLACE_NAME')
def field151
  return @field151 unless @field151.nil?
  field = get_fields('151').first
  subfield_a = field && field.subfields.find { |sf| sf.code == 'a' }
  name = subfield_a ? subfield_a.value.to_s : ''
  @field151 = {
    :name => name.force_encoding('UTF-8'),
    :error => nil
  }
rescue => e
  @@config.logger.debug "Failed to parse field 151 for #{get_id}: #{e.message}"
  @field151 = {
    :name => nil,
    :error => 'ERROR_PLACE_NAME'
  }
end
|
296
378
|
|
297
|
-
|
379
|
+
|
380
|
+
# BLOCK ----------------------------------------------------
|
381
|
+
# Authority record types
|
382
|
+
|
383
|
+
# TODO: other authority types?
|
384
|
+
# The MARC data differentiates them according to the tag number.
|
385
|
+
# Methods below ordered by field number.
|
386
|
+
|
387
|
+
# X00 - Personal Name
|
388
|
+
# True when field 100 parsed without error and carries a name but no title.
def person?
  heading = field100
  return false unless heading[:error].nil?
  return false if heading[:name].empty?
  heading[:title].empty?
end
|
392
|
+
|
393
|
+
# X00 - Name-Title
|
394
|
+
# True when field 100 parsed without error and carries both a name and a title.
# e.g. http://id.loc.gov/authorities/names/n79044934
def name_title?
  heading = field100
  return false unless heading[:error].nil?
  return false if heading[:name].empty?
  !heading[:title].empty?
end
|
402
|
+
|
403
|
+
# X10 - Corporate Name
|
404
|
+
# True when field 110 parsed without error AND yielded a non-empty name.
# A missing 110 field parses to an empty name with no error, so checking
# :error alone would classify every record as a corporation.
# @return [Boolean]
def corporation?
  heading = field110
  heading[:error].nil? && !heading[:name].to_s.empty?
end
|
407
|
+
|
408
|
+
# X11 - Meeting Name
|
409
|
+
# True when field 111 parsed without error AND yielded a non-empty name.
# e.g. http://id.loc.gov/authorities/names/n79044866
# A missing 111 field parses to an empty name with no error, so checking
# :error alone would classify every record as a meeting.
# @return [Boolean]
def conference?
  heading = field111
  heading[:error].nil? && !heading[:name].to_s.empty?
end
|
413
|
+
|
414
|
+
# X30 - Uniform Title
|
415
|
+
# True when field 130 parsed without error AND yielded a non-empty title.
# A missing 130 field parses to an empty title with no error, so checking
# :error alone would classify every record as a uniform title.
# @return [Boolean]
def uniform_title?
  heading = field130
  heading[:error].nil? && !heading[:title].to_s.empty?
end
|
418
|
+
|
419
|
+
# X51 - Jurisdiction / Geographic Name
|
420
|
+
# - http://www.loc.gov/mads/rdf/v1#Geographic
|
421
|
+
# True when field 151 parsed without error AND yielded a non-empty name.
# e.g. http://id.loc.gov/authorities/names/n79046135.html
# A missing 151 field parses to an empty name with no error, so checking
# :error alone would classify every record as a place.
# @return [Boolean]
def geographic?
  heading = field151
  heading[:error].nil? && !heading[:name].to_s.empty?
end
|
425
|
+
|
426
|
+
# BLOCK ----------------------------------------------------
|
427
|
+
# Parse authority record
|
428
|
+
|
429
|
+
# Dispatch on the LOC IRI: name authorities are parsed either with or
# without LOC RDF retrieval (per @@config.get_loc); anything else only
# opens a pry session when debugging is enabled.
def parse_auth_details
  case @loc.iri.to_s
  when /name/
    # Retrieve and use LOC RDF, or fall back to the MARC record alone.
    @@config.get_loc ? parse_auth_name_rdf : parse_auth_name
  when /subjects/
    # TODO: what to do with subjects?
    binding.pry if @@config.debug
    # parse_auth_subject_rdf
  else
    # What is this?
    binding.pry if @@config.debug
  end
end
|
447
|
+
|
448
|
+
|
449
|
+
# BLOCK ----------------------------------------------------
|
450
|
+
# Parse authority record without RDF
|
451
|
+
|
452
|
+
# Create type and name triples for a LOC name authority using only the
# MARC record (no RDF retrieval).
#
# The authority kind is chosen by the first matching predicate, in order:
# person?, name_title?, corporation?, conference?, uniform_title?,
# geographic?. Anything else is typed as a generic agent with no name.
# A name triple is inserted only when a non-empty name was found.
def parse_auth_name
  name = ''
  if person?
    name = field100[:name]
    graph_type_person(@lib.rdf_uri)
    # TODO: find another way to get first and last names without VIAF
    # (given/family name triples are currently produced only from VIAF
    # data in the RDF-based parser).
  elsif name_title?
    # e.g. http://id.loc.gov/authorities/names/n79044934
    # http://viaf.org/viaf/182251325/rdf.xml
    name = field100[:name]
    graph_insert_type(@lib.rdf_uri, RDF::URI.new('http://www.loc.gov/mads/rdf/v1#NameTitle'))
  elsif corporation?
    name = field110[:name]
    graph_type_organization(@lib.rdf_uri)
  elsif conference?
    # e.g. http://id.loc.gov/authorities/names/n79044866
    # Join the non-empty parts with a space so the heading reads
    # "Name (1974 : City)" rather than running the subfields together.
    parts = [field111[:name], field111[:date], field111[:city]]
    name = parts.reject { |part| part.to_s.empty? }.join(' ')
    # schema:Event is the class; lower-case schema:event is a property
    # and is not a valid rdf:type object.
    graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.Event)
  elsif uniform_title?
    name = field130[:title] # use 'name' for code below, although it's a title
    graph_insert_type(@lib.rdf_uri, RDF::URI.new('http://www.loc.gov/mads/rdf/v1#Title'))
    # NOTE(review): schema:title looks like a property, not a class -- confirm the intended type.
    graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.title)
  elsif geographic?
    name = field151[:name] # use 'name' for code below, although it's a place
    graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.Place)
  else
    # TODO: find out what type this is.
    binding.pry if @@config.debug
    name = ''
    graph_type_agent(@lib.rdf_uri)
  end
  if name != ''
    name = RDF::Literal.new(name)
    graph_insert_name(@lib.rdf_uri, name)
  end
end
|
512
|
+
|
513
|
+
|
514
|
+
# BLOCK ----------------------------------------------------
|
515
|
+
# Parse authority record using RDF
|
516
|
+
|
517
|
+
# Create triples for various kinds of LOC authority.
|
518
|
+
# This method relies on RDF data retrieval.
|
519
|
+
def parse_auth_name_rdf
|
520
|
+
@@config.logger.warn "#{@loc.iri} DEPRECATED" if @loc.deprecated?
|
521
|
+
name = ''
|
522
|
+
if @loc.person?
|
523
|
+
name = @loc.label || field100[:name]
|
524
|
+
graph_type_person(@lib.rdf_uri)
|
332
525
|
# VIAF extracts first and last name, try to use them. Note
|
333
526
|
# that VIAF uses schema:name, schema:givenName, and schema:familyName.
|
334
527
|
if @@config.get_viaf && ! @viaf.nil?
|
@@ -349,29 +542,41 @@ module Marc2LinkedData
|
|
349
542
|
@graph.insert RDF::Statement(@lib.rdf_uri, RDF::SCHEMA.givenName, fn) if @@config.use_schema
|
350
543
|
end
|
351
544
|
end
|
352
|
-
elsif @loc.
|
545
|
+
elsif @loc.name_title?
|
546
|
+
# e.g. http://id.loc.gov/authorities/names/n79044934
|
547
|
+
# http://viaf.org/viaf/182251325/rdf.xml
|
548
|
+
name = @loc.label || field100[:name]
|
549
|
+
graph_insert_type(@lib.rdf_uri, RDF::URI.new('http://www.loc.gov/mads/rdf/v1#NameTitle'))
|
550
|
+
elsif @loc.corporation?
|
551
|
+
name = @loc.label || field110[:name]
|
552
|
+
graph_type_organization(@lib.rdf_uri)
|
553
|
+
elsif @loc.conference?
|
554
|
+
# e.g. http://id.loc.gov/authorities/names/n79044866
|
555
|
+
name = @loc.label || [field111[:name],field111[:date],field111[:city]].join('')
|
556
|
+
graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.event)
|
557
|
+
elsif @loc.geographic?
|
353
558
|
# e.g. http://id.loc.gov/authorities/names/n79045127
|
354
|
-
name = @loc.label ||
|
355
|
-
|
559
|
+
name = @loc.label || field151[:name]
|
560
|
+
graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.Place)
|
561
|
+
elsif @loc.uniform_title?
|
562
|
+
name = field130[:title] # use 'name' for code below, although it's a title
|
563
|
+
graph_insert_type(@lib.rdf_uri, RDF::URI.new('http://www.loc.gov/mads/rdf/v1#Title'))
|
564
|
+
graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.title)
|
356
565
|
else
|
357
566
|
# TODO: find out what type this is.
|
358
567
|
binding.pry if @@config.debug
|
359
568
|
name = @loc.label || ''
|
360
|
-
|
361
|
-
@graph.insert RDF::Statement(@lib.rdf_uri, RDF.type, RDF::FOAF.Agent) if @@config.use_foaf
|
362
|
-
@graph.insert RDF::Statement(@lib.rdf_uri, RDF.type, RDF::SCHEMA.Thing) if @@config.use_schema
|
569
|
+
graph_type_agent(@lib.rdf_uri)
|
363
570
|
end
|
364
571
|
if name != ''
|
365
|
-
# name_encoding = URI.encode(name)
|
366
572
|
name = RDF::Literal.new(name)
|
367
|
-
|
368
|
-
@graph.insert RDF::Statement(@lib.rdf_uri, RDF::SCHEMA.name, name) if @@config.use_schema
|
573
|
+
graph_insert_name(@lib.rdf_uri, name)
|
369
574
|
end
|
370
575
|
end
|
371
576
|
|
372
|
-
|
373
|
-
|
374
|
-
|
577
|
+
|
578
|
+
|
579
|
+
def parse_auth_subject_rdf
|
375
580
|
# The term 'subject' refers to:
|
376
581
|
# X30 - Uniform Titles
|
377
582
|
# X48 - Chronological Terms
|
@@ -400,7 +605,7 @@ module Marc2LinkedData
|
|
400
605
|
# Try to get additional data from OCLC, using the RDFa
|
401
606
|
# available in the OCLC identities pages.
|
402
607
|
oclc_auth = OclcIdentity.new oclc_iri
|
403
|
-
|
608
|
+
graph_insert_sameAs(@loc.rdf_uri, oclc_auth.rdf_uri)
|
404
609
|
oclc_auth.creative_works.each do |creative_work_uri|
|
405
610
|
# Notes on work-around for OCLC data inconsistency:
|
406
611
|
# RDFa for http://www.worldcat.org/identities/lccn-n79044798 contains:
|
@@ -411,17 +616,17 @@ module Marc2LinkedData
|
|
411
616
|
#creative_work_iri = creative_work.to_s.gsub('worldcat.org','www.worldcat.org')
|
412
617
|
#creative_work_iri = creative_work_iri.gsub('wwwwww','www') # in case it gets added already by OCLC
|
413
618
|
creative_work = OclcCreativeWork.new creative_work_uri
|
414
|
-
|
619
|
+
graph_insert_seeAlso(oclc_auth.rdf_uri, creative_work.rdf_uri)
|
415
620
|
if @@config.oclc_auth2works
|
416
621
|
# Try to use VIAF to relate auth to work as creator, contributor, editor, etc.
|
417
622
|
# Note that this requires additional RDF retrieval for each work (slower processing).
|
418
623
|
unless @viaf.nil?
|
419
624
|
if creative_work.creator? @viaf.iri
|
420
|
-
|
625
|
+
graph_insert_creator(creative_work.rdf_uri, oclc_auth.rdf_uri)
|
421
626
|
elsif creative_work.contributor? @viaf.iri
|
422
|
-
|
627
|
+
graph_insert_contributor(creative_work.rdf_uri, oclc_auth.rdf_uri)
|
423
628
|
elsif creative_work.editor? @viaf.iri
|
424
|
-
|
629
|
+
graph_insert_editor(creative_work.rdf_uri, oclc_auth.rdf_uri)
|
425
630
|
end
|
426
631
|
end
|
427
632
|
# TODO: Is auth the subject of the work (as in biography) or both (as in autobiography)?
|
@@ -430,7 +635,7 @@ module Marc2LinkedData
|
|
430
635
|
# Try to find the generic work entity for this example work.
|
431
636
|
creative_work.get_works.each do |oclc_work_uri|
|
432
637
|
oclc_work = OclcWork.new oclc_work_uri
|
433
|
-
|
638
|
+
graph_insert_exampleOfWork(creative_work.rdf_uri, oclc_work.rdf_uri)
|
434
639
|
end
|
435
640
|
end
|
436
641
|
end
|
@@ -439,6 +644,9 @@ module Marc2LinkedData
|
|
439
644
|
|
440
645
|
# TODO: use an institutional 'affiliation' entry, maybe 373? (optional field)
|
441
646
|
|
647
|
+
# BLOCK ----------------------------------------------------
|
648
|
+
# Graph methods
|
649
|
+
|
442
650
|
# Return graph.to_ttl, i.e. the record's graph in Turtle form.
def to_ttl
  turtle = graph.to_ttl
  turtle
end
|
@@ -463,27 +671,63 @@ module Marc2LinkedData
|
|
463
671
|
|
464
672
|
# Get LOC control number and add catalog permalink? e.g.
|
465
673
|
# http://lccn.loc.gov/n79046291
|
674
|
+
graph_insert_sameAs(@lib.rdf_uri, @loc.rdf_uri)
|
675
|
+
graph_insert_sameAs(@lib.rdf_uri, @viaf.rdf_uri) unless @viaf.nil?
|
676
|
+
graph_insert_sameAs(@lib.rdf_uri, @isni.rdf_uri) unless @isni.nil?
|
677
|
+
parse_auth_details
|
678
|
+
# Optional elaboration of authority data with OCLC identity and works.
|
679
|
+
get_oclc_links if @@config.get_oclc
|
680
|
+
# @@config.logger.info "Extracted #{@loc.id}"
|
681
|
+
@graph
|
682
|
+
end
|
466
683
|
|
467
|
-
|
468
|
-
@graph.insert RDF::Statement(
|
469
|
-
|
470
|
-
|
684
|
+
# Insert the statement (uriS, uriP, uriO) into @graph.
def graph_insert(uriS, uriP, uriO)
  statement = RDF::Statement(uriS, uriP, uriO)
  @graph.insert(statement)
end
|
687
|
+
# Insert an owl:sameAs statement from uriS to uriO.
def graph_insert_sameAs(uriS, uriO)
  predicate = RDF::OWL.sameAs
  graph_insert(uriS, predicate, uriO)
end
|
690
|
+
# Insert an rdfs:seeAlso statement from uriS to uriO.
def graph_insert_seeAlso(uriS, uriO)
  predicate = RDF::RDFS.seeAlso
  graph_insert(uriS, predicate, uriO)
end
|
693
|
+
# Insert a schema:exampleOfWork statement from uriS to uriO.
def graph_insert_exampleOfWork(uriS, uriO)
  predicate = RDF::SCHEMA.exampleOfWork
  graph_insert(uriS, predicate, uriO)
end
|
696
|
+
# Insert a schema:creator statement from uriS to uriO.
def graph_insert_creator(uriS, uriO)
  predicate = RDF::SCHEMA.creator
  graph_insert(uriS, predicate, uriO)
end
|
699
|
+
# Insert a schema:contributor statement from uriS to uriO.
def graph_insert_contributor(uriS, uriO)
  predicate = RDF::SCHEMA.contributor
  graph_insert(uriS, predicate, uriO)
end
|
702
|
+
# Insert a schema:editor statement from uriS to uriO.
def graph_insert_editor(uriS, uriO)
  predicate = RDF::SCHEMA.editor
  graph_insert(uriS, predicate, uriO)
end
|
705
|
+
# Insert an rdf:type statement declaring uriS to be of type uriO.
def graph_insert_type(uriS, uriO)
  predicate = RDF.type
  graph_insert(uriS, predicate, uriO)
end
|
471
708
|
|
472
|
-
|
473
|
-
|
709
|
+
# ----
|
710
|
+
# Methods that can use FOAF or SCHEMA or both (or neither?)
|
474
711
|
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
binding.pry if @@config.debug
|
481
|
-
end
|
482
|
-
# Optional elaboration of authority data with OCLC identity and works.
|
483
|
-
get_oclc_links if @@config.get_oclc
|
712
|
+
# Type uriS as foaf:Agent and/or schema:Thing, depending on which
# vocabularies are enabled in the configuration.
# Note: schema.org has no immediate parent for Person or Organization
def graph_type_agent(uriS)
  if @@config.use_foaf
    graph_insert_type(uriS, RDF::FOAF.Agent)
  end
  if @@config.use_schema
    graph_insert_type(uriS, RDF::SCHEMA.Thing)
  end
end
|
484
717
|
|
485
|
-
|
486
|
-
|
718
|
+
# Attach name to uriS as foaf:name and/or schema:name, depending on which
# vocabularies are enabled in the configuration.
def graph_insert_name(uriS, name)
  if @@config.use_foaf
    graph_insert(uriS, RDF::FOAF.name, name)
  end
  if @@config.use_schema
    graph_insert(uriS, RDF::SCHEMA.name, name)
  end
end
|
722
|
+
|
723
|
+
# Type uriS as foaf:Organization and/or schema:Organization, depending on
# which vocabularies are enabled in the configuration.
def graph_type_organization(uriS)
  if @@config.use_foaf
    graph_insert_type(uriS, RDF::FOAF.Organization)
  end
  if @@config.use_schema
    graph_insert_type(uriS, RDF::SCHEMA.Organization)
  end
end
|
727
|
+
|
728
|
+
# Type uriS as foaf:Person and/or schema:Person, depending on which
# vocabularies are enabled in the configuration.
def graph_type_person(uriS)
  if @@config.use_foaf
    graph_insert_type(uriS, RDF::FOAF.Person)
  end
  if @@config.use_schema
    graph_insert_type(uriS, RDF::SCHEMA.Person)
  end
end
|
488
732
|
end
|
489
733
|
|