marc2linkeddata 0.0.7 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/includes.rb ADDED
@@ -0,0 +1,44 @@
1
+ require 'dotenv'
2
+ Dotenv.load
3
+
4
+ require 'addressable/uri'
5
+ require 'json'
6
+ require 'rest_client'
7
+ RestClient.proxy = ENV['http_proxy'] unless ENV['http_proxy'].nil?
8
+ require 'thread'
9
+
10
+ require 'marc'
11
+ require 'linkeddata'
12
+ require 'rdf/4store'
13
+ require 'rdf/mongo'
14
+
15
+ require 'pry'
16
+ require 'pry-doc'
17
+ require 'ruby-progressbar'
18
+
19
+ require_relative 'marc2linkeddata/configuration'
20
+
21
+ require_relative 'marc2linkeddata/resource'
22
+ require_relative 'marc2linkeddata/isni'
23
+ require_relative 'marc2linkeddata/lib_auth'
24
+ require_relative 'marc2linkeddata/loc'
25
+ require_relative 'marc2linkeddata/viaf'
26
+
27
+ if ENV['SUL_CAP_ENABLED'].to_s.upcase == 'TRUE'
28
+ require_relative 'marc2linkeddata/cap'
29
+ end
30
+
31
+ require_relative 'marc2linkeddata/oclc_resource'
32
+ require_relative 'marc2linkeddata/oclc_identity'
33
+ require_relative 'marc2linkeddata/oclc_creative_work'
34
+ require_relative 'marc2linkeddata/oclc_work'
35
+
36
+ require_relative 'marc2linkeddata/sparql'
37
+ require_relative 'marc2linkeddata/sparql_dbpedia'
38
+ require_relative 'marc2linkeddata/sparql_pubmed'
39
+
40
+ require_relative 'marc2linkeddata/parseMarcAuthority'
41
+ #require_relative 'marc2linkeddata/parseMarcCatalog'
42
+
43
+
44
+
@@ -1,33 +1,5 @@
1
- require 'dotenv'
2
- Dotenv.load
3
-
4
- require 'addressable/uri'
5
- require 'json'
6
- require 'linkeddata'
7
- require 'marc'
8
- require 'rdf/4store'
9
- require 'ruby-progressbar'
10
-
11
- require 'pry'
12
- require 'pry-doc'
13
-
14
- require_relative 'marc2linkeddata/configuration'
15
-
16
- require_relative 'marc2linkeddata/resource'
17
- require_relative 'marc2linkeddata/isni'
18
- require_relative 'marc2linkeddata/lib_auth'
19
- require_relative 'marc2linkeddata/loc'
20
- require_relative 'marc2linkeddata/viaf'
21
-
22
- require_relative 'marc2linkeddata/oclc_resource'
23
- require_relative 'marc2linkeddata/oclc_identity'
24
- require_relative 'marc2linkeddata/oclc_creative_work'
25
- require_relative 'marc2linkeddata/oclc_work'
26
-
27
- require_relative 'marc2linkeddata/parseMarcAuthority'
28
- #require_relative 'marc2linkeddata/parseMarcCatalog'
29
- require_relative 'marc2linkeddata/sparql'
30
1
 
2
+ require_relative 'includes'
31
3
 
32
4
  module Marc2LinkedData
33
5
 
@@ -0,0 +1,15 @@
1
+ require_relative 'cap_db'
2
+
3
module Marc2LinkedData

  # Entry point for the optional CAP integration.
  # An instance owns one CapDb connection wrapper, exposed through +db+.
  class Cap

    # The Marc2LinkedData::CapDb instance created by the constructor.
    attr_accessor :db

    # Builds the CapDb wrapper; any exception raised by CapDb.new propagates.
    def initialize
      @db = CapDb.new
    end

  end

end
15
+
@@ -0,0 +1,44 @@
1
+ require 'logger'
2
+ require 'mysql'
3
+ require 'sequel'
4
+
5
module Marc2LinkedData

  # An interface to an SQL database using Sequel
  # @see http://sequel.jeremyevans.net/documentation.html Sequel RDoc
  # @see http://sequel.jeremyevans.net/rdoc/files/README_rdoc.html Sequel README
  # @see http://sequel.jeremyevans.net/rdoc/files/doc/code_order_rdoc.html Sequel code order
  class CapDb

    # Shared logger; also handed to Sequel as its :logger option.
    @@log = Logger.new('log/cap_db.log')

    attr_accessor :db, :db_config

    # Write the table name, columns and primary key of a model to the log.
    # @param m an object responding to table_name, columns and primary_key
    def self.log_model_info(m)
      @@log.info "table: #{m.table_name}, columns: #{m.columns}, pk: #{m.primary_key}"
    end

    # Read connection settings from SUL_CAP_DB_* environment variables
    # (with local defaults), connect via Sequel.mysql, load the pagination
    # extension and test the connection.
    def initialize
      @db_config = {
        'host'     => ENV['SUL_CAP_DB_HOST'] || 'localhost',
        'port'     => ENV['SUL_CAP_DB_PORT'] || '3306',
        'user'     => ENV['SUL_CAP_DB_USER'] || 'capUser',
        'password' => ENV['SUL_CAP_DB_PASSWORD'] || 'capPass',
        'database' => ENV['SUL_CAP_DB_DATABASE'] || 'cap'
      }
      connection_options = @db_config.merge(
        :encoding => 'utf8',
        :max_connections => 10,
        :logger => @@log
      )
      @db = Sequel.mysql(connection_options)
      @db.extension(:pagination)
      # Ensure the connection is good on startup, raises exceptions on failure
      puts "#{@db} connected: #{@db.test_connection}"
    end

  end

end
44
+
@@ -5,6 +5,9 @@ module Marc2LinkedData
5
5
 
6
6
  attr_accessor :debug
7
7
 
8
+ attr_accessor :threads
9
+ attr_accessor :thread_limit
10
+
8
11
  attr_accessor :field_auth_loc
9
12
  attr_accessor :field_auth_isni
10
13
  attr_accessor :field_auth_oclc
@@ -35,6 +38,8 @@ module Marc2LinkedData
35
38
 
36
39
  def initialize
37
40
  @debug = env_boolean('DEBUG')
41
+ @threads = env_boolean('THREADS')
42
# Default to 25 only when THREAD_LIMIT is unset. The previous form,
# `ENV['THREAD_LIMIT'].to_i || 25`, could never reach 25 because
# nil.to_i is 0 and 0 is truthy in Ruby.
@thread_limit = (ENV['THREAD_LIMIT'] || 25).to_i
38
43
 
39
44
  # logging
40
45
  log_file = ENV['LOG_FILE'] || 'marc2ld.log'
@@ -57,10 +57,14 @@ module Marc2LinkedData
57
57
  # obj.nil? ? false : true
58
58
  end
59
59
 
60
# True when at least one entry of iri_types has the MADS Geographic
# class IRI as its :o value.
def geographic?
  matches = iri_types.filter { |type| type[:o] == 'http://www.loc.gov/mads/rdf/v1#Geographic' }
  !matches.length.zero?
end
63
63
 
64
# True when at least one entry of iri_types has the MADS Title
# class IRI as its :o value.
def uniform_title?
  matches = iri_types.filter { |type| type[:o] == 'http://www.loc.gov/mads/rdf/v1#Title' }
  !matches.length.zero?
end
67
+
64
68
  def get_oclc_identity
65
69
  # Try to get OCLC URI from LOC ID
66
70
  # http://oclc.org/developer/develop/web-services/worldcat-identities.en.html
@@ -143,7 +143,7 @@ module Marc2LinkedData
143
143
  # VIAF RSS feed for changes, e.g. http://viaf.org/viaf/181829329.rss
144
144
  field = get_fields(@@config.field_auth_viaf).first
145
145
  viaf_iri = get_iri(field, 'viaf.org')
146
- # If VIAF is not already in the MARC record, try to get from LOC.
146
+ # If VIAF is not already in the MARC record, try to get it from LOC.
147
147
  if viaf_iri.nil? && @@config.get_viaf
148
148
  viaf_iri = @loc.get_viaf rescue nil
149
149
  @@config.logger.debug 'Failed to resolve VIAF URI' if viaf_iri.nil?
@@ -185,6 +185,10 @@ module Marc2LinkedData
185
185
  }
186
186
  end
187
187
 
188
+
189
+ # BLOCK ----------------------------------------------------
190
+ # Parse fields
191
+
188
192
  def parse_008
189
193
  # http://www.loc.gov/marc/authority/concise/ad008.html
190
194
  field = get_fields('008').first
@@ -236,99 +240,288 @@ module Marc2LinkedData
236
240
  }
237
241
  end
238
242
 
239
# Parse MARC field 100 (personal name or name-title) and memoize the
# result in @field100.
# http://www.loc.gov/marc/authority/concise/ad100.html
#
# Example subfields:
#   $a "Abe, Eiichi,"  $d "1927-"  $t "Hoppu dais\xC5\xAB."  $l "English"
#
# A missing field or subfield yields an empty string for that entry,
# not an error.
#
# @return [Hash] :name, :date, :title, :lang (Strings) and :error (nil);
#   if anything raises, all values are nil and :error is 'ERROR_PERSON_NAME'.
def field100
  return @field100 unless @field100.nil?
  heading = get_fields('100').first
  # heading = @record.fields.select {|f| f if f.tag == '100' }.first
  subfield_value = lambda do |code|
    heading.subfields.find { |sf| sf.code == code }.value rescue ''
  end
  name, date, title, lang = %w[a d t l].map(&subfield_value)
  @field100 = {
    :name => name.force_encoding('UTF-8'),
    :date => date,
    :title => title.force_encoding('UTF-8'),
    :lang => lang,
    :error => nil
  }
rescue => e
  @@config.logger.debug "Failed to parse field 100 for #{get_id}: #{e.message}"
  @field100 = {
    :name => nil,
    :date => nil,
    :title => nil,
    :lang => nil,
    :error => 'ERROR_PERSON_NAME' #e.message
  }
end
251
277
 
252
# Parse MARC field 110 (corporate name) and memoize the result in @field110.
# http://www.loc.gov/marc/authority/concise/ad110.html
#
# All $a, $b and $c subfield values are collected (in that order) and
# joined with ' : '. A missing field yields an empty name, not an error.
#
# @return [Hash] :name (String) and :error (nil); if anything raises,
#   :name is nil and :error is 'ERROR_CORPORATE_NAME'.
def field110
  return @field110 unless @field110.nil?
  heading = get_fields('110').first
  parts = %w[a b c].map do |code|
    heading.subfields.collect { |sf| sf.value if sf.code == code }.compact rescue []
  end
  label = parts.flatten.join(' : ')
  @field110 = {
    :name => label.force_encoding('UTF-8'),
    :error => nil
  }
rescue => e
  @@config.logger.debug "Failed to parse field 110 for #{get_id}: #{e.message}"
  @field110 = {
    :name => nil,
    :error => 'ERROR_CORPORATE_NAME' #e.message
  }
end
266
300
 
267
# Parse MARC field 111 (meeting name) and memoize the result in @field111.
# http://www.loc.gov/marc/authority/concise/ad111.html
#
# Example subfields:
#   $a "Joseph Priestley Symposium"  $d "(1974 :"  $c "Wilkes-Barre, Pa.)"
#
# A missing field or subfield yields an empty string for that entry,
# not an error.
#
# @return [Hash] :name, :date, :city (Strings) and :error (nil);
#   if anything raises, all values are nil and :error is 'ERROR_MEETING_NAME'.
def field111
  return @field111 unless @field111.nil?
  heading = get_fields('111').first
  subfield_value = lambda do |code|
    heading.subfields.find { |sf| sf.code == code }.value rescue ''
  end
  name, date, city = %w[a d c].map(&subfield_value)
  @field111 = {
    :name => name.force_encoding('UTF-8'),
    :date => date,
    :city => city.force_encoding('UTF-8'),
    :error => nil
  }
rescue => e
  @@config.logger.debug "Failed to parse field 111 for #{get_id}: #{e.message}"
  @field111 = {
    :name => nil,
    :date => nil,
    :city => nil,
    :error => 'ERROR_MEETING_NAME'
  }
end
330
+
331
# Parse MARC field 130 (uniform title) and memoize the result in @field130.
# http://www.loc.gov/marc/authority/concise/ad130.html
# e.g. http://id.loc.gov/authorities/names/n79119331
#   #<MARC::DataField @indicator1=" ", @indicator2="0",
#     @subfields=[#<MARC::Subfield @code="a", @value="Fair maid of the Exchange">],
#     @tag="130">, plus a lot of 400 fields
#
# The title is the first $a subfield, wherever it sits in the field.
# A missing 130 field yields an empty title, not an error; a 130 field
# without any $a ends up in the error branch.
#
# @return [Hash] :title (String) and :error (nil); on failure :title is
#   nil and :error is 'ERROR_UNIFORM_TITLE'.
def field130
  return @field130 unless @field130.nil?
  field = get_fields('130').first
  # compact before first: without it a leading non-$a subfield (mapped to
  # nil by the block) was returned instead of the $a value.
  title = field.subfields.collect { |f| f.value if f.code == 'a' }.compact.first rescue ''
  @field130 = {
    :title => title.force_encoding('UTF-8'),
    :error => nil
  }
rescue => e
  @@config.logger.debug "Failed to parse field 130 for #{get_id}: #{e.message}"
  @field130 = {
    :title => nil,
    :error => 'ERROR_UNIFORM_TITLE'
  }
end
284
357
 
285
# Parse MARC field 151 (geographic name) and memoize the result in @field151.
# http://www.loc.gov/marc/authority/concise/ad151.html
# e.g. http://id.loc.gov/authorities/names/n79045127
#
# The name is the first $a subfield, wherever it sits in the field.
# A missing 151 field yields an empty name, not an error; a 151 field
# without any $a ends up in the error branch.
#
# @return [Hash] :name (String) and :error (nil); on failure :name is
#   nil and :error is 'ERROR_PLACE_NAME'.
def field151
  return @field151 unless @field151.nil?
  field = get_fields('151').first
  # compact before first: without it a leading non-$a subfield (mapped to
  # nil by the block) was returned instead of the $a value.
  name = field.subfields.collect { |f| f.value if f.code == 'a' }.compact.first rescue ''
  @field151 = {
    :name => name.force_encoding('UTF-8'),
    :error => nil
  }
rescue => e
  @@config.logger.debug "Failed to parse field 151 for #{get_id}: #{e.message}"
  @field151 = {
    :name => nil,
    :error => 'ERROR_PLACE_NAME'
  }
end
296
378
 
297
- def parse_loc_auth_name
379
+
380
+ # BLOCK ----------------------------------------------------
381
+ # Authority record types
382
+
383
+ # TODO: other authority types?
384
+ # The MARC data differentiates them according to the tag number.
385
+ # Methods below ordered by field number.
386
+
387
+ # X00 - Personal Name
388
# X00 - Personal Name: field 100 parsed without error, with a name and
# without a title.
def person?
  heading = field100
  return false unless heading[:error].nil?
  return false if heading[:name].empty?
  heading[:title].empty?
end
392
+
393
+ # X00 - Name-Title
394
# X00 - Name-Title: field 100 parsed without error, with both a name
# and a title.
# e.g. http://id.loc.gov/authorities/names/n79044934
def name_title?
  heading = field100
  return false unless heading[:error].nil?
  return false if heading[:name].empty?
  !heading[:title].empty?
end
402
+
403
+ # X10 - Corporate Name
404
# X10 - Corporate Name: field 110 parsed without error AND has a name.
# field110 reports :error => nil with an empty name when the record has
# no 110 field, so the name check is required (as in person?); without
# it every record that reached this predicate was typed as a corporation.
def corporation?
  field = field110
  field[:error].nil? && !field[:name].to_s.empty?
end
407
+
408
+ # X11 - Meeting Name
409
# X11 - Meeting Name: field 111 parsed without error AND has a name.
# e.g. http://id.loc.gov/authorities/names/n79044866
# field111 reports :error => nil with an empty name when the record has
# no 111 field, so the name check is required (as in person?).
def conference?
  field = field111
  field[:error].nil? && !field[:name].to_s.empty?
end
413
+
414
+ # X30 - Uniform Title
415
# X30 - Uniform Title: field 130 parsed without error AND has a title.
# field130 reports :error => nil with an empty title when the record has
# no 130 field, so the title check is required (as in person?).
def uniform_title?
  field = field130
  field[:error].nil? && !field[:title].to_s.empty?
end
418
+
419
+ # X51 - Jurisdiction / Geographic Name
420
+ # - http://www.loc.gov/mads/rdf/v1#Geographic
421
# X51 - Jurisdiction / Geographic Name: field 151 parsed without error
# AND has a name.
# - http://www.loc.gov/mads/rdf/v1#Geographic
# e.g. http://id.loc.gov/authorities/names/n79046135.html
# field151 reports :error => nil with an empty name when the record has
# no 151 field, so the name check is required (as in person?).
def geographic?
  field = field151
  field[:error].nil? && !field[:name].to_s.empty?
end
425
+
426
+ # BLOCK ----------------------------------------------------
427
+ # Parse authority record
428
+
429
# Dispatch on the LOC IRI: name authorities are parsed either from the
# retrieved LOC RDF (when @@config.get_loc is set) or from the MARC
# record alone; subject authorities and anything else are not handled
# yet and only open a pry session in debug mode.
def parse_auth_details
  case @loc.iri.to_s
  when /name/
    # Retrieve and use LOC RDF, or use only the MARC record.
    @@config.get_loc ? parse_auth_name_rdf : parse_auth_name
  when /subjects/
    # TODO: what to do with subjects?
    binding.pry if @@config.debug
    # parse_auth_subject_rdf
  else
    # What is this?
    binding.pry if @@config.debug
  end
end
447
+
448
+
449
+ # BLOCK ----------------------------------------------------
450
+ # Parse authority record without RDF
451
+
452
# Create triples for the various kinds of LOC name authority using only
# the MARC record (no RDF retrieval).
#
# The first matching predicate (person?, name_title?, corporation?,
# conference?, uniform_title?, geographic?) decides the rdf:type inserted
# for @lib.rdf_uri and which parsed field supplies the name. Unrecognized
# records are typed as a generic agent and get no name. A non-empty name
# is inserted as an RDF::Literal via graph_insert_name.
#
# NOTE(review): RDF::SCHEMA.event and RDF::SCHEMA.title are used as
# rdf:type objects; schema.org class names are capitalized (e.g. Event) -
# confirm these are the intended terms.
def parse_auth_name
  name = ''
  if person?
    name = field100[:name]
    graph_type_person(@lib.rdf_uri)
    # TODO: find another way to get first and last names without VIAF
    # (the RDF-based parser takes family/given names from VIAF).
  elsif name_title?
    # e.g. http://id.loc.gov/authorities/names/n79044934
    # http://viaf.org/viaf/182251325/rdf.xml
    name = field100[:name]
    graph_insert_type(@lib.rdf_uri, RDF::URI.new('http://www.loc.gov/mads/rdf/v1#NameTitle'))
  elsif corporation?
    name = field110[:name]
    graph_type_organization(@lib.rdf_uri)
  elsif conference?
    # e.g. http://id.loc.gov/authorities/names/n79044866
    meeting = field111
    # Separate name, date and place with a space; joining with '' fused
    # them, e.g. "Joseph Priestley Symposium(1974 :Wilkes-Barre, Pa.)".
    name = [meeting[:name], meeting[:date], meeting[:city]].reject { |part| part.to_s.empty? }.join(' ')
    graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.event)
  elsif uniform_title?
    name = field130[:title] # use 'name' for code below, although it's a title
    graph_insert_type(@lib.rdf_uri, RDF::URI.new('http://www.loc.gov/mads/rdf/v1#Title'))
    graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.title)
  elsif geographic?
    name = field151[:name] # use 'name' for code below, although it's a place
    graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.Place)
  else
    # TODO: find out what type this is.
    binding.pry if @@config.debug
    graph_type_agent(@lib.rdf_uri)
  end
  graph_insert_name(@lib.rdf_uri, RDF::Literal.new(name)) if name != ''
end
512
+
513
+
514
+ # BLOCK ----------------------------------------------------
515
+ # Parse authority record using RDF
516
+
517
+ # Create triples for various kinds of LOC authority.
518
+ # This method relies on RDF data retrieval.
519
+ def parse_auth_name_rdf
520
+ @@config.logger.warn "#{@loc.iri} DEPRECATED" if @loc.deprecated?
521
+ name = ''
522
+ if @loc.person?
523
+ name = @loc.label || field100[:name]
524
+ graph_type_person(@lib.rdf_uri)
332
525
  # VIAF extracts first and last name, try to use them. Note
333
526
  # that VIAF uses schema:name, schema:givenName, and schema:familyName.
334
527
  if @@config.get_viaf && ! @viaf.nil?
@@ -349,29 +542,41 @@ module Marc2LinkedData
349
542
  @graph.insert RDF::Statement(@lib.rdf_uri, RDF::SCHEMA.givenName, fn) if @@config.use_schema
350
543
  end
351
544
  end
352
- elsif @loc.place?
545
+ elsif @loc.name_title?
546
+ # e.g. http://id.loc.gov/authorities/names/n79044934
547
+ # http://viaf.org/viaf/182251325/rdf.xml
548
+ name = @loc.label || field100[:name]
549
+ graph_insert_type(@lib.rdf_uri, RDF::URI.new('http://www.loc.gov/mads/rdf/v1#NameTitle'))
550
+ elsif @loc.corporation?
551
+ name = @loc.label || field110[:name]
552
+ graph_type_organization(@lib.rdf_uri)
553
+ elsif @loc.conference?
554
+ # e.g. http://id.loc.gov/authorities/names/n79044866
555
+ name = @loc.label || [field111[:name],field111[:date],field111[:city]].join('')
556
+ graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.event)
557
+ elsif @loc.geographic?
353
558
  # e.g. http://id.loc.gov/authorities/names/n79045127
354
- name = @loc.label || parse_151
355
- @graph.insert RDF::Statement(@lib.rdf_uri, RDF.type, RDF::SCHEMA.Place)
559
+ name = @loc.label || field151[:name]
560
+ graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.Place)
561
+ elsif @loc.uniform_title?
562
+ name = field130[:title] # use 'name' for code below, although it's a title
563
+ graph_insert_type(@lib.rdf_uri, RDF::URI.new('http://www.loc.gov/mads/rdf/v1#Title'))
564
+ graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.title)
356
565
  else
357
566
  # TODO: find out what type this is.
358
567
  binding.pry if @@config.debug
359
568
  name = @loc.label || ''
360
- # Note: schema.org has no immediate parent for Person or Organization
361
- @graph.insert RDF::Statement(@lib.rdf_uri, RDF.type, RDF::FOAF.Agent) if @@config.use_foaf
362
- @graph.insert RDF::Statement(@lib.rdf_uri, RDF.type, RDF::SCHEMA.Thing) if @@config.use_schema
569
+ graph_type_agent(@lib.rdf_uri)
363
570
  end
364
571
  if name != ''
365
- # name_encoding = URI.encode(name)
366
572
  name = RDF::Literal.new(name)
367
- @graph.insert RDF::Statement(@lib.rdf_uri, RDF::FOAF.name, name) if @@config.use_foaf
368
- @graph.insert RDF::Statement(@lib.rdf_uri, RDF::SCHEMA.name, name) if @@config.use_schema
573
+ graph_insert_name(@lib.rdf_uri, name)
369
574
  end
370
575
  end
371
576
 
372
- def parse_loc_auth_subject
373
- # TODO: what to do with subjects?
374
- binding.pry if @@config.debug
577
+
578
+
579
+ def parse_auth_subject_rdf
375
580
  # The term 'subject' refers to:
376
581
  # X30 - Uniform Titles
377
582
  # X48 - Chronological Terms
@@ -400,7 +605,7 @@ module Marc2LinkedData
400
605
  # Try to get additional data from OCLC, using the RDFa
401
606
  # available in the OCLC identities pages.
402
607
  oclc_auth = OclcIdentity.new oclc_iri
403
- @graph.insert RDF::Statement(@loc.rdf_uri, RDF::OWL.sameAs, oclc_auth.rdf_uri)
608
+ graph_insert_sameAs(@loc.rdf_uri, oclc_auth.rdf_uri)
404
609
  oclc_auth.creative_works.each do |creative_work_uri|
405
610
  # Notes on work-around for OCLC data inconsistency:
406
611
  # RDFa for http://www.worldcat.org/identities/lccn-n79044798 contains:
@@ -411,17 +616,17 @@ module Marc2LinkedData
411
616
  #creative_work_iri = creative_work.to_s.gsub('worldcat.org','www.worldcat.org')
412
617
  #creative_work_iri = creative_work_iri.gsub('wwwwww','www') # in case it gets added already by OCLC
413
618
  creative_work = OclcCreativeWork.new creative_work_uri
414
- @graph.insert RDF::Statement(oclc_auth.rdf_uri, RDF::RDFS.seeAlso, creative_work.rdf_uri)
619
+ graph_insert_seeAlso(oclc_auth.rdf_uri, creative_work.rdf_uri)
415
620
  if @@config.oclc_auth2works
416
621
  # Try to use VIAF to relate auth to work as creator, contributor, editor, etc.
417
622
  # Note that this requires additional RDF retrieval for each work (slower processing).
418
623
  unless @viaf.nil?
419
624
  if creative_work.creator? @viaf.iri
420
- @graph.insert RDF::Statement(creative_work.rdf_uri, RDF::SCHEMA.creator, oclc_auth.rdf_uri)
625
+ graph_insert_creator(creative_work.rdf_uri, oclc_auth.rdf_uri)
421
626
  elsif creative_work.contributor? @viaf.iri
422
- @graph.insert RDF::Statement(creative_work.rdf_uri, RDF::SCHEMA.contributor, oclc_auth.rdf_uri)
627
+ graph_insert_contributor(creative_work.rdf_uri, oclc_auth.rdf_uri)
423
628
  elsif creative_work.editor? @viaf.iri
424
- @graph.insert RDF::Statement(creative_work.rdf_uri, RDF::SCHEMA.editor, oclc_auth.rdf_uri)
629
+ graph_insert_editor(creative_work.rdf_uri, oclc_auth.rdf_uri)
425
630
  end
426
631
  end
427
632
  # TODO: Is auth the subject of the work (as in biography) or both (as in autobiography)?
@@ -430,7 +635,7 @@ module Marc2LinkedData
430
635
  # Try to find the generic work entity for this example work.
431
636
  creative_work.get_works.each do |oclc_work_uri|
432
637
  oclc_work = OclcWork.new oclc_work_uri
433
- @graph.insert RDF::Statement(creative_work.rdf_uri, RDF::SCHEMA.exampleOfWork, oclc_work.rdf_uri)
638
+ graph_insert_exampleOfWork(creative_work.rdf_uri, oclc_work.rdf_uri)
434
639
  end
435
640
  end
436
641
  end
@@ -439,6 +644,9 @@ module Marc2LinkedData
439
644
 
440
645
  # TODO: use an institutional 'affiliation' entry, maybe 373? (optional field)
441
646
 
647
+ # BLOCK ----------------------------------------------------
648
+ # Graph methods
649
+
442
650
# Serialize the record's graph by delegating to graph.to_ttl.
# @return whatever graph.to_ttl returns
def to_ttl
  graph.to_ttl
end
@@ -463,27 +671,63 @@ module Marc2LinkedData
463
671
 
464
672
  # Get LOC control number and add catalog permalink? e.g.
465
673
  # http://lccn.loc.gov/n79046291
674
+ graph_insert_sameAs(@lib.rdf_uri, @loc.rdf_uri)
675
+ graph_insert_sameAs(@lib.rdf_uri, @viaf.rdf_uri) unless @viaf.nil?
676
+ graph_insert_sameAs(@lib.rdf_uri, @isni.rdf_uri) unless @isni.nil?
677
+ parse_auth_details
678
+ # Optional elaboration of authority data with OCLC identity and works.
679
+ get_oclc_links if @@config.get_oclc
680
+ # @@config.logger.info "Extracted #{@loc.id}"
681
+ @graph
682
+ end
466
683
 
467
- @graph.insert RDF::Statement(@lib.rdf_uri, RDF::OWL.sameAs, @loc.rdf_uri)
468
- @graph.insert RDF::Statement(@lib.rdf_uri, RDF::OWL.sameAs, @viaf.rdf_uri) unless @viaf.nil?
469
- @graph.insert RDF::Statement(@lib.rdf_uri, RDF::OWL.sameAs, @isni.rdf_uri) unless @isni.nil?
470
- return @graph unless @@config.get_loc
684
# Insert one statement (subject, predicate, object) into @graph.
def graph_insert(uriS, uriP, uriO)
  statement = RDF::Statement(uriS, uriP, uriO)
  @graph.insert statement
end

# uriS owl:sameAs uriO
def graph_insert_sameAs(uriS, uriO)
  graph_insert(uriS, RDF::OWL.sameAs, uriO)
end

# uriS rdfs:seeAlso uriO
def graph_insert_seeAlso(uriS, uriO)
  graph_insert(uriS, RDF::RDFS.seeAlso, uriO)
end

# uriS schema:exampleOfWork uriO
def graph_insert_exampleOfWork(uriS, uriO)
  graph_insert(uriS, RDF::SCHEMA.exampleOfWork, uriO)
end

# uriS schema:creator uriO
def graph_insert_creator(uriS, uriO)
  graph_insert(uriS, RDF::SCHEMA.creator, uriO)
end

# uriS schema:contributor uriO
def graph_insert_contributor(uriS, uriO)
  graph_insert(uriS, RDF::SCHEMA.contributor, uriO)
end

# uriS schema:editor uriO
def graph_insert_editor(uriS, uriO)
  graph_insert(uriS, RDF::SCHEMA.editor, uriO)
end

# uriS rdf:type uriO
def graph_insert_type(uriS, uriO)
  graph_insert(uriS, RDF.type, uriO)
end
471
708
 
472
- # TODO: find codes in the marc record to differentiate the authority into
473
- # TODO: person, organization, event, etc. without getting LOC RDF.
709
+ # ----
710
+ # Methods that can use FOAF or SCHEMA or both (or neither?)
474
711
 
475
- if @loc.iri.to_s =~ /name/
476
- parse_loc_auth_name
477
- elsif @loc.iri.to_s =~ /subjects/
478
- parse_loc_auth_subject
479
- else
480
- binding.pry if @@config.debug
481
- end
482
- # Optional elaboration of authority data with OCLC identity and works.
483
- get_oclc_links if @@config.get_oclc
712
# Type uriS as a generic agent: foaf:Agent and/or schema:Thing,
# depending on @@config.use_foaf / @@config.use_schema.
# Note: schema.org has no immediate parent for Person or Organization
def graph_type_agent(uriS)
  if @@config.use_foaf
    graph_insert_type(uriS, RDF::FOAF.Agent)
  end
  if @@config.use_schema
    graph_insert_type(uriS, RDF::SCHEMA.Thing)
  end
end

# Attach name to uriS as foaf:name and/or schema:name, depending on
# @@config.use_foaf / @@config.use_schema.
def graph_insert_name(uriS, name)
  if @@config.use_foaf
    graph_insert(uriS, RDF::FOAF.name, name)
  end
  if @@config.use_schema
    graph_insert(uriS, RDF::SCHEMA.name, name)
  end
end

# Type uriS as foaf:Organization and/or schema:Organization, depending
# on @@config.use_foaf / @@config.use_schema.
def graph_type_organization(uriS)
  if @@config.use_foaf
    graph_insert_type(uriS, RDF::FOAF.Organization)
  end
  if @@config.use_schema
    graph_insert_type(uriS, RDF::SCHEMA.Organization)
  end
end

# Type uriS as foaf:Person and/or schema:Person, depending on
# @@config.use_foaf / @@config.use_schema.
def graph_type_person(uriS)
  if @@config.use_foaf
    graph_insert_type(uriS, RDF::FOAF.Person)
  end
  if @@config.use_schema
    graph_insert_type(uriS, RDF::SCHEMA.Person)
  end
end
488
732
  end
489
733