marc2linkeddata 0.0.7 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/includes.rb ADDED
@@ -0,0 +1,44 @@
1
+ require 'dotenv'
2
+ Dotenv.load
3
+
4
+ require 'addressable/uri'
5
+ require 'json'
6
+ require 'rest_client'
7
+ RestClient.proxy = ENV['http_proxy'] unless ENV['http_proxy'].nil?
8
+ require 'thread'
9
+
10
+ require 'marc'
11
+ require 'linkeddata'
12
+ require 'rdf/4store'
13
+ require 'rdf/mongo'
14
+
15
+ require 'pry'
16
+ require 'pry-doc'
17
+ require 'ruby-progressbar'
18
+
19
+ require_relative 'marc2linkeddata/configuration'
20
+
21
+ require_relative 'marc2linkeddata/resource'
22
+ require_relative 'marc2linkeddata/isni'
23
+ require_relative 'marc2linkeddata/lib_auth'
24
+ require_relative 'marc2linkeddata/loc'
25
+ require_relative 'marc2linkeddata/viaf'
26
+
27
+ if ENV['SUL_CAP_ENABLED'].to_s.upcase == 'TRUE'
28
+ require_relative 'marc2linkeddata/cap'
29
+ end
30
+
31
+ require_relative 'marc2linkeddata/oclc_resource'
32
+ require_relative 'marc2linkeddata/oclc_identity'
33
+ require_relative 'marc2linkeddata/oclc_creative_work'
34
+ require_relative 'marc2linkeddata/oclc_work'
35
+
36
+ require_relative 'marc2linkeddata/sparql'
37
+ require_relative 'marc2linkeddata/sparql_dbpedia'
38
+ require_relative 'marc2linkeddata/sparql_pubmed'
39
+
40
+ require_relative 'marc2linkeddata/parseMarcAuthority'
41
+ #require_relative 'marc2linkeddata/parseMarcCatalog'
42
+
43
+
44
+
@@ -1,33 +1,5 @@
1
- require 'dotenv'
2
- Dotenv.load
3
-
4
- require 'addressable/uri'
5
- require 'json'
6
- require 'linkeddata'
7
- require 'marc'
8
- require 'rdf/4store'
9
- require 'ruby-progressbar'
10
-
11
- require 'pry'
12
- require 'pry-doc'
13
-
14
- require_relative 'marc2linkeddata/configuration'
15
-
16
- require_relative 'marc2linkeddata/resource'
17
- require_relative 'marc2linkeddata/isni'
18
- require_relative 'marc2linkeddata/lib_auth'
19
- require_relative 'marc2linkeddata/loc'
20
- require_relative 'marc2linkeddata/viaf'
21
-
22
- require_relative 'marc2linkeddata/oclc_resource'
23
- require_relative 'marc2linkeddata/oclc_identity'
24
- require_relative 'marc2linkeddata/oclc_creative_work'
25
- require_relative 'marc2linkeddata/oclc_work'
26
-
27
- require_relative 'marc2linkeddata/parseMarcAuthority'
28
- #require_relative 'marc2linkeddata/parseMarcCatalog'
29
- require_relative 'marc2linkeddata/sparql'
30
1
 
2
+ require_relative 'includes'
31
3
 
32
4
  module Marc2LinkedData
33
5
 
@@ -0,0 +1,15 @@
1
+ require_relative 'cap_db'
2
+
3
module Marc2LinkedData

  # Thin wrapper that owns a single CapDb database handle.
  class Cap

    # The Marc2LinkedData::CapDb instance created by #initialize;
    # callers may replace it through the writer.
    attr_accessor :db

    # Creates the CapDb handle and stores it in #db.
    # Any exception raised by CapDb.new propagates to the caller.
    def initialize
      self.db = Marc2LinkedData::CapDb.new
    end
  end

end
15
+
@@ -0,0 +1,44 @@
1
+ require 'logger'
2
+ require 'mysql'
3
+ require 'sequel'
4
+
5
module Marc2LinkedData

  # An interface to an SQL database using Sequel
  # @see http://sequel.jeremyevans.net/documentation.html Sequel RDoc
  # @see http://sequel.jeremyevans.net/rdoc/files/README_rdoc.html Sequel README
  # @see http://sequel.jeremyevans.net/rdoc/files/doc/code_order_rdoc.html Sequel code order
  class CapDb

    # Shared logger; the file is opened when this class body is evaluated,
    # so the relative 'log' directory must already exist at load time.
    @@log = Logger.new('log/cap_db.log')

    # The Sequel database handle returned by Sequel.mysql.
    attr_accessor :db
    # The connection settings (string keys) read from the environment.
    attr_accessor :db_config

    # Write a one-line summary of a model to the shared log.
    # @param m an object responding to table_name, columns and primary_key
    def self.log_model_info(m)
      summary = "table: #{m.table_name}, columns: #{m.columns}, pk: #{m.primary_key}"
      @@log.info summary
    end

    # Reads the SUL_CAP_DB_* environment variables (with defaults), opens
    # the MySQL connection through Sequel and loads the pagination extension.
    def initialize
      @db_config = {
        'host'     => ENV.fetch('SUL_CAP_DB_HOST', 'localhost'),
        'port'     => ENV.fetch('SUL_CAP_DB_PORT', '3306'),
        'user'     => ENV.fetch('SUL_CAP_DB_USER', 'capUser'),
        'password' => ENV.fetch('SUL_CAP_DB_PASSWORD', 'capPass'),
        'database' => ENV.fetch('SUL_CAP_DB_DATABASE', 'cap')
      }
      sequel_settings = {
        :encoding => 'utf8',
        :max_connections => 10,
        :logger => @@log
      }
      @db = Sequel.mysql(@db_config.merge(sequel_settings))
      @db.extension(:pagination)
      # Ensure the connection is good on startup, raises exceptions on failure
      puts "#{@db} connected: #{@db.test_connection}"
    end

  end

end
44
+
@@ -5,6 +5,9 @@ module Marc2LinkedData
5
5
 
6
6
  attr_accessor :debug
7
7
 
8
+ attr_accessor :threads
9
+ attr_accessor :thread_limit
10
+
8
11
  attr_accessor :field_auth_loc
9
12
  attr_accessor :field_auth_isni
10
13
  attr_accessor :field_auth_oclc
@@ -35,6 +38,8 @@ module Marc2LinkedData
35
38
 
36
39
  def initialize
37
40
  @debug = env_boolean('DEBUG')
41
+ @threads = env_boolean('THREADS')
42
# `nil.to_i` is 0 and 0 is truthy in Ruby, so `to_i || 25` could never
# produce the default. Use 25 unless a positive limit is configured.
@thread_limit = ENV['THREAD_LIMIT'].to_i
@thread_limit = 25 unless @thread_limit > 0
38
43
 
39
44
  # logging
40
45
  log_file = ENV['LOG_FILE'] || 'marc2ld.log'
@@ -57,10 +57,14 @@ module Marc2LinkedData
57
57
  # obj.nil? ? false : true
58
58
  end
59
59
 
60
# True when at least one entry of iri_types has an :o value equal to the
# MADS Geographic type URI.
def geographic?
  geographic_type = 'http://www.loc.gov/mads/rdf/v1#Geographic'
  matches = iri_types.filter { |solution| solution[:o] == geographic_type }
  matches.length > 0
end
63
63
 
64
# True when at least one entry of iri_types has an :o value equal to the
# MADS Title type URI.
def uniform_title?
  title_type = 'http://www.loc.gov/mads/rdf/v1#Title'
  matches = iri_types.filter { |solution| solution[:o] == title_type }
  matches.length > 0
end
67
+
64
68
  def get_oclc_identity
65
69
  # Try to get OCLC URI from LOC ID
66
70
  # http://oclc.org/developer/develop/web-services/worldcat-identities.en.html
@@ -143,7 +143,7 @@ module Marc2LinkedData
143
143
  # VIAF RSS feed for changes, e.g. http://viaf.org/viaf/181829329.rss
144
144
  field = get_fields(@@config.field_auth_viaf).first
145
145
  viaf_iri = get_iri(field, 'viaf.org')
146
- # If VIAF is not already in the MARC record, try to get from LOC.
146
+ # If VIAF is not already in the MARC record, try to get it from LOC.
147
147
  if viaf_iri.nil? && @@config.get_viaf
148
148
  viaf_iri = @loc.get_viaf rescue nil
149
149
  @@config.logger.debug 'Failed to resolve VIAF URI' if viaf_iri.nil?
@@ -185,6 +185,10 @@ module Marc2LinkedData
185
185
  }
186
186
  end
187
187
 
188
+
189
+ # BLOCK ----------------------------------------------------
190
+ # Parse fields
191
+
188
192
  def parse_008
189
193
  # http://www.loc.gov/marc/authority/concise/ad008.html
190
194
  field = get_fields('008').first
@@ -236,99 +240,288 @@ module Marc2LinkedData
236
240
  }
237
241
  end
238
242
 
239
# Parse MARC field 100 (a personal name or name-title heading).
# http://www.loc.gov/marc/authority/concise/ad100.html
#
# Example subfields of a name-title 100 field:
#   a = "Abe, Eiichi,"  d = "1927-"  t = "Hoppu dais\xC5\xAB."  l = "English"
#
# The result is memoized in @field100. It is a Hash with the keys
# :name, :date, :title, :lang and :error. A missing field or subfield
# yields an empty string for that key. If parsing raises, the failure is
# logged at debug level and a Hash of nil values with
# :error => 'ERROR_PERSON_NAME' is stored instead.
def field100
  return @field100 unless @field100.nil?
  field = get_fields('100').first
  # Value of the first subfield with the given code, or '' on any failure
  # (including a record without a 100 field).
  subfield_value = lambda do |code|
    begin
      field.subfields.select { |sf| sf.code == code }.first.value
    rescue StandardError
      ''
    end
  end
  name  = subfield_value.call('a')
  date  = subfield_value.call('d')
  title = subfield_value.call('t')
  lang  = subfield_value.call('l')
  @field100 = {
    :name => name.force_encoding('UTF-8'),
    :date => date,
    :title => title.force_encoding('UTF-8'),
    :lang => lang,
    :error => nil
  }
rescue => e
  @@config.logger.debug "Failed to parse field 100 for #{get_id}: #{e.message}"
  @field100 = {
    :name => nil,
    :date => nil,
    :title => nil,
    :lang => nil,
    :error => 'ERROR_PERSON_NAME'
  }
end
251
277
 
252
# Parse MARC field 110 (a corporate name).
# http://www.loc.gov/marc/authority/concise/ad110.html
#
# All values of subfields a, then b, then c are joined with ' : '.
# The result is memoized in @field110 as a Hash with :name and :error.
# A record without a 110 field yields an empty :name. If parsing raises,
# the failure is logged at debug level and
# { :name => nil, :error => 'ERROR_CORPORATE_NAME' } is stored instead.
def field110
  return @field110 unless @field110.nil?
  field = get_fields('110').first
  parts = %w[a b c].map do |code|
    begin
      field.subfields.select { |sf| sf.code == code }.map { |sf| sf.value }.compact
    rescue StandardError
      []
    end
  end
  name = parts.flatten.join(' : ')
  @field110 = {
    :name => name.force_encoding('UTF-8'),
    :error => nil
  }
rescue => e
  @@config.logger.debug "Failed to parse field 110 for #{get_id}: #{e.message}"
  @field110 = {
    :name => nil,
    :error => 'ERROR_CORPORATE_NAME'
  }
end
266
300
 
267
# Parse MARC field 111 (a meeting name).
# http://www.loc.gov/marc/authority/concise/ad111.html
#
# Example subfields:
#   a = "Joseph Priestley Symposium"  d = "(1974 :"  c = "Wilkes-Barre, Pa.)"
#
# The result is memoized in @field111 as a Hash with :name, :date, :city
# and :error. A missing field or subfield yields an empty string for that
# key. If parsing raises, the failure is logged at debug level and a Hash
# of nil values with :error => 'ERROR_MEETING_NAME' is stored instead.
def field111
  return @field111 unless @field111.nil?
  field = get_fields('111').first
  # Value of the first subfield with the given code, or '' on any failure.
  subfield_value = lambda do |code|
    begin
      field.subfields.select { |sf| sf.code == code }.first.value
    rescue StandardError
      ''
    end
  end
  name = subfield_value.call('a')
  date = subfield_value.call('d')
  city = subfield_value.call('c')
  @field111 = {
    :name => name.force_encoding('UTF-8'),
    :date => date,
    :city => city.force_encoding('UTF-8'),
    :error => nil
  }
rescue => e
  @@config.logger.debug "Failed to parse field 111 for #{get_id}: #{e.message}"
  @field111 = {
    :name => nil,
    :date => nil,
    :city => nil,
    :error => 'ERROR_MEETING_NAME'
  }
end
330
+
331
# Parse MARC field 130 (a uniform title).
# http://www.loc.gov/marc/authority/concise/ad130.html
# e.g. http://id.loc.gov/authorities/names/n79119331, whose record has a
# single 130 field with subfield a = "Fair maid of the Exchange"
# (plus a lot of 400 fields).
#
# The result is memoized in @field130 as a Hash with :title and :error.
# A record without a 130 field yields an empty :title. If the field
# exists but has no subfield a, or parsing raises for any other reason,
# the failure is logged at debug level and
# { :title => nil, :error => 'ERROR_UNIFORM_TITLE' } is stored instead.
def field130
  return @field130 unless @field130.nil?
  field = get_fields('130').first
  title =
    if field.nil?
      '' # the record has no 130 field
    else
      # Look for subfield a anywhere in the field; taking the first element
      # of an un-compacted collect returned nil whenever another subfield
      # preceded it, which turned a valid title into a parse error.
      subfield_a = field.subfields.find { |sf| sf.code == 'a' }
      raise ArgumentError, 'field 130 has no subfield a' if subfield_a.nil?
      subfield_a.value
    end
  @field130 = {
    :title => title.force_encoding('UTF-8'),
    :error => nil
  }
rescue => e
  @@config.logger.debug "Failed to parse field 130 for #{get_id}: #{e.message}"
  @field130 = {
    :title => nil,
    :error => 'ERROR_UNIFORM_TITLE'
  }
end
284
357
 
285
# Parse MARC field 151 (a geographic name).
# http://www.loc.gov/marc/authority/concise/ad151.html
# e.g. http://id.loc.gov/authorities/names/n79045127
#
# The result is memoized in @field151 as a Hash with :name and :error.
# A record without a 151 field yields an empty :name. If the field exists
# but has no subfield a, or parsing raises for any other reason, the
# failure is logged at debug level and
# { :name => nil, :error => 'ERROR_PLACE_NAME' } is stored instead.
def field151
  return @field151 unless @field151.nil?
  field = get_fields('151').first
  name =
    if field.nil?
      '' # the record has no 151 field
    else
      # Look for subfield a anywhere in the field; taking the first element
      # of an un-compacted collect returned nil whenever another subfield
      # preceded it, which turned a valid name into a parse error.
      subfield_a = field.subfields.find { |sf| sf.code == 'a' }
      raise ArgumentError, 'field 151 has no subfield a' if subfield_a.nil?
      subfield_a.value
    end
  @field151 = {
    :name => name.force_encoding('UTF-8'),
    :error => nil
  }
rescue => e
  @@config.logger.debug "Failed to parse field 151 for #{get_id}: #{e.message}"
  @field151 = {
    :name => nil,
    :error => 'ERROR_PLACE_NAME'
  }
end
296
378
 
297
- def parse_loc_auth_name
379
+
380
+ # BLOCK ----------------------------------------------------
381
+ # Authority record types
382
+
383
+ # TODO: other authority types?
384
+ # The MARC data differentiates them according to the tag number.
385
+ # Methods below ordered by field number.
386
+
387
+ # X00 - Personal Name
388
# True when field 100 parsed without error, has a non-empty :name and an
# empty :title (a 100 field with a title is a name-title heading instead).
def person?
  heading = field100
  return false unless heading[:error].nil?
  return false if heading[:name].empty?
  heading[:title].empty?
end
392
+
393
+ # X00 - Name-Title
394
# True when field 100 parsed without error and carries both a non-empty
# :name and a non-empty :title.
# e.g. http://id.loc.gov/authorities/names/n79044934
def name_title?
  heading = field100
  heading[:error].nil? && !(heading[:name].empty? || heading[:title].empty?)
end
402
+
403
+ # X10 - Corporate Name
404
# True when field 110 parsed without error AND produced a non-empty name.
# field110 reports { :name => '', :error => nil } for a record that has no
# 110 field at all, so checking :error alone classified every such record
# as a corporation; the name check mirrors person? and name_title?.
def corporation?
  heading = field110
  heading[:error].nil? && !heading[:name].to_s.empty?
end
407
+
408
+ # X11 - Meeting Name
409
# True when field 111 parsed without error AND produced a non-empty name.
# field111 reports an empty :name with :error => nil for a record that has
# no 111 field, so checking :error alone matched records of other types.
# e.g. http://id.loc.gov/authorities/names/n79044866
def conference?
  heading = field111
  heading[:error].nil? && !heading[:name].to_s.empty?
end
413
+
414
+ # X30 - Uniform Title
415
# True when field 130 parsed without error AND produced a non-empty title.
# field130 reports { :title => '', :error => nil } for a record that has
# no 130 field, so checking :error alone matched records of other types.
def uniform_title?
  heading = field130
  heading[:error].nil? && !heading[:title].to_s.empty?
end
418
+
419
+ # X51 - Jurisdiction / Geographic Name
420
+ # - http://www.loc.gov/mads/rdf/v1#Geographic
421
# True when field 151 parsed without error AND produced a non-empty name.
# field151 reports { :name => '', :error => nil } for a record that has no
# 151 field, so checking :error alone matched records of other types.
# e.g. http://id.loc.gov/authorities/names/n79046135.html
def geographic?
  heading = field151
  heading[:error].nil? && !heading[:name].to_s.empty?
end
425
+
426
+ # BLOCK ----------------------------------------------------
427
+ # Parse authority record
428
+
429
# Dispatch on the LOC IRI: name authorities are parsed either from the
# retrieved LOC RDF (when @@config.get_loc is set) or from the MARC record
# alone. Subject authorities and unrecognized IRIs are not processed;
# they only open a pry session when @@config.debug is set.
def parse_auth_details
  if @loc.iri.to_s =~ /name/
    # Retrieve and use LOC RDF, or use only the MARC record
    # (no RDF retrieval).
    return @@config.get_loc ? parse_auth_name_rdf : parse_auth_name
  end
  if @loc.iri.to_s =~ /subjects/
    # TODO: what to do with subjects?
    # parse_auth_subject_rdf
    binding.pry if @@config.debug
  else
    # What is this?
    binding.pry if @@config.debug
  end
end
447
+
448
+
449
+ # BLOCK ----------------------------------------------------
450
+ # Parse authority record without RDF
451
+
452
# Create triples for the various kinds of LOC name authority using only
# the MARC record (no RDF retrieval).
#
# The first matching predicate (person?, name_title?, corporation?,
# conference?, uniform_title?, geographic?) selects the rdf:type inserted
# for @lib.rdf_uri and the heading used as its name. Records that match
# none of them are typed through graph_type_agent and get no name.
# NOTE(review): RDF::SCHEMA.event and RDF::SCHEMA.title are used as
# rdf:type objects, unchanged from the previous code. Confirm these are
# the intended schema.org terms.
def parse_auth_name
  name = ''
  if person?
    name = field100[:name]
    graph_type_person(@lib.rdf_uri)

    # TODO: find another way to get first and last names without VIAF
    # # VIAF extracts first and last name, try to use them. Note
    # # that VIAF uses schema:name, schema:givenName, and schema:familyName.
    # if @@config.get_viaf && ! @viaf.nil?
    #   @viaf.family_names.each do |n|
    #     # ln = URI.encode(n)
    #     # TODO: try to get a language type, if VIAF provide it.
    #     # name = RDF::Literal.new(n, :language => :en)
    #     ln = RDF::Literal.new(n)
    #     @graph.insert RDF::Statement(@lib.rdf_uri, RDF::FOAF.familyName, ln) if @@config.use_foaf
    #     @graph.insert RDF::Statement(@lib.rdf_uri, RDF::SCHEMA.familyName, ln) if @@config.use_schema
    #   end
    #   @viaf.given_names.each do |n|
    #     # fn = URI.encode(n)
    #     # TODO: try to get a language type, if VIAF provide it.
    #     # name = RDF::Literal.new(n, :language => :en)
    #     fn = RDF::Literal.new(n)
    #     @graph.insert RDF::Statement(@lib.rdf_uri, RDF::FOAF.firstName, fn) if @@config.use_foaf
    #     @graph.insert RDF::Statement(@lib.rdf_uri, RDF::SCHEMA.givenName, fn) if @@config.use_schema
    #   end
    # end
  elsif name_title?
    # e.g. http://id.loc.gov/authorities/names/n79044934
    # http://viaf.org/viaf/182251325/rdf.xml
    name = field100[:name]
    graph_insert_type(@lib.rdf_uri, RDF::URI.new('http://www.loc.gov/mads/rdf/v1#NameTitle'))
  elsif corporation?
    name = field110[:name]
    graph_type_organization(@lib.rdf_uri)
  elsif conference?
    # e.g. http://id.loc.gov/authorities/names/n79044866
    # Separate the subfields with a space and drop missing ones; joining
    # with '' fused them ("... Symposium(1974 :Wilkes-Barre, Pa.)").
    parts = [field111[:name], field111[:date], field111[:city]]
    name = parts.reject { |part| part.nil? || part.empty? }.join(' ')
    graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.event)
  elsif uniform_title?
    name = field130[:title] # use 'name' for code below, although it's a title
    graph_insert_type(@lib.rdf_uri, RDF::URI.new('http://www.loc.gov/mads/rdf/v1#Title'))
    graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.title)
  elsif geographic?
    name = field151[:name] # use 'name' for code below, although it's a place
    graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.Place)
  else
    # TODO: find out what type this is.
    binding.pry if @@config.debug
    name = ''
    graph_type_agent(@lib.rdf_uri)
  end
  # Only emit a name triple for a usable heading; a nil heading would
  # otherwise be wrapped in a literal.
  unless name.to_s.empty?
    name = RDF::Literal.new(name)
    graph_insert_name(@lib.rdf_uri, name)
  end
end
512
+
513
+
514
+ # BLOCK ----------------------------------------------------
515
+ # Parse authority record using RDF
516
+
517
+ # Create triples for various kinds of LOC authority.
518
+ # This method relies on RDF data retrieval.
519
+ def parse_auth_name_rdf
520
+ @@config.logger.warn "#{@loc.iri} DEPRECATED" if @loc.deprecated?
521
+ name = ''
522
+ if @loc.person?
523
+ name = @loc.label || field100[:name]
524
+ graph_type_person(@lib.rdf_uri)
332
525
  # VIAF extracts first and last name, try to use them. Note
333
526
  # that VIAF uses schema:name, schema:givenName, and schema:familyName.
334
527
  if @@config.get_viaf && ! @viaf.nil?
@@ -349,29 +542,41 @@ module Marc2LinkedData
349
542
  @graph.insert RDF::Statement(@lib.rdf_uri, RDF::SCHEMA.givenName, fn) if @@config.use_schema
350
543
  end
351
544
  end
352
- elsif @loc.place?
545
+ elsif @loc.name_title?
546
+ # e.g. http://id.loc.gov/authorities/names/n79044934
547
+ # http://viaf.org/viaf/182251325/rdf.xml
548
+ name = @loc.label || field100[:name]
549
+ graph_insert_type(@lib.rdf_uri, RDF::URI.new('http://www.loc.gov/mads/rdf/v1#NameTitle'))
550
+ elsif @loc.corporation?
551
+ name = @loc.label || field110[:name]
552
+ graph_type_organization(@lib.rdf_uri)
553
+ elsif @loc.conference?
554
+ # e.g. http://id.loc.gov/authorities/names/n79044866
555
+ name = @loc.label || [field111[:name],field111[:date],field111[:city]].join('')
556
+ graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.event)
557
+ elsif @loc.geographic?
353
558
  # e.g. http://id.loc.gov/authorities/names/n79045127
354
- name = @loc.label || parse_151
355
- @graph.insert RDF::Statement(@lib.rdf_uri, RDF.type, RDF::SCHEMA.Place)
559
+ name = @loc.label || field151[:name]
560
+ graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.Place)
561
+ elsif @loc.uniform_title?
562
+ name = field130[:title] # use 'name' for code below, although it's a title
563
+ graph_insert_type(@lib.rdf_uri, RDF::URI.new('http://www.loc.gov/mads/rdf/v1#Title'))
564
+ graph_insert_type(@lib.rdf_uri, RDF::SCHEMA.title)
356
565
  else
357
566
  # TODO: find out what type this is.
358
567
  binding.pry if @@config.debug
359
568
  name = @loc.label || ''
360
- # Note: schema.org has no immediate parent for Person or Organization
361
- @graph.insert RDF::Statement(@lib.rdf_uri, RDF.type, RDF::FOAF.Agent) if @@config.use_foaf
362
- @graph.insert RDF::Statement(@lib.rdf_uri, RDF.type, RDF::SCHEMA.Thing) if @@config.use_schema
569
+ graph_type_agent(@lib.rdf_uri)
363
570
  end
364
571
  if name != ''
365
- # name_encoding = URI.encode(name)
366
572
  name = RDF::Literal.new(name)
367
- @graph.insert RDF::Statement(@lib.rdf_uri, RDF::FOAF.name, name) if @@config.use_foaf
368
- @graph.insert RDF::Statement(@lib.rdf_uri, RDF::SCHEMA.name, name) if @@config.use_schema
573
+ graph_insert_name(@lib.rdf_uri, name)
369
574
  end
370
575
  end
371
576
 
372
- def parse_loc_auth_subject
373
- # TODO: what to do with subjects?
374
- binding.pry if @@config.debug
577
+
578
+
579
+ def parse_auth_subject_rdf
375
580
  # The term 'subject' refers to:
376
581
  # X30 - Uniform Titles
377
582
  # X48 - Chronological Terms
@@ -400,7 +605,7 @@ module Marc2LinkedData
400
605
  # Try to get additional data from OCLC, using the RDFa
401
606
  # available in the OCLC identities pages.
402
607
  oclc_auth = OclcIdentity.new oclc_iri
403
- @graph.insert RDF::Statement(@loc.rdf_uri, RDF::OWL.sameAs, oclc_auth.rdf_uri)
608
+ graph_insert_sameAs(@loc.rdf_uri, oclc_auth.rdf_uri)
404
609
  oclc_auth.creative_works.each do |creative_work_uri|
405
610
  # Notes on work-around for OCLC data inconsistency:
406
611
  # RDFa for http://www.worldcat.org/identities/lccn-n79044798 contains:
@@ -411,17 +616,17 @@ module Marc2LinkedData
411
616
  #creative_work_iri = creative_work.to_s.gsub('worldcat.org','www.worldcat.org')
412
617
  #creative_work_iri = creative_work_iri.gsub('wwwwww','www') # in case it gets added already by OCLC
413
618
  creative_work = OclcCreativeWork.new creative_work_uri
414
- @graph.insert RDF::Statement(oclc_auth.rdf_uri, RDF::RDFS.seeAlso, creative_work.rdf_uri)
619
+ graph_insert_seeAlso(oclc_auth.rdf_uri, creative_work.rdf_uri)
415
620
  if @@config.oclc_auth2works
416
621
  # Try to use VIAF to relate auth to work as creator, contributor, editor, etc.
417
622
  # Note that this requires additional RDF retrieval for each work (slower processing).
418
623
  unless @viaf.nil?
419
624
  if creative_work.creator? @viaf.iri
420
- @graph.insert RDF::Statement(creative_work.rdf_uri, RDF::SCHEMA.creator, oclc_auth.rdf_uri)
625
+ graph_insert_creator(creative_work.rdf_uri, oclc_auth.rdf_uri)
421
626
  elsif creative_work.contributor? @viaf.iri
422
- @graph.insert RDF::Statement(creative_work.rdf_uri, RDF::SCHEMA.contributor, oclc_auth.rdf_uri)
627
+ graph_insert_contributor(creative_work.rdf_uri, oclc_auth.rdf_uri)
423
628
  elsif creative_work.editor? @viaf.iri
424
- @graph.insert RDF::Statement(creative_work.rdf_uri, RDF::SCHEMA.editor, oclc_auth.rdf_uri)
629
+ graph_insert_editor(creative_work.rdf_uri, oclc_auth.rdf_uri)
425
630
  end
426
631
  end
427
632
  # TODO: Is auth the subject of the work (as in biography) or both (as in autobiography)?
@@ -430,7 +635,7 @@ module Marc2LinkedData
430
635
  # Try to find the generic work entity for this example work.
431
636
  creative_work.get_works.each do |oclc_work_uri|
432
637
  oclc_work = OclcWork.new oclc_work_uri
433
- @graph.insert RDF::Statement(creative_work.rdf_uri, RDF::SCHEMA.exampleOfWork, oclc_work.rdf_uri)
638
+ graph_insert_exampleOfWork(creative_work.rdf_uri, oclc_work.rdf_uri)
434
639
  end
435
640
  end
436
641
  end
@@ -439,6 +644,9 @@ module Marc2LinkedData
439
644
 
440
645
  # TODO: use an institutional 'affiliation' entry, maybe 373? (optional field)
441
646
 
647
+ # BLOCK ----------------------------------------------------
648
+ # Graph methods
649
+
442
650
  def to_ttl
443
651
  graph.to_ttl
444
652
  end
@@ -463,27 +671,63 @@ module Marc2LinkedData
463
671
 
464
672
  # Get LOC control number and add catalog permalink? e.g.
465
673
  # http://lccn.loc.gov/n79046291
674
+ graph_insert_sameAs(@lib.rdf_uri, @loc.rdf_uri)
675
+ graph_insert_sameAs(@lib.rdf_uri, @viaf.rdf_uri) unless @viaf.nil?
676
+ graph_insert_sameAs(@lib.rdf_uri, @isni.rdf_uri) unless @isni.nil?
677
+ parse_auth_details
678
+ # Optional elaboration of authority data with OCLC identity and works.
679
+ get_oclc_links if @@config.get_oclc
680
+ # @@config.logger.info "Extracted #{@loc.id}"
681
+ @graph
682
+ end
466
683
 
467
- @graph.insert RDF::Statement(@lib.rdf_uri, RDF::OWL.sameAs, @loc.rdf_uri)
468
- @graph.insert RDF::Statement(@lib.rdf_uri, RDF::OWL.sameAs, @viaf.rdf_uri) unless @viaf.nil?
469
- @graph.insert RDF::Statement(@lib.rdf_uri, RDF::OWL.sameAs, @isni.rdf_uri) unless @isni.nil?
470
- return @graph unless @@config.get_loc
684
# Insert one statement (subject, predicate, object) into @graph.
def graph_insert(uriS, uriP, uriO)
  statement = RDF::Statement(uriS, uriP, uriO)
  @graph.insert statement
end

# Insert uriS owl:sameAs uriO.
def graph_insert_sameAs(uriS, uriO)
  predicate = RDF::OWL.sameAs
  graph_insert(uriS, predicate, uriO)
end

# Insert uriS rdfs:seeAlso uriO.
def graph_insert_seeAlso(uriS, uriO)
  predicate = RDF::RDFS.seeAlso
  graph_insert(uriS, predicate, uriO)
end

# Insert uriS schema:exampleOfWork uriO.
def graph_insert_exampleOfWork(uriS, uriO)
  predicate = RDF::SCHEMA.exampleOfWork
  graph_insert(uriS, predicate, uriO)
end

# Insert uriS schema:creator uriO.
def graph_insert_creator(uriS, uriO)
  predicate = RDF::SCHEMA.creator
  graph_insert(uriS, predicate, uriO)
end

# Insert uriS schema:contributor uriO.
def graph_insert_contributor(uriS, uriO)
  predicate = RDF::SCHEMA.contributor
  graph_insert(uriS, predicate, uriO)
end

# Insert uriS schema:editor uriO.
def graph_insert_editor(uriS, uriO)
  predicate = RDF::SCHEMA.editor
  graph_insert(uriS, predicate, uriO)
end

# Insert uriS rdf:type uriO.
def graph_insert_type(uriS, uriO)
  predicate = RDF.type
  graph_insert(uriS, predicate, uriO)
end
471
708
 
472
- # TODO: find codes in the marc record to differentiate the authority into
473
- # TODO: person, organization, event, etc. without getting LOC RDF.
709
+ # ----
710
+ # Methods that can use FOAF or SCHEMA or both (or neither?)
474
711
 
475
- if @loc.iri.to_s =~ /name/
476
- parse_loc_auth_name
477
- elsif @loc.iri.to_s =~ /subjects/
478
- parse_loc_auth_subject
479
- else
480
- binding.pry if @@config.debug
481
- end
482
- # Optional elaboration of authority data with OCLC identity and works.
483
- get_oclc_links if @@config.get_oclc
712
# Type uriS as a generic agent: foaf:Agent when @@config.use_foaf is set
# and schema:Thing when @@config.use_schema is set.
# Note: schema.org has no immediate parent for Person or Organization
def graph_type_agent(uriS)
  if @@config.use_foaf
    graph_insert_type(uriS, RDF::FOAF.Agent)
  end
  if @@config.use_schema
    graph_insert_type(uriS, RDF::SCHEMA.Thing)
  end
end

# Attach a name to uriS as foaf:name and/or schema:name, depending on
# @@config.use_foaf and @@config.use_schema.
def graph_insert_name(uriS, name)
  if @@config.use_foaf
    graph_insert(uriS, RDF::FOAF.name, name)
  end
  if @@config.use_schema
    graph_insert(uriS, RDF::SCHEMA.name, name)
  end
end

# Type uriS as foaf:Organization and/or schema:Organization, depending on
# @@config.use_foaf and @@config.use_schema.
def graph_type_organization(uriS)
  if @@config.use_foaf
    graph_insert_type(uriS, RDF::FOAF.Organization)
  end
  if @@config.use_schema
    graph_insert_type(uriS, RDF::SCHEMA.Organization)
  end
end

# Type uriS as foaf:Person and/or schema:Person, depending on
# @@config.use_foaf and @@config.use_schema.
def graph_type_person(uriS)
  if @@config.use_foaf
    graph_insert_type(uriS, RDF::FOAF.Person)
  end
  if @@config.use_schema
    graph_insert_type(uriS, RDF::SCHEMA.Person)
  end
end
488
732
  end
489
733