marc2linkeddata 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,131 @@
1
+
2
module Marc2LinkedData

  # A linked-data resource identified by an IRI.
  #
  # The IRI is held as an Addressable::URI in #iri.  The remaining methods
  # load an RDF graph for that IRI and answer simple questions about it.
  class Resource

    attr_accessor :iri
    # attr_reader :config

    # Total number of attempts get_rdf makes before giving up.
    RDF_LOAD_ATTEMPTS = 3

    # Configuration shared by all instances; assigned by the first call to
    # #initialize.  NOTE(review): kept as a class variable (rather than a
    # class-level instance variable) because subclasses defined in other
    # files may read @@config directly -- confirm before changing.
    @@config = nil

    # @param uri [String, Addressable::URI, nil] the resource IRI; a single
    #   trailing '/' is removed before validation
    # @raise [RuntimeError] 'invalid uri' when the value is not a valid URI
    def initialize(uri=nil)
      @@config ||= Marc2LinkedData.configuration
      # Strip off any trailing '/' first, so that the stripped value is what
      # gets validated.  (Validating before stripping let any string ending
      # in '/' through without a check.)
      candidate = uri.to_s.sub(%r{/\z}, '')
      parsed = parse_iri(candidate)
      raise 'invalid uri' unless parsed.instance_of? Addressable::URI
      @iri = parsed
    end

    # @return [String] the last path segment of the IRI
    def id
      @iri.basename
    end

    # This method is often overloaded in subclasses because
    # RDF services use variations in the URL 'extension' patterns; e.g.
    # see Loc#rdf and Viaf#rdf
    #
    # @return [RDF::Graph, nil] the memoized graph, or nil if loading failed
    def rdf
      # TODO: try to retrieve the rdf from a local triple store
      # TODO: if local triple store fails, try remote source(s)
      # TODO: if retrieved from a remote source, save the rdf to a local triple store
      @rdf ||= get_rdf(@iri.to_s)
    end

    # Load an RDF graph from uri4rdf, retrying on any StandardError.
    #
    # @param uri4rdf [String] location passed to RDF::Graph.load
    # @return [RDF::Graph, nil] the graph (also stored in @rdf), or nil after
    #   RDF_LOAD_ATTEMPTS failed attempts
    def get_rdf(uri4rdf)
      tries = 0
      begin
        tries += 1
        @rdf = RDF::Graph.load(uri4rdf)
      rescue => e
        retry if tries < RDF_LOAD_ATTEMPTS
        @@config.logger.warn "failed to load RDF from #{uri4rdf}: #{e.message}"
        binding.pry if @@config.debug
        nil
      end
    end

    # @return [RDF::URI] the IRI wrapped as an RDF::URI
    def rdf_uri
      RDF::URI.new(@iri)
    end

    # @return [Boolean] true when the graph has at least one rdf:type
    #   statement for the IRI
    def rdf_valid?
      iri_types.length > 0
    end

    # Query the graph for the rdf:type values of the IRI.
    #
    # @return [Array] query solutions; empty when no graph could be loaded
    def iri_types
      graph = rdf
      # A failed load leaves rdf as nil; report "no types" instead of
      # raising NoMethodError on nil.
      return [] if graph.nil?
      graph.query(SPARQL.parse("SELECT * WHERE { <#{@iri}> a ?o }"))
    end

    # Find the first object, among statements whose subject is the IRI, whose
    # string form matches id (case-insensitive).
    #
    # @param id [String] pattern source handed to Regexp.new
    # @return [Object, nil] the matching statement object, or nil
    def rdf_find_object(id)
      # TODO: convert this to an RDF.rb graph query?
      return nil unless rdf_valid?
      pattern = Regexp.new(id, Regexp::IGNORECASE)
      subject = @iri.to_s
      rdf.each_statement do |s|
        next unless s.subject == subject
        return s.object if s.object.to_s =~ pattern
      end
      nil
    end

    # Find the first statement subject whose string form matches id
    # (case-insensitive).
    #
    # @param id [String] pattern source handed to Regexp.new
    # @return [Object, nil] the matching statement subject, or nil
    def rdf_find_subject(id)
      # TODO: convert this to an RDF.rb graph query?
      return nil unless rdf_valid?
      pattern = Regexp.new(id, Regexp::IGNORECASE)
      rdf.each_statement do |s|
        return s.subject if s.subject.to_s =~ pattern
      end
      nil
    end

    # Check an external authority URL with an HTTP HEAD request.
    #
    # @param url [String] the URL to check
    # @return [String, nil] url for 200/302/303, the 'location' header for
    #   301, and nil for 404, any other status, or a failed request
    def resolve_external_auth(url)
      res = Marc2LinkedData.http_head_request(url)
      case res.code
      when '200', '302', '303'
        # 302 Moved Temporarily / 303 See Other:
        # use the current URL, most get requests will follow a 302 or 303
        @@config.logger.debug "Mapped #{@iri}\t-> #{url}"
        url
      when '301'
        # 301 Moved Permanently
        url = res['location']
        @@config.logger.debug "Mapped #{@iri}\t-> #{url}"
        url
      when '404'
        @@config.logger.warn "#{@iri}\t// #{url}"
        nil
      else
        binding.pry if @@config.debug
        @@config.logger.error "unknown http response code (#{res.code}) for #{@iri}"
        nil
      end
    rescue => e
      # Best effort: a failed request still means "not resolved", but leave a
      # trace of why instead of discarding the error.
      @@config.logger.warn "#{@iri}\t// #{url} (#{e.class}: #{e.message})"
      nil
    end

    # Load the sameas.org graph for the IRI.
    #
    # @return [RDF::Graph]
    def same_as
      # URI.encode was removed in Ruby 3.0; the IRI is a query parameter
      # value, so it is escaped as a form component.
      same_as_url = 'http://sameas.org/rdf?uri=' + URI.encode_www_form_component(@iri.to_s)
      RDF::Graph.load(same_as_url)
    end

    # @return [Array] the owl:sameAs objects for the IRI in the same_as graph
    def same_as_array
      q = SPARQL.parse("SELECT * WHERE { <#{@iri}> <http://www.w3.org/2002/07/owl#sameAs> ?o }")
      same_as.query(q).map {|s| s[:o] }
    end

    private

    # Parse text into an Addressable::URI, or return nil when it is not a URI.
    def parse_iri(text)
      return nil unless text =~ /\A#{URI::regexp}\z/
      Addressable::URI.parse(text)
    rescue Addressable::URI::InvalidURIError
      nil
    end

  end

end
130
+
131
+
@@ -0,0 +1,55 @@
1
+ require 'base64'
2
+
3
module Marc2LinkedData

  # Holds two SPARQL clients: one for the public DBpedia endpoint and one for
  # a local LOC endpoint that uses HTTP basic authentication.
  class Sparql

    # https://github.com/ruby-rdf/sparql-client

    # attr_reader :config
    attr_reader :dbpedia
    attr_reader :local_loc

    # Build both clients.  The local LOC URL and credentials are read from
    # Marc2LinkedData.configuration.
    def initialize
      config = Marc2LinkedData.configuration
      @dbpedia = SPARQL::Client.new('http://dbpedia.org/sparql')
      # local LOC SPARQL client
      # strict_encode64 never inserts line feeds; encode64 adds one every 60
      # output characters, which corrupts the header for long credentials.
      auth_code = Base64.strict_encode64("#{config.local_loc_user}:#{config.local_loc_pass}")
      headers = {
        'Accept' => 'application/sparql-results+json',
        'Authorization' => "Basic #{auth_code}",
      }
      # Passed as keywords so it works whether the client constructor takes a
      # trailing options hash or keyword arguments.
      @local_loc = SPARQL::Client.new(config.local_loc_url, headers: headers)
    end

    # Print every predicate/object solution for auth_uri from the local LOC
    # endpoint.
    #
    # @param auth_uri [#to_s] the subject IRI to look up
    # @return [Object] the query result
    def local_loc_auth(auth_uri)
      result = local_loc.query("SELECT * WHERE { <#{auth_uri}> ?p ?o }")
      result.each_solution {|s| puts s.inspect }
      # Only drop into the debugger when debugging is switched on.
      binding.pry if Marc2LinkedData.configuration.debug
      result
    end

    # def sparql_dbpedia(query)
    #   dbpedia.query(query)
    #   # result = dbpedia.query('ASK WHERE { ?s ?p ?o }')
    #   # puts result.inspect   #=> true or false
    #   # result = dbpedia.query('SELECT * WHERE { ?s ?p ?o } LIMIT 10')
    #   # result.each_solution {|s| puts s.inspect }
    # end

    # For reference, note that there is an allegrograph ruby gem, see
    # https://github.com/emk/rdf-agraph

    # For reference, note that there is a ruby gem for RDF on mongodb, see
    # https://rubygems.org/gems/rdf-mongo

  end

end
54
+
55
+
@@ -0,0 +1,48 @@
1
+ require_relative 'resource'
2
+
3
module Marc2LinkedData

  # A Resource for VIAF identifiers.
  class Viaf < Resource

    PREFIX = 'http://viaf.org/viaf/'.freeze

    # def id
    #   return nil if iri.nil?
    #   iri.path.gsub('viaf/','').gsub('/','')
    # end

    # Load the graph from the IRI with '/rdf.xml' appended.
    #
    # @return [RDF::Graph, nil] the memoized graph; nil when there is no IRI
    #   or get_rdf returned nil
    def rdf
      return nil if iri.nil?
      @rdf ||= get_rdf("#{iri}/rdf.xml")
    end

    # Derive the ISNI IRI from the VIAF graph and check it with
    # resolve_external_auth.
    #
    # @return [String, nil] the memoized result of resolve_external_auth, or
    #   nil when there is no IRI, no valid graph, or no 'isni' subject
    def get_isni
      return nil if iri.nil?
      # Answer from the memo before touching the graph again.
      return @isni_iri unless @isni_iri.nil?
      return nil unless rdf_valid?
      # Try to get ISNI source for VIAF
      # e.g. http://viaf.org/viaf/sourceID/ISNI%7C0000000109311081#skos:Concept
      isni_subject = rdf_find_subject 'isni'
      # Without a subject there is nothing to resolve; skip the HEAD request
      # that would otherwise be made against an empty URL.
      return nil if isni_subject.nil?
      isni_path = URI.parse(isni_subject.to_s).path
      isni_iri = isni_path.sub('/viaf/sourceID/ISNI%7C','http://www.isni.org/isni/')
      @isni_iri = resolve_external_auth(isni_iri)
    end

    # @return [Array<String>] distinct schema.org givenName values for the IRI
    def given_names
      distinct_values 'http://schema.org/givenName'
    end

    # @return [Array<String>] distinct schema.org familyName values for the IRI
    def family_names
      distinct_values 'http://schema.org/familyName'
    end

    private

    # Distinct string values of predicate for the IRI, in first-seen order.
    def distinct_values(predicate)
      q = SPARQL.parse("SELECT * WHERE { <#{@iri}> <#{predicate}> ?o }")
      rdf.query(q).map {|s| s[:o].to_s }.uniq
    end

  end

end
48
+
@@ -0,0 +1,64 @@
1
+ require 'dotenv'
2
+ Dotenv.load
3
+
4
+ require 'addressable/uri'
5
+ require 'json'
6
+ require 'linkeddata'
7
+ require 'marc'
8
+ require 'rdf/4store'
9
+ require 'ruby-progressbar'
10
+
11
+ require 'pry'
12
+ require 'pry-doc'
13
+
14
+ require_relative 'marc2linkeddata/configuration'
15
+
16
+ require_relative 'marc2linkeddata/resource'
17
+ require_relative 'marc2linkeddata/isni'
18
+ require_relative 'marc2linkeddata/lib_auth'
19
+ require_relative 'marc2linkeddata/loc'
20
+ require_relative 'marc2linkeddata/viaf'
21
+
22
+ require_relative 'marc2linkeddata/oclc_resource'
23
+ require_relative 'marc2linkeddata/oclc_identity'
24
+ require_relative 'marc2linkeddata/oclc_creative_work'
25
+ require_relative 'marc2linkeddata/oclc_work'
26
+
27
+ require_relative 'marc2linkeddata/parseMarcAuthority'
28
+ #require_relative 'marc2linkeddata/parseMarcCatalog'
29
+ require_relative 'marc2linkeddata/sparql'
30
+
31
+
32
module Marc2LinkedData

  # configuration at the module level, see
  # http://brandonhilkert.com/blog/ruby-gem-configuration-patterns/

  class << self
    attr_writer :configuration
  end

  # @return [Configuration] the current configuration, created on first use
  def self.configuration
    @configuration ||= Configuration.new
  end

  # Replace the current configuration with a fresh Configuration.
  #
  # @return [Configuration] the new configuration
  def self.reset
    @configuration = Configuration.new
  end

  # Yield the current configuration so the caller can modify it.
  def self.configure
    yield(configuration)
  end

  # Issue an HTTP HEAD request.
  #
  # @param url [String] the URL to request
  # @return [Net::HTTPResponse] the response to the HEAD request
  def self.http_head_request(url)
    uri = URI.parse(url)
    # Net::HTTP.start speaks plain HTTP unless use_ssl is set, so enable TLS
    # explicitly for https URLs.
    Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
      http.request Net::HTTP::Head.new(uri)
    end
  end

  # Write one "@prefix" line per configured prefix, followed by two newlines.
  #
  # @param file [#write] the destination
  def self.write_prefixes(file)
    # Go through the accessor: @configuration is nil until the first call to
    # Marc2LinkedData.configuration.
    configuration.prefixes.each_pair {|k,v| file.write "@prefix #{k}: <#{v}> .\n" }
    file.write("\n\n")
  end

end
64
+
@@ -0,0 +1,40 @@
1
+ # -*- encoding: utf-8 -*-
2
# Put the gem's lib directory on the load path.
lib = File.expand_path('../lib/', __FILE__)
$:.unshift lib unless $:.include?(lib)

Gem::Specification.new do |s|
  s.name        = 'marc2linkeddata'
  s.version     = '0.0.1'
  s.licenses    = ['Apache-2.0']

  s.platform    = Gem::Platform::RUBY
  s.authors     = ['Darren Weber']
  s.email       = ['dlweber@stanford.edu']
  s.summary     = 'Convert Marc21 records to linked data, for use in SUL/DLSS projects'
  s.description = 'Utilities for translation of Marc21 records to linked open data.'
  s.homepage    = 'https://github.com/darrenleeweber/marc2linkeddata'

  s.required_rubygems_version = '>= 1.3.6'
  s.required_ruby_version = '>= 2.1.0'

  s.add_dependency 'addressable'
  s.add_dependency 'linkeddata'
  s.add_dependency 'marc'
  s.add_dependency 'rdf-4store'
  s.add_dependency 'ruby-progressbar'
  s.add_dependency 'dotenv'

  s.add_dependency 'hiredis'
  s.add_dependency 'redis'

  # The library's entry file requires 'pry' and 'pry-doc' unconditionally,
  # so they have to be installed with the gem, not only for development.
  s.add_dependency 'pry'
  s.add_dependency 'pry-doc'

  s.add_development_dependency 'rspec'

  # Package everything tracked by git except the development-only files.
  dev_files = ['.gitignore','bin/setup.sh','bin/test.sh']
  s.files = `git ls-files`.split($/) - dev_files

  s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
  s.test_files  = s.files.grep(%r{^(test|spec|features)/})
end
@@ -0,0 +1,84 @@
1
+ require "spec_helper"
2
+
3
module Marc2LinkedData

  # Examples for the default values and writers of Configuration.
  describe Configuration do

    describe '#debug' do
      # Clear DEBUG for the example only, then put the previous value back so
      # the change does not leak into other examples.
      # NOTE(review): presumably Configuration reads ENV['DEBUG'] -- confirm.
      around :each do |example|
        saved_debug = ENV['DEBUG']
        ENV['DEBUG'] = nil
        begin
          example.run
        ensure
          ENV['DEBUG'] = saved_debug
        end
      end

      it 'default value is false' do
        config = Configuration.new
        expect(config.debug).to be_falsey
      end
    end

    describe '#debug=' do
      it 'can set value' do
        config = Configuration.new
        config.debug = true
        expect(config.debug).to be_truthy
      end
    end

    # The three redis flags share the same pair of examples.
    [:redis4marc, :redis_read, :redis_write].each do |flag|
      describe "##{flag}" do
        it 'default value is false' do
          config = Configuration.new
          expect(config.public_send(flag)).to be_falsey
        end
      end

      describe "##{flag}=" do
        it 'can set value' do
          config = Configuration.new
          config.public_send("#{flag}=", true)
          expect(config.public_send(flag)).to be_truthy
        end
      end
    end

    describe '#prefixes' do
      it 'default value is a hash' do
        config = Configuration.new
        expect(config.prefixes).to be_instance_of Hash
      end
    end

    describe '#prefixes=' do
      it 'can set value to hash' do
        config = Configuration.new
        config.prefixes = {}
        expect(config.prefixes).to be_empty
      end
    end

  end
end
@@ -0,0 +1,71 @@
1
+ require "spec_helper"
2
+
3
module Marc2LinkedData

  # Examples for Loc, run against one fixed LOC name authority.
  describe Loc do

    # One Loc instance is built once and shared by every example.
    before(:all) do
      # loc_urls = ['http://id.loc.gov/authorities/names/no99010609', 'http://id.loc.gov/authorities/names/no99010609/']
      @loc_id = 'no99010609'
      @loc_url = 'http://id.loc.gov/authorities/names/no99010609'
      @loc = Loc.new(@loc_url)
      @viaf_url = 'http://viaf.org/viaf/85312226'
    end

    # Drop the shared fixtures once the group has finished.
    after(:all) do
      @loc_id = nil
      @loc_url = nil
      @loc = nil
    end

    describe '#rdf' do
      it 'should be an instance of RDF::Graph' do
        expect(@loc.rdf).to be_instance_of(RDF::Graph)
      end
    end

    describe '#rdf_valid?' do
      it 'should be true' do
        expect(@loc.rdf_valid?).to be_truthy
      end
    end

    describe '#same_as_array' do
      it 'should be populated' do
        expect(@loc.same_as_array).not_to be_empty
      end
    end

    describe '#get_viaf' do
      it 'should equal the viaf url' do
        expect(@loc.get_viaf).to eq(@viaf_url)
      end
    end

    # TODO: add tests for different types of records, e.g.
    # authorities:
    # person, organisation, conference, etc.

  end
end
57
+
58
+
59
+ # # valid data (Berners-Lee, Tim)
60
+ # loc_iris = ['http://id.loc.gov/authorities/names/no99010609', 'http://id.loc.gov/authorities/names/no99010609/']
61
+ # viaf_iri = 'http://viaf.org/viaf/85312226'
62
+ # # valid data (Knuth, Donald Ervin)
63
+ # # loc_iris = ['http://id.loc.gov/authorities/names/n79135509', 'http://id.loc.gov/authorities/names/n79135509/']
64
+ # # viaf_iri = 'http://viaf.org/viaf/7466303'
65
+ # loc_iris.each do |iri|
66
+ # id = Addressable::URI.parse(iri).basename
67
+ # loc = Marc2LinkedData::Loc.new iri
68
+ # end
69
+ # # invalid data
70
+ # loc = Marc2LinkedData.Loc.new 'This is not an LOC IRI' rescue nil
71
+ # raise "Loc.initialize failed to raise error." unless loc.nil?
@@ -0,0 +1,53 @@
1
+ require "spec_helper"
2
+
3
module Marc2LinkedData

  # Examples for Resource construction and its #id / #iri readers.
  describe Resource do

    # One Resource is built once and shared by the #id and #iri examples.
    before(:all) do
      @auth_id = 'no99010609'
      @auth_url = 'http://id.loc.gov/authorities/names/no99010609'
      @auth = Resource.new(@auth_url)
    end

    # Drop the shared fixtures once the group has finished.
    after(:all) do
      @auth_url = nil
      @auth = nil
    end

    describe '#initialize' do
      it 'should not raise error for a valid iri' do
        # iri_valid = 'http://id.loc.gov/authorities/names/no99010609'
        expect { Resource.new(@auth_url) }.not_to raise_error
      end

      it 'should raise error for an invalid iri' do
        expect { Resource.new('This is not a URL') }.to raise_error(RuntimeError)
      end
    end

    describe '#id' do
      it 'should equal the url basename' do
        expect(@auth.id).to eq(@auth_id)
      end
    end

    describe '#iri' do
      it 'should equal the auth url' do
        expect(@auth.iri.to_s).to eq(@auth_url)
      end

      it 'should be an instance of Addressable::URI' do
        expect(@auth.iri).to be_instance_of(Addressable::URI)
      end
    end

  end

end
+
53
+
@@ -0,0 +1,53 @@
1
+ require 'spec_helper'
2
+
3
module Marc2LinkedData

  # Examples for Viaf, run against one fixed VIAF record.
  describe Viaf do

    # One Viaf instance is built once and shared by every example.
    before(:all) do
      # valid data (Knuth, Donald Ervin)
      @viaf_id = '7466303'
      @viaf_url = 'http://viaf.org/viaf/7466303'
      @viaf = Viaf.new(@viaf_url)
      @isni_url = 'http://www.isni.org/isni/000000012119421X'
    end

    # Drop the shared fixtures once the group has finished.
    after(:all) do
      @viaf_id = nil
      @viaf_url = nil
      @isni_url = nil
      @viaf = nil
    end

    describe '#rdf' do
      it 'should be an instance of RDF::Graph' do
        expect(@viaf.rdf).to be_instance_of(RDF::Graph)
      end
    end

    describe '#rdf_valid?' do
      it 'should be true' do
        expect(@viaf.rdf_valid?).to be_truthy
      end
    end

    describe '#same_as_array' do
      it 'should be populated' do
        expect(@viaf.same_as_array).not_to be_empty
      end
    end

    describe '#get_isni' do
      it 'should equal the isni url' do
        expect(@viaf.get_isni).to eq(@isni_url)
      end
    end

  end
end
53
+
@@ -0,0 +1,39 @@
1
+ require 'spec_helper'
2
+
3
# Examples for the module-level configuration helpers.
describe Marc2LinkedData do

  # Both groups below start with debug switched on and finish with a fresh
  # configuration, so the hooks are declared once for the whole file.
  before(:each) do
    Marc2LinkedData.configure { |config| config.debug = true }
  end

  after(:each) do
    Marc2LinkedData.reset
  end

  describe "#configure" do
    it "returns a hash of options" do
      current = Marc2LinkedData.configuration
      expect(current).to be_instance_of(Marc2LinkedData::Configuration)
      expect(current.debug).to be_truthy
    end
  end

  describe ".reset" do
    it "resets the configuration" do
      Marc2LinkedData.reset
      current = Marc2LinkedData.configuration
      expect(current).to be_instance_of(Marc2LinkedData::Configuration)
      expect(current.debug).to be_falsey
    end
  end

end
39
+