marc2linkeddata 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,131 @@
1
+
2
module Marc2LinkedData

  # Base class for a linked-data resource that is identified by an IRI.
  #
  # The IRI is validated and normalized (one trailing '/' removed) on
  # construction; the remaining methods retrieve and query the RDF that
  # describes the resource.
  class Resource

    attr_accessor :iri
    # attr_reader :config

    # Module configuration, cached on first instantiation.
    @@config = nil

    # @param uri [String, Addressable::URI, nil] the IRI of the resource
    # @raise [RuntimeError] 'invalid uri' when uri is not a usable URI
    def initialize(uri = nil)
      @@config ||= Marc2LinkedData.configuration
      # Strip off any trailing '/'
      candidate = uri.to_s.chomp('/')
      # Validate strings before parsing, so that arbitrary text ending in '/'
      # cannot slip through. Addressable::URI instances are accepted as-is.
      parsed = nil
      if uri.is_a?(Addressable::URI) || candidate =~ /\A#{URI.regexp}\z/
        parsed = begin
          Addressable::URI.parse(candidate)
        rescue StandardError
          nil
        end
      end
      raise 'invalid uri' unless parsed.instance_of? Addressable::URI
      @iri = parsed
    end

    # @return [String] the basename of the IRI
    def id
      @iri.basename
    end

    # This method is often overloaded in subclasses because
    # RDF services use variations in the URL 'extension' patterns; e.g.
    # see Loc#rdf and Viaf#rdf
    #
    # @return [RDF::Graph, nil] the memoized graph, nil when loading failed
    def rdf
      return @rdf unless @rdf.nil?
      # TODO: try to retrieve the rdf from a local triple store
      # TODO: if local triple store fails, try remote source(s)
      # TODO: if retrieved from a remote source, save the rdf to a local triple store
      @rdf = get_rdf(@iri.to_s)
    end

    # Load an RDF graph, making up to three attempts.
    #
    # @param uri4rdf [String] the location of the RDF document
    # @return [RDF::Graph, nil] nil when every attempt raised an error
    def get_rdf(uri4rdf)
      tries = 0
      begin
        tries += 1
        @rdf = RDF::Graph.load(uri4rdf)
      rescue StandardError
        retry if tries <= 2
        binding.pry if @@config.debug
        nil
      end
    end

    # @return [RDF::URI] the IRI as an RDF::URI
    def rdf_uri
      RDF::URI.new(@iri)
    end

    # @return [Boolean] true when the RDF declares at least one type for the IRI
    def rdf_valid?
      !iri_types.empty?
    end

    # Query the RDF for the types of the IRI.
    #
    # @return [Array] the query solutions; empty when no RDF could be loaded
    def iri_types
      graph = rdf
      # A failed load yields nil; report 'no types' instead of failing on nil.
      return [] if graph.nil?
      graph.query(SPARQL.parse("SELECT * WHERE { <#{@iri}> a ?o }"))
    end

    # Find the first object of a statement about this IRI that matches id.
    #
    # @param id [String] a regular expression source, matched case-insensitively
    # @return [Object, nil] the matching statement object, or nil
    def rdf_find_object(id)
      # TODO: convert this to an RDF.rb graph query?
      return nil unless rdf_valid?
      pattern = Regexp.new(id, Regexp::IGNORECASE)
      subject = @iri.to_s
      rdf.each_statement do |s|
        next unless s.subject == subject
        return s.object if s.object.to_s =~ pattern
      end
      nil
    end

    # Find the first statement subject that matches id.
    #
    # @param id [String] a regular expression source, matched case-insensitively
    # @return [Object, nil] the matching statement subject, or nil
    def rdf_find_subject(id)
      # TODO: convert this to an RDF.rb graph query?
      return nil unless rdf_valid?
      pattern = Regexp.new(id, Regexp::IGNORECASE)
      rdf.each_statement do |s|
        return s.subject if s.subject.to_s =~ pattern
      end
      nil
    end

    # Check an external authority URL with an HTTP HEAD request.
    #
    # @param url [String] the URL to check
    # @return [String, nil] the URL to use (the 'location' header for a 301),
    #   or nil for a 404, an unknown response code or a failed request
    def resolve_external_auth(url)
      res = Marc2LinkedData.http_head_request(url)
      case res.code
      when '200', '302', '303'
        # 302 Moved Temporarily
        # 303 See Other
        # Use the current URL, most get requests will follow a 302 or 303
        @@config.logger.debug "Mapped #{@iri}\t-> #{url}"
        url
      when '301'
        # 301 Moved Permanently
        url = res['location']
        @@config.logger.debug "Mapped #{@iri}\t-> #{url}"
        url
      when '404'
        @@config.logger.warn "#{@iri}\t// #{url}"
        nil
      else
        # unexpected response code
        binding.pry if @@config.debug
        @@config.logger.error "unknown http response code (#{res.code}) for #{@iri}"
        nil
      end
    rescue StandardError
      nil
    end

    # Load the sameas.org graph for this IRI.
    #
    # @return [RDF::Graph]
    def same_as
      # URI.encode was removed in Ruby 3.0; the IRI is a query parameter
      # value, so it is encoded as a form component.
      same_as_url = 'http://sameas.org/rdf?uri=' + URI.encode_www_form_component(@iri.to_s)
      RDF::Graph.load(same_as_url)
    end

    # @return [Array] the owl:sameAs objects for this IRI, from #same_as
    def same_as_array
      q = SPARQL.parse("SELECT * WHERE { <#{@iri}> <http://www.w3.org/2002/07/owl#sameAs> ?o }")
      same_as.query(q).map { |s| s[:o] }
    end

  end

end
130
+
131
+
@@ -0,0 +1,55 @@
1
+ require 'base64'
2
+
3
module Marc2LinkedData

  # Holds the SPARQL clients used by the library: a public DBpedia endpoint
  # and a local LOC endpoint that is accessed with HTTP basic authentication.
  class Sparql

    # https://github.com/ruby-rdf/sparql-client

    # attr_reader :config
    attr_reader :dbpedia
    attr_reader :local_loc

    # Build the clients from the module configuration (local_loc_url,
    # local_loc_user and local_loc_pass).
    def initialize
      @config = Marc2LinkedData.configuration
      @dbpedia = SPARQL::Client.new('http://dbpedia.org/sparql')
      # local LOC SPARQL client
      # strict_encode64 never inserts line feeds; encode64 wraps its output
      # every 60 characters, which corrupts the header for long credentials.
      auth_code = Base64.strict_encode64("#{@config.local_loc_user}:#{@config.local_loc_pass}")
      headers = {
        'Accept' => 'application/sparql-results+json',
        'Authorization' => "Basic #{auth_code}",
      }
      # Options are passed as keywords so that the call works whether the
      # client takes an options hash or keyword arguments.
      @local_loc = SPARQL::Client.new(@config.local_loc_url, headers: headers)
    end

    # Print every statement the local LOC endpoint holds about auth_uri.
    #
    # @param auth_uri [#to_s] the subject IRI to look up
    # @return [Object] the query result returned by the client
    def local_loc_auth(auth_uri)
      result = local_loc.query("SELECT * WHERE { <#{auth_uri}> ?p ?o }")
      result.each_solution { |s| puts s.inspect }
      # Only stop in the debugger when debugging is enabled.
      binding.pry if @config.debug
      result
    end

    # def sparql_dbpedia(query)
    #   dbpedia.query(query)
    #   # result = dbpedia.query('ASK WHERE { ?s ?p ?o }')
    #   # puts result.inspect #=> true or false
    #   # result = dbpedia.query('SELECT * WHERE { ?s ?p ?o } LIMIT 10')
    #   # result.each_solution {|s| puts s.inspect }
    # end


    # For reference, note that there is an allegrograph ruby gem, see
    # https://github.com/emk/rdf-agraph

    # For reference, note that there is a ruby gem for RDF on mongodb, see
    # https://rubygems.org/gems/rdf-mongo

  end

end
54
+
55
+
@@ -0,0 +1,48 @@
1
+ require_relative 'resource'
2
+
3
module Marc2LinkedData

  # A VIAF authority resource, e.g. http://viaf.org/viaf/7466303
  class Viaf < Resource

    PREFIX = 'http://viaf.org/viaf/'

    # def id
    #   return nil if iri.nil?
    #   iri.path.gsub('viaf/','').gsub('/','')
    # end

    # Retrieve the RDF from the IRI with '/rdf.xml' appended.
    #
    # @return [Object, nil] the memoized result of get_rdf; nil without an IRI
    def rdf
      return nil if iri.nil?
      return @rdf unless @rdf.nil?
      @rdf = get_rdf("#{iri}/rdf.xml")
    end

    # Try to get the ISNI source for this VIAF record.
    #
    # @return [String, nil] the resolved ISNI URL, or nil when the record has
    #   no ISNI subject or the URL cannot be resolved
    def get_isni
      return nil if iri.nil?
      # Return the cached value before validating the RDF again.
      return @isni_iri unless @isni_iri.nil?
      return nil unless rdf_valid?
      # e.g. http://viaf.org/viaf/sourceID/ISNI%7C0000000109311081#skos:Concept
      isni_subject = rdf_find_subject 'isni'
      # Without an ISNI subject there is nothing to resolve.
      return nil if isni_subject.nil?
      isni_src = URI.parse(isni_subject.to_s)
      isni_url = isni_src.path.sub('/viaf/sourceID/ISNI%7C', 'http://www.isni.org/isni/')
      @isni_iri = resolve_external_auth(isni_url)
    end

    # @return [Array<String>] the distinct schema:givenName values for the IRI
    def given_names
      literal_values('http://schema.org/givenName')
    end

    # @return [Array<String>] the distinct schema:familyName values for the IRI
    def family_names
      literal_values('http://schema.org/familyName')
    end

    private

    # Collect the distinct objects, as strings, of predicate for this IRI.
    #
    # @param predicate [String] the predicate IRI
    # @return [Array<String>] empty when no RDF is available
    def literal_values(predicate)
      graph = rdf
      return [] if graph.nil?
      q = SPARQL.parse("SELECT * WHERE { <#{@iri}> <#{predicate}> ?o }")
      # uniq keeps first-seen order and does not need the 'set' library
      graph.query(q).map { |s| s[:o].to_s }.uniq
    end

  end

end
48
+
@@ -0,0 +1,64 @@
1
+ require 'dotenv'
2
+ Dotenv.load
3
+
4
+ require 'addressable/uri'
5
+ require 'json'
6
+ require 'linkeddata'
7
+ require 'marc'
8
+ require 'rdf/4store'
9
+ require 'ruby-progressbar'
10
+
11
+ require 'pry'
12
+ require 'pry-doc'
13
+
14
+ require_relative 'marc2linkeddata/configuration'
15
+
16
+ require_relative 'marc2linkeddata/resource'
17
+ require_relative 'marc2linkeddata/isni'
18
+ require_relative 'marc2linkeddata/lib_auth'
19
+ require_relative 'marc2linkeddata/loc'
20
+ require_relative 'marc2linkeddata/viaf'
21
+
22
+ require_relative 'marc2linkeddata/oclc_resource'
23
+ require_relative 'marc2linkeddata/oclc_identity'
24
+ require_relative 'marc2linkeddata/oclc_creative_work'
25
+ require_relative 'marc2linkeddata/oclc_work'
26
+
27
+ require_relative 'marc2linkeddata/parseMarcAuthority'
28
+ #require_relative 'marc2linkeddata/parseMarcCatalog'
29
+ require_relative 'marc2linkeddata/sparql'
30
+
31
+
32
module Marc2LinkedData

  # configuration at the module level, see
  # http://brandonhilkert.com/blog/ruby-gem-configuration-patterns/

  class << self
    attr_writer :configuration
  end

  # @return [Configuration] the current configuration, created on first use
  def self.configuration
    @configuration ||= Configuration.new
  end

  # Replace the current configuration with a new Configuration.
  #
  # @return [Configuration] the new configuration
  def self.reset
    @configuration = Configuration.new
  end

  # Yield the current configuration so that the caller can modify it.
  #
  # @yieldparam configuration [Configuration]
  def self.configure
    yield(configuration)
  end

  # Issue an HTTP HEAD request.
  #
  # @param url [String] the URL to request
  # @return [Net::HTTPResponse] the response to the request
  def self.http_head_request(url)
    uri = URI.parse(url)
    # TLS has to be enabled explicitly for https URLs.
    Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
      http.request Net::HTTP::Head.new(uri)
    end
  end

  # Write the configured prefixes as '@prefix' lines, then two blank lines.
  #
  # @param file [#write] the output to write to
  def self.write_prefixes(file)
    # Use the accessor: @configuration is nil until it is first requested.
    configuration.prefixes.each_pair { |k, v| file.write "@prefix #{k}: <#{v}> .\n" }
    file.write("\n\n")
  end

end
64
+
@@ -0,0 +1,40 @@
1
+ # -*- encoding: utf-8 -*-
2
lib = File.expand_path('../lib/', __FILE__)
$:.unshift lib unless $:.include?(lib)

Gem::Specification.new do |s|
  s.name        = 'marc2linkeddata'
  s.version     = '0.0.1'
  s.licenses    = ['Apache-2.0']

  s.platform    = Gem::Platform::RUBY
  s.authors     = ['Darren Weber']
  s.email       = ['dlweber@stanford.edu']
  s.summary     = 'Convert Marc21 records to linked data, for use in SUL/DLSS projects'
  s.description = 'Utilities for translation of Marc21 records to linked open data.'
  s.homepage    = 'https://github.com/darrenleeweber/marc2linkeddata'

  s.required_rubygems_version = '>= 1.3.6'
  s.required_ruby_version = '>= 2.1.0'

  s.add_dependency 'addressable'
  s.add_dependency 'linkeddata'
  s.add_dependency 'marc'
  s.add_dependency 'rdf-4store'
  s.add_dependency 'ruby-progressbar'
  s.add_dependency 'dotenv'

  s.add_dependency 'hiredis'
  s.add_dependency 'redis'

  # The library entry file requires 'pry' and 'pry-doc' unconditionally, so
  # they have to be installed together with the gem.
  s.add_dependency 'pry'
  s.add_dependency 'pry-doc'

  s.add_development_dependency 'rspec'

  # Package every tracked file, except for the development-only files.
  dev_files = ['.gitignore', 'bin/setup.sh', 'bin/test.sh']
  s.files = `git ls-files`.split($/) - dev_files

  s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
  s.test_files  = s.files.grep(%r{^(test|spec|features)/})
end
@@ -0,0 +1,84 @@
1
+ require "spec_helper"
2
+
3
module Marc2LinkedData

  # Checks the defaults and the writers of the Configuration attributes.
  describe Configuration do

    # Built lazily, so an example can prepare ENV before it is created.
    let(:config) { Configuration.new }

    describe '#debug' do
      it 'default value is false' do
        ENV['DEBUG'] = nil
        expect(config.debug).to be_falsey
      end
    end

    describe '#debug=' do
      it 'can set value' do
        config.debug = true
        expect(config.debug).to be_truthy
      end
    end

    # The redis flags all share the same pair of examples.
    %w[redis4marc redis_read redis_write].each do |flag|
      describe "##{flag}" do
        it 'default value is false' do
          expect(config.public_send(flag)).to be_falsey
        end
      end

      describe "##{flag}=" do
        it 'can set value' do
          config.public_send("#{flag}=", true)
          expect(config.public_send(flag)).to be_truthy
        end
      end
    end

    describe '#prefixes' do
      it 'default value is a hash' do
        expect(config.prefixes).to be_instance_of Hash
      end
    end

    describe '#prefixes=' do
      it 'can set value to hash' do
        config.prefixes = {}
        expect(config.prefixes).to be_empty
      end
    end

  end
end
@@ -0,0 +1,71 @@
1
+ require "spec_helper"
2
+
3
module Marc2LinkedData

  # Exercises Loc against one LOC name authority record.
  describe Loc do

    # The fixtures are created once for the whole group.
    before :all do
      # loc_urls = ['http://id.loc.gov/authorities/names/no99010609', 'http://id.loc.gov/authorities/names/no99010609/']
      @loc_id = 'no99010609'
      @loc_url = "http://id.loc.gov/authorities/names/#{@loc_id}"
      @loc = Loc.new(@loc_url)
      @viaf_url = 'http://viaf.org/viaf/85312226'
    end

    describe '#rdf' do
      it 'should be an instance of RDF::Graph' do
        expect(@loc.rdf).to be_instance_of RDF::Graph
      end
    end

    describe '#rdf_valid?' do
      it 'should be true' do
        expect(@loc.rdf_valid?).to be_truthy
      end
    end

    describe '#same_as_array' do
      it 'should be populated' do
        expect(@loc.same_as_array).not_to be_empty
      end
    end

    describe '#get_viaf' do
      it 'should equal the viaf url' do
        expect(@loc.get_viaf).to eq(@viaf_url)
      end
    end

    # TODO: add tests for different types of records, e.g.
    # authorities:
    # person, organisation, conference, etc.

    after :all do
      @loc_id = nil
      @loc_url = nil
      @loc = nil
    end
  end
end
57
+
58
+
59
+ # # valid data (Berners-Lee, Tim)
60
+ # loc_iris = ['http://id.loc.gov/authorities/names/no99010609', 'http://id.loc.gov/authorities/names/no99010609/']
61
+ # viaf_iri = 'http://viaf.org/viaf/85312226'
62
+ # # valid data (Knuth, Donald Ervin)
63
+ # # loc_iris = ['http://id.loc.gov/authorities/names/n79135509', 'http://id.loc.gov/authorities/names/n79135509/']
64
+ # # viaf_iri = 'http://viaf.org/viaf/7466303'
65
+ # loc_iris.each do |iri|
66
+ # id = Addressable::URI.parse(iri).basename
67
+ # loc = Marc2LinkedData::Loc.new iri
68
+ # end
69
+ # # invalid data
70
+ # loc = Marc2LinkedData.Loc.new 'This is not an LOC IRI' rescue nil
71
+ # raise "Loc.initialize failed to raise error." unless loc.nil?
@@ -0,0 +1,53 @@
1
+ require "spec_helper"
2
+
3
module Marc2LinkedData

  # Exercises the construction, #id and #iri of Resource.
  describe Resource do

    # The fixtures are created once for the whole group.
    before :all do
      @auth_id = 'no99010609'
      @auth_url = "http://id.loc.gov/authorities/names/#{@auth_id}"
      @auth = Resource.new(@auth_url)
    end

    describe '#initialize' do
      it 'should not raise error for a valid iri' do
        # iri_valid = 'http://id.loc.gov/authorities/names/no99010609'
        expect { Resource.new(@auth_url) }.not_to raise_error
      end

      it 'should raise error for an invalid iri' do
        expect { Resource.new('This is not a URL') }.to raise_error(RuntimeError)
      end
    end

    describe '#id' do
      it 'should equal the url basename' do
        expect(@auth.id).to eq(@auth_id)
      end
    end

    describe '#iri' do
      it 'should equal the auth url' do
        expect(@auth.iri.to_s).to eq(@auth_url)
      end

      it 'should be an instance of Addressable::URI' do
        expect(@auth.iri).to be_instance_of Addressable::URI
      end
    end

    after :all do
      @auth_url = nil
      @auth = nil
    end

  end

end
52
+
53
+
@@ -0,0 +1,53 @@
1
+ require 'spec_helper'
2
+
3
module Marc2LinkedData

  # Exercises Viaf against one VIAF record.
  describe Viaf do

    # The fixtures are created once for the whole group.
    before :all do
      # valid data (Knuth, Donald Ervin)
      @viaf_id = '7466303'
      @viaf_url = "http://viaf.org/viaf/#{@viaf_id}"
      @viaf = Viaf.new(@viaf_url)
      @isni_url = 'http://www.isni.org/isni/000000012119421X'
    end

    describe '#rdf' do
      it 'should be an instance of RDF::Graph' do
        expect(@viaf.rdf).to be_instance_of RDF::Graph
      end
    end

    describe '#rdf_valid?' do
      it 'should be true' do
        expect(@viaf.rdf_valid?).to be_truthy
      end
    end

    describe '#same_as_array' do
      it 'should be populated' do
        expect(@viaf.same_as_array).not_to be_empty
      end
    end

    describe '#get_isni' do
      it 'should equal the isni url' do
        expect(@viaf.get_isni).to eq(@isni_url)
      end
    end

    after :all do
      @viaf_id = nil
      @viaf_url = nil
      @isni_url = nil
      @viaf = nil
    end
  end
end
53
+
@@ -0,0 +1,39 @@
1
+ require 'spec_helper'
2
+
3
# Exercises the module-level configuration helpers.
describe Marc2LinkedData do

  # Every example starts with debugging enabled and ends with a fresh
  # configuration.
  before :each do
    Marc2LinkedData.configure { |config| config.debug = true }
  end

  after :each do
    Marc2LinkedData.reset
  end

  describe '#configure' do
    it 'returns a hash of options' do
      current = Marc2LinkedData.configuration
      expect(current).to be_instance_of Marc2LinkedData::Configuration
      expect(current.debug).to be_truthy
    end
  end

  describe '.reset' do
    it 'resets the configuration' do
      Marc2LinkedData.reset
      current = Marc2LinkedData.configuration
      expect(current).to be_instance_of Marc2LinkedData::Configuration
      expect(current.debug).to be_falsey
    end
  end

end
39
+