cap-vivo-mapper 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.env_example +37 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +13 -0
- data/README.md +110 -0
- data/Rakefile +6 -0
- data/bin/cap2vivo +67 -0
- data/cap-vivo-mapper.gemspec +51 -0
- data/lib/cap.rb +32 -0
- data/lib/cap/client.rb +31 -0
- data/lib/cap/client/cap_client.rb +336 -0
- data/lib/cap/client/configuration.rb +55 -0
- data/lib/cap/configuration.rb +93 -0
- data/lib/cap/vivo.rb +34 -0
- data/lib/cap/vivo/configuration.rb +53 -0
- data/lib/cap/vivo/mapper.rb +123 -0
- data/lib/cap/vivo/version.rb +5 -0
- data/lib/cap/vivo/vivo-isf-public-1.6.owl +10220 -0
- metadata +302 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: e938fd82abdc61cc4cf0a92494f51fc7e74d485d
|
4
|
+
data.tar.gz: 1887b0969c45d7e05b394f1da3b518f6f9bc9c76
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1491afbafa6105146cf1c556038062e9f6a7ec09e82db208ee25ad1d0be320748b1cc7ce8682b271558fe2d274a4b4ca01d145fa0ba47297d47345e74965d8e0
|
7
|
+
data.tar.gz: fddf639dfebae49a33d939c2d8e9deee62605985a5b2e6a25fc1066ffc94c6a6cb61bad1765cf5225174032ffe9ef70bcf840999feb217bb23c5e503acee5fb3
|
data/.env_example
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# https://github.com/bkeepers/dotenv is used for
|
2
|
+
# default configuration options. The values in
|
3
|
+
# this file do not replace existing values in
|
4
|
+
# the shell ENV.
|
5
|
+
|
6
|
+
# Uncomment and set values as required. See used settings in
|
7
|
+
# lib/*/configuration.rb
|
8
|
+
|
9
|
+
export DEBUG=false
|
10
|
+
|
11
|
+
export CAP_VIVO_LOG_FILE='log/cap_vivo_mapper.log'
|
12
|
+
|
13
|
+
# true = convert all CAP profiles into VIVO linked data, replacing any
|
14
|
+
# existing VIVO records
|
15
|
+
# false = do not replace existing VIVO records, only convert CAP profiles
|
16
|
+
# that are not already VIVO records
|
17
|
+
export CAP_VIVO_REPLACE=false
|
18
|
+
|
19
|
+
# Configure the client service
|
20
|
+
export CAP_API_URL='https://cap.example.com'
|
21
|
+
export CAP_API_ORGS='/api/cap/v1/orgs'
|
22
|
+
export CAP_API_PROFILES='/api/profiles/v1'
|
23
|
+
export CAP_API_SCHEMA='/api/cap/v1/schemas'
|
24
|
+
export CAP_API_SEARCH='/api/cap/v1/search'
|
25
|
+
export CAP_API_SEARCH_AC='/api/cap/v1/search/autocomplete'
|
26
|
+
export CAP_API_SEARCH_KW='/api/cap/v1/search/keyword'
|
27
|
+
export CAP_API_LOG_FILE='log/cap_vivo_mapper.log'
|
28
|
+
|
29
|
+
# Parameters for client authentication
|
30
|
+
export CAP_TOKEN_URI='https://authz.example.com/oauth/token'
|
31
|
+
export CAP_TOKEN_USER=user
|
32
|
+
export CAP_TOKEN_PASS=pass
|
33
|
+
export CAP_TOKEN_ACCESS=xyz.etc
|
34
|
+
|
35
|
+
# Parameters for CAP and VIVO data persistence
|
36
|
+
export CAP_REPO_4STORE='http://localhost:9000'
|
37
|
+
export CAP_REPO_MONGO='mongodb://127.0.0.1:27017/cap'
|
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright 2015 The Board of Trustees of the Leland Stanford Junior University.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
data/README.md
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
# Cap::Vivo::Mapper
|
2
|
+
|
3
|
+
This utility maps Stanford CAP profiles to VIVO.
|
4
|
+
|
5
|
+
## Initial Objectives
|
6
|
+
|
7
|
+
* Identification of how CAP overlaps with the basics of the VIVO-ISF model
|
8
|
+
* specifically as relates to People and their relationships
|
9
|
+
* e.g. the LODE and eagle-i views, plus docs and examples on the wiki
|
10
|
+
* A test case transformation
|
11
|
+
* A document mapping CAP person data to VIVO-ISF
|
12
|
+
* A json transform utility to implement the mapping
|
13
|
+
* Retrieving CAP profile data from the CAP API
|
14
|
+
* Also investigate CAP publication data
|
15
|
+
* consider mapping to both VIVO-ISF and simple BibFrame
|
16
|
+
|
17
|
+
### Stanford CAP Resources
|
18
|
+
|
19
|
+
- https://cap.stanford.edu/cap-api/console
|
20
|
+
|
21
|
+
### VIVO Resources
|
22
|
+
|
23
|
+
- https://wiki.duraspace.org/display/VIVO/VIVO
|
24
|
+
- https://wiki.duraspace.org/display/VIVO/Major+concepts+in+VIVO+to+get+you+started
|
25
|
+
- https://wiki.duraspace.org/display/VIVO/VIVO-ISF+Ontology
|
26
|
+
- http://www.vivoweb.org/download
|
27
|
+
|
28
|
+
## Installation
|
29
|
+
|
30
|
+
Add this line to your application's Gemfile:
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
gem 'cap-vivo-mapper'
|
34
|
+
```
|
35
|
+
|
36
|
+
And then execute:
|
37
|
+
|
38
|
+
$ bundle
|
39
|
+
|
40
|
+
Or install it yourself as:
|
41
|
+
|
42
|
+
$ gem install cap-vivo-mapper
|
43
|
+
|
44
|
+
## Usage
|
45
|
+
|
46
|
+
### Setup
|
47
|
+
|
48
|
+
#### MongoDB
|
49
|
+
|
50
|
+
```sh
|
51
|
+
# The following worked on an Ubuntu desktop system
|
52
|
+
sudo apt-get install mongodb
|
53
|
+
```
|
54
|
+
|
55
|
+
#### 4store
|
56
|
+
|
57
|
+
```sh
|
58
|
+
# The following worked on an Ubuntu desktop system
|
59
|
+
sudo apt-get install 4store
|
60
|
+
sudo 4store status
|
61
|
+
sudo service 4store stop
|
62
|
+
sudo service 4store status
|
63
|
+
# Only setup the backend once (it erases existing data)
|
64
|
+
sudo 4s-backend-setup cap_vivo
|
65
|
+
sudo 4s-backend cap_vivo
|
66
|
+
sudo 4s-httpd -h # describes the options used below
|
67
|
+
sudo 4s-httpd -p 9000 -U -s -1 cap_vivo
|
68
|
+
```
|
69
|
+
|
70
|
+
4store should be running a SPARQL server on the `cap_vivo` knowledge base; take a look at http://localhost:9000/status/.
|
71
|
+
|
72
|
+
### Configure and Run Conversion
|
73
|
+
|
74
|
+
Use the example configuration in
|
75
|
+
https://github.com/sul-dlss/cap-vivo-mapper/blob/master/.env_example
|
76
|
+
|
77
|
+
```sh
|
78
|
+
mkdir -p ~/tmp/cap_vivo/log
|
79
|
+
cd ~/tmp/cap_vivo
|
80
|
+
project='https://raw.githubusercontent.com/sul-dlss/cap-vivo-mapper'
|
81
|
+
wget ${project}/master/.env_example
|
82
|
+
cp .env_example .env
|
83
|
+
vim .env # hopefully this file is self explanatory
|
84
|
+
# If it's not already installed, install the gem.
|
85
|
+
gem install cap-vivo-mapper
|
86
|
+
# Run it overnight, unless you have a high bandwidth connection to the
|
87
|
+
# CAP API and a fast system. So, watch it for any immediate failures;
|
88
|
+
# if it's running, then leave it overnight. The expected runtime is on
|
89
|
+
# the order of hours.
|
90
|
+
cap2vivo
|
91
|
+
```
|
92
|
+
|
93
|
+
|
94
|
+
## Development
|
95
|
+
|
96
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
97
|
+
|
98
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
99
|
+
|
100
|
+
## Contributing
|
101
|
+
|
102
|
+
Bug reports and pull requests are welcome on GitHub at
|
103
|
+
https://github.com/sul-dlss/cap-vivo-mapper.
|
104
|
+
|
105
|
+
## License
|
106
|
+
|
107
|
+
Copyright 2015 The Board of Trustees of the Leland Stanford Junior University.
|
108
|
+
|
109
|
+
The gem is available as open source under the terms of the [Apache 2 License](http://www.apache.org/licenses/LICENSE-2.0).
|
110
|
+
|
data/Rakefile
ADDED
data/bin/cap2vivo
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command line entry point: download the CAP profiles into the local repo,
# then map each profile into VIVO linked data and save it.
require 'rubygems'
require 'bundler/setup'

require 'pry'
require 'cap'
require 'cap/client'
require 'cap/vivo'

# init configs
config = Cap.configuration

# client will load profile data from a local repo; if it is empty, it
# can be populated from the CAP API using `client.get_profiles`.  The
# profile data is available in `client.profiles`.
client = Cap::Client::Client.new
client.get_profiles

# Convert all the CAP API profiles into VIVO linked data and
# store it in the CAP_REPO_4STORE triple store.
ids = client.profile_ids
if config.rdf_replace
  puts 'Replacing all VIVO linked data'
  config.rdf_repo.clear
else
  puts 'Updating VIVO linked data'
  q = 'SELECT ?person WHERE { ?person a <http://xmlns.com/foaf/0.1/Person> }'
  # The last path segment of each person URI is read as the profile id.
  vivo_ids = config.rdf_repo.client.query(q).map do |solution|
    solution[:person].to_s.split('/').last.to_i
  end
  # Array difference skips the profiles that already have a VIVO record
  # without rescanning vivo_ids once per profile id.
  ids -= vivo_ids
end

puts "Processing #{ids.length} CAP profiles"
start = Time.now.to_i
ids.each.with_index(1) do |id, count|
  profile = client.profile(id)
  begin
    mapper = Cap::Vivo::Mapper.new profile
    mapper.create_vivo
    mapper.save
    if count % 25 == 0
      # Progress line every 25 profiles; the elapsed time is passed as an
      # Integer to match the %d directive.
      elapsed = Time.now.to_i - start
      printf "\nmapped %4d of %d (%4d sec)\n", count, ids.length, elapsed
    else
      printf "Mapping %4d profiles:\n", ids.length if count == 1
      printf '.'
    end
  rescue => e
    # A failed profile is reported and skipped; the run continues.
    puts
    printf "FAILED profileId %d (%d of %d)\n", id, count, ids.length
    puts e.message
    puts
  end
end
puts

at_exit {
  config.cap_repo.close if config.cap_repo.is_a? Daybreak::DB
  client = nil
  config = nil
  sleep 2 # give it a chance to close
}
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# coding: utf-8
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'cap/vivo/version'

Gem::Specification.new do |spec|
  spec.name          = "cap-vivo-mapper"
  spec.version       = Cap::Vivo::VERSION
  spec.authors       = ["Darren L. Weber, Ph.D."]
  spec.email         = ["darren.weber@stanford.edu"]

  spec.summary       = %q{This utility maps Stanford CAP profiles to VIVO.}
  spec.description   = %q{This utility maps Stanford CAP profiles to VIVO.}
  spec.homepage      = 'https://github.com/sul-dlss/cap-vivo-mapper'
  spec.licenses      = ['Apache-2.0']

  spec.add_dependency 'dotenv'

  spec.add_dependency 'daybreak' # memory mapped file db
  spec.add_dependency 'mongo'

  spec.add_dependency 'linkeddata'
  spec.add_dependency 'rdf-4store'

  spec.add_dependency 'faraday'
  spec.add_dependency 'faraday_middleware'

  # pry is a runtime dependency: bin/cap2vivo requires it unconditionally
  # and the CAP client calls `binding.pry` when debug is enabled, so an
  # installed gem cannot start without it.
  spec.add_dependency 'pry'

  # Console documentation support is only needed for development
  spec.add_development_dependency 'pry-doc'

  spec.add_development_dependency 'bundler', '~> 1.10'
  spec.add_development_dependency 'rake', '~> 10.0'
  spec.add_development_dependency 'rspec'
  spec.add_development_dependency 'vcr'
  spec.add_development_dependency 'webmock'

  spec.add_development_dependency 'coveralls'
  spec.add_development_dependency 'guard'
  spec.add_development_dependency 'guard-ctags-bundler'

  # Package everything tracked by git except the test suites, the
  # development-only scripts and the repository dot files.
  git_files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  bin_files = %w(bin/console bin/ctags.rb bin/setup bin/test.rb)
  dot_files = %w(.gitignore .travis.yml log/.gitignore)

  spec.files         = git_files - (bin_files + dot_files)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  # NOTE(review): test/spec/features paths were already rejected from
  # git_files above, so this grep cannot match anything.
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

end
|
data/lib/cap.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'dotenv'
|
2
|
+
Dotenv.load
|
3
|
+
|
4
|
+
require 'linkeddata'
|
5
|
+
require 'rdf/4store'
|
6
|
+
require 'daybreak'
|
7
|
+
require 'mongo'
|
8
|
+
require_relative 'cap/configuration'
|
9
|
+
|
10
|
+
# This is a utility working with Stanford CAP and VIVO data mappings.
|
11
|
+
# https://github.com/sul-dlss/cap-vivo-mapper
|
12
|
+
module Cap

  # Configuration at the module level, see
  # http://brandonhilkert.com/blog/ruby-gem-configuration-patterns/
  class << self
    # Replace the module's configuration object.
    attr_writer :configuration

    # @return [Configuration] the current configuration; a default one is
    #   created and remembered on first use
    def configuration
      @configuration ||= Configuration.new
    end

    # Discard the current configuration and install a fresh default one.
    # @return [Configuration] the new configuration
    def reset
      @configuration = Configuration.new
    end

    # Yield the current configuration to the given block so that the
    # caller can adjust its settings.
    def configure
      yield(configuration)
    end
  end

end
|
data/lib/cap/client.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'dotenv'
|
2
|
+
Dotenv.load
|
3
|
+
|
4
|
+
require_relative 'client/configuration'
|
5
|
+
require_relative 'client/cap_client'
|
6
|
+
|
7
|
+
# This is a utility working with Stanford CAP.
|
8
|
+
# https://github.com/sul-dlss/cap-vivo-mapper
|
9
|
+
module Cap
  module Client

    # Configuration at the module level, see
    # http://brandonhilkert.com/blog/ruby-gem-configuration-patterns/
    class << self
      # Replace the client configuration object.
      attr_writer :configuration

      # @return [Configuration] the current client configuration; a default
      #   one is created and remembered on first use
      def configuration
        @configuration ||= Configuration.new
      end

      # Discard the current configuration and install a fresh default one.
      # @return [Configuration] the new configuration
      def reset
        @configuration = Configuration.new
      end

      # Yield the current configuration to the given block so that the
      # caller can adjust its settings.
      def configure
        yield(configuration)
      end
    end

  end
end
|
@@ -0,0 +1,336 @@
|
|
1
|
+
module Cap
|
2
|
+
module Client
|
3
|
+
|
4
|
+
require 'base64'
require 'faraday'
require 'faraday_middleware'
|
6
|
+
|
7
|
+
# CAP Public Website https://profiles.stanford.edu
|
8
|
+
# Profiles API https://api.stanford.edu/profiles/v1
|
9
|
+
# Orgs API https://api.stanford.edu/cap/v1/orgs
|
10
|
+
# Search API https://api.stanford.edu/cap/v1/search
|
11
|
+
# Developer's API https://cap.stanford.edu/cap-api/console
|
12
|
+
|
13
|
+
class Client
|
14
|
+
|
15
|
+
JSON_CONTENT = 'application/json'
|
16
|
+
BSON_MAX = 16777216
|
17
|
+
|
18
|
+
attr_reader :config
|
19
|
+
attr_reader :cap_api
|
20
|
+
attr_reader :profiles
|
21
|
+
|
22
|
+
# Initialize a new client
|
23
|
+
# Set up the local profile storage handles and the two Faraday
# connections (CAP API and OAuth token service) used by this client.
def initialize
  @config = Cap::Client.configuration

  # Local storage: a Daybreak db holds whole profiles under one handle,
  # a Mongo client is split into one collection per kind of document.
  if Cap.configuration.cap_repo.is_a? Daybreak::DB
    @profiles = Cap.configuration.cap_repo
  elsif Cap.configuration.cap_repo.is_a? Mongo::Client
    @profiles, @presentations, @publications, @processed =
      [:profiles, :presentations, :publications, :processed].map do |name|
        Cap.configuration.cap_repo[name]
      end
  end

  # CAP API
  @cap_uri      = 'https://api.stanford.edu'
  @cap_profiles = '/profiles/v1'
  @cap_orgs     = '/cap/v1/orgs'
  @cap_search   = '/cap/v1/search'
  @cap_api = Faraday.new(url: @cap_uri) do |conn|
    # conn.use FaradayMiddleware::FollowRedirects, limit: 3
    # conn.use Faraday::Response::RaiseError # raise exceptions on 40x, 50x
    # conn.request :logger, @config.logger
    conn.request :json
    conn.response :json, :content_type => JSON_CONTENT
    conn.adapter Faraday.default_adapter
  end
  @cap_api.options.timeout = 90
  @cap_api.options.open_timeout = 10
  @cap_api.headers.merge!(json_payloads)

  # Authentication
  @auth = Faraday.new(url: 'https://authz.stanford.edu/oauth/token') do |conn|
    conn.request :url_encoded
    conn.response :json, :content_type => JSON_CONTENT
    conn.adapter Faraday.default_adapter
  end
  @auth.options.timeout = 30
  @auth.options.open_timeout = 10
  @auth.headers.merge!(json_payloads)
end
|
60
|
+
|
61
|
+
# Reset authentication
|
62
|
+
# Force a new authentication attempt, ignoring any cached access token.
# @return the result of #authenticate
def authenticate!
  # A nil expiry converts to 0, which #authenticate treats as expired.
  @access_expiry = nil
  authenticate
end
|
66
|
+
|
67
|
+
# Obtain an OAuth access token for the CAP API, reusing the cached token
# until it expires.
#
# On success the bearer token is installed as the Authorization header of
# the CAP API connection.
#
# @return [String, false] the "Bearer ..." header value, or false when
#   the credentials are incomplete or the token request fails
def authenticate
  # A nil expiry converts to 0, so a token that was never issued (or was
  # reset by #authenticate!) always counts as expired.
  if @access_expiry.to_i < Time.now.to_i
    @access_code = nil
    @auth.headers.delete :Authorization
    @cap_api.headers.delete :Authorization
  end
  return @access_code if @access_code

  # Both credentials are needed to build the Basic header; to_s guards
  # against unset (nil) configuration values.
  user = @config.token_user.to_s
  pass = @config.token_pass.to_s
  return false if user.empty? || pass.empty?

  auth_code = 'Basic ' + Base64.strict_encode64("#{user}:#{pass}")
  @auth.headers.merge!({ Authorization: auth_code })
  response = @auth.get "?grant_type=client_credentials"
  return false unless response.status == 200

  # The body is only usable when it was parsed into a Hash.
  access = response.body
  return false unless access.is_a?(Hash) && access['access_token']

  @access_code = "Bearer #{access['access_token']}"
  @access_expiry = Time.now.to_i + access['expires_in'].to_i
  @cap_api.headers[:Authorization] = @access_code
end
|
87
|
+
|
88
|
+
# Get profiles from CAP API and store into local repo
|
89
|
+
# Download all profiles from the CAP API, 100 per page, and store them in
# the local repo.  The repo is emptied first (repo_clean) and finalized
# afterwards (repo_commit), even when a page fails.  Errors are logged
# rather than raised.
def get_profiles
  if authenticate
    page = 1
    pages = 0
    total = 0
    begin
      repo_clean
      loop do
        response = @cap_api.get "#{@cap_profiles}?p=#{page}&ps=100"
        unless response.status == 200
          msg = "Failed to GET profiles page #{page}: #{response.status}"
          @config.logger.error msg
          puts msg
          break
        end
        data = response.body
        if data['firstPage']
          # The page and profile totals are recorded from the first page.
          pages = data['totalPages']
          total = data['totalCount']
          puts "Retrieved #{page} of #{pages} pages (#{total} profiles)."
        else
          puts "Retrieved #{page} of #{pages} pages."
        end
        store_profiles(data['values'])
        page += 1
        break if data['lastPage']
      end
    rescue => e
      msg = e.message
      binding.pry if @config.debug
      @config.logger.error msg
    ensure
      repo_commit(total)
    end
  else
    msg = "Failed to authenticate"
    @config.logger.error msg
  end
rescue => e
  # Covers failures in authentication and in repo_commit itself.
  msg = e.message
  binding.pry if @config.debug
  @config.logger.error(msg)
end

# Store one page of API profiles in whichever local repo is in use.
def store_profiles(profiles)
  if @profiles.is_a? Daybreak::DB
    profiles.each { |profile| @profiles[profile["profileId"]] = profile }
    @profiles.flush
  elsif @profiles.is_a? Mongo::Collection
    profiles.each { |profile| store_mongo_profile(profile) }
  end
end

# Store one profile in mongodb, split into presentation, publication and
# profile documents to accommodate the 16Mb limit on mongodb docs.
def store_mongo_profile(profile)
  id = profile['profileId']

  presentations = profile.delete('presentations') || []
  presentations.each { |presentation| presentation.delete('detail') }
  insert_or_log(@presentations,
                {'profileId' => id, 'presentations' => presentations},
                "Profile #{id} presentations failed to save.")

  # Keep only the identifier fields of each publication.  keep_if applies
  # the filter in place; calling Hash#keys with a block never runs the
  # block, so no field would be removed that way.
  pubs_fields = ['doiId', 'doiUrl', 'webOfScienceId', 'webOfScienceUrl']
  publications = profile.delete('publications') || []
  publications.each do |publication|
    publication.keep_if { |field, _| pubs_fields.include? field }
  end
  insert_or_log(@publications,
                {'profileId' => id, 'publications' => publications},
                "Profile #{id} publications failed to save.")

  insert_or_log(@profiles, profile, "Profile #{id} failed to save.")
end

# Insert a document into a collection; a failure is logged with the given
# message and does not stop the download.
def insert_or_log(collection, doc, failure_msg)
  collection.insert_one(doc)
rescue
  @config.logger.error failure_msg
end

private :store_profiles, :store_mongo_profile, :insert_or_log
|
176
|
+
|
177
|
+
# def update_profiles
|
178
|
+
# # profile['profileId']
|
179
|
+
# # => 42005
|
180
|
+
# # [13] pry(main)> profile['profileId'].class
|
181
|
+
# # => Fixnum
|
182
|
+
# # [14] pry(main)> profile['lastModified']
|
183
|
+
# # => "2015-08-17T10:55:46.772-07:00"
|
184
|
+
# end
|
185
|
+
|
186
|
+
# @return ids [Array<Integer>] profile ids from local repo
|
187
|
+
# List the profile ids held in the local repo.
# @return [Array<Integer>, nil] nil when the repo is neither a Daybreak
#   db nor a Mongo collection
def profile_ids
  case @profiles
  when Daybreak::DB
    # Keys are converted to integers
    @profiles.keys.map(&:to_i)
  when Mongo::Collection
    @profiles.find.projection({profileId:1}).map { |doc| doc['profileId'] }
  end
end
|
194
|
+
|
195
|
+
# return profile data from local repo
|
196
|
+
# @param id [Integer] A profileId number
|
197
|
+
# @return profile [Hash|nil]
|
198
|
+
# Look up one profile in the local repo.
# @param id [Integer] A profileId number
# @return profile [Hash|nil]
def profile(id)
  case @profiles
  when Daybreak::DB then @profiles[id.to_s]
  when Mongo::Collection then @profiles.find({profileId: id}).first
  end
end
|
205
|
+
|
206
|
+
# return presentation data from local repo
|
207
|
+
# @param id [Integer] A profileId number
|
208
|
+
# @return presentations [Array<Hash>|nil]
|
209
|
+
# Look up the presentation data stored for a profile.
# With a Daybreak repo this is the profile's 'presentations' value (nil
# when the lookup fails); with mongo it is the first matching document of
# the presentations collection.
# @param id [Integer] A profileId number
def presentation(id)
  case @profiles
  when Daybreak::DB
    begin
      @profiles[id.to_s]['presentations']
    rescue
      nil
    end
  when Mongo::Collection
    @presentations.find({profileId: id}).first
  end
end
|
220
|
+
|
221
|
+
# return publication data from local repo
|
222
|
+
# @param id [Integer] A profileId number
|
223
|
+
# Look up the publication data stored for a profile.
# With a Daybreak repo this is the profile's 'publications' value (nil
# when the lookup fails); with mongo it is the first matching document of
# the publications collection.
# @param id [Integer] A profileId number
def publication(id)
  case @profiles
  when Daybreak::DB
    begin
      @profiles[id.to_s]['publications']
    rescue
      nil
    end
  when Mongo::Collection
    @publications.find({profileId: id}).first
  end
end
|
234
|
+
|
235
|
+
# A profile's processing data.
|
236
|
+
# @param id [Integer] A profileId number
|
237
|
+
# Look up a profile's processing data.
# With a Daybreak repo this is the profile's 'processed' value (nil when
# the lookup fails); with a mongo processed collection it is the first
# matching document.
# @param id [Integer] A profileId number
def processed(id)
  if @profiles.is_a? Daybreak::DB
    record = begin
      @profiles[id.to_s]['processed']
    rescue
      nil
    end
    return record
  end
  return unless @processed.is_a? Mongo::Collection
  @processed.find({profileId: id}).first
end
|
248
|
+
|
249
|
+
# Update a profile record with processing data.
|
250
|
+
# @param id [Integer] A profileId number
|
251
|
+
# @param data [Hash] Optional processing information
|
252
|
+
# Update a profile record with processing data.
# The processing document carries the current time (integer seconds) as
# lastModified together with the given data.
# @param id [Integer] A profileId number
# @param data [Hash] Optional processing information
def process_update(id, data=nil)
  if @profiles.is_a? Daybreak::DB
    process_doc = {
      lastModified: Time.now.to_i,
      data: data
    }
    key = id.to_s
    profile = @profiles[key]
    profile['processed'] = process_doc
    # Assign the changed profile back to the db so that the store sees
    # the update.
    # NOTE(review): assumes Daybreak only records a value when it is
    # assigned, not when a fetched value is mutated - confirm.
    @profiles[key] = profile
    process_doc
  elsif @processed.is_a? Mongo::Collection
    process_doc = {
      profileId: id,
      lastModified: Time.now.to_i,
      data: data
    }
    @processed.insert_one(process_doc)
  end
end
|
268
|
+
|
269
|
+
private
|
270
|
+
|
271
|
+
# Empty the local repo before a new download: clear the Daybreak db, or
# drop and re-create each of the mongo collections.
def repo_clean
  case @profiles
  when Daybreak::DB
    @profiles.clear
  when Mongo::Collection
    [@profiles, @presentations, @publications, @processed].each do |collection|
      collection.drop
      collection.create
    end
  end
end
|
285
|
+
|
286
|
+
# Finalize the local repo after a download and report what was stored.
# @param total [Integer] the profile count reported by the CAP API
def repo_commit(total)
  case @profiles
  when Daybreak::DB
    @profiles.flush
    @profiles.compact
    @profiles.load
    puts "Stored #{@profiles.size} of #{total} profiles."
    puts "Stored profiles to #{@profiles.class} at: #{@profiles.file}."
  when Mongo::Collection
    # Every collection gets a unique index on profileId
    profile_id_index = {"profileId" => 1}
    [@profiles, @presentations, @publications, @processed].each do |collection|
      collection.indexes.create_one( profile_id_index, :unique => true )
    end
    puts "Stored #{@profiles.find.count} of #{total} profiles."
    puts "Stored profiles to #{@profiles.class} at: #{@profiles.namespace}."
  end
end
|
303
|
+
|
304
|
+
# Migrate CAP API profile data from a Daybreak::DB into mongodb
|
305
|
+
# Migrate CAP API profile data from a Daybreak::DB into mongodb.
# The mongo profiles collection is dropped, refilled from the Daybreak
# db, indexed on profileId (unique) and then validated.
# @return the result of #daybreak_matches_mongo?
def profiles_daybreak_to_mongo
  mongo = Cap.configuration.cap_repo_mongo
  mongo[:profiles].drop
  db = Cap.configuration.cap_repo_daybreak
  # Iterate with each: calling keys with a block would never run the
  # block.  Profiles are read from the Daybreak db being migrated.
  db.keys.each do |id|
    mongo[:profiles].insert_one(db[id])
  end
  mongo[:profiles].indexes.create_one({profileId:1}, :unique => true )
  daybreak_matches_mongo?
end
|
316
|
+
|
317
|
+
# Validate a daybreak to mongo data transfer
|
318
|
+
# Validate a daybreak to mongo data transfer.
# Each Daybreak profile is compared with the mongo document that has the
# same profileId, ignoring the mongo "_id" field.
# @return [Boolean] true when every Daybreak profile has an equal mongo
#   document; false on the first missing or different one
def daybreak_matches_mongo?
  mongo = Cap.configuration.cap_repo_mongo
  profiles = Cap.configuration.cap_repo_daybreak
  profiles.keys.all? do |id|
    mongo_profile = mongo[:profiles].find(:profileId => id.to_i).first
    # A profile that is missing from mongo is a mismatch, not an error.
    next false if mongo_profile.nil?
    mongo_profile.delete("_id")
    mongo_profile == profiles[id]
  end
end
|
329
|
+
|
330
|
+
# Request headers declaring that JSON is both sent and accepted.
# @return [Hash] the accept and content_type header values
def json_payloads
  headers = {}
  headers[:accept] = JSON_CONTENT
  headers[:content_type] = JSON_CONTENT
  headers
end
|
333
|
+
|
334
|
+
end
|
335
|
+
end
|
336
|
+
end
|