cap-vivo-mapper 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.env_example +37 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +13 -0
- data/README.md +110 -0
- data/Rakefile +6 -0
- data/bin/cap2vivo +67 -0
- data/cap-vivo-mapper.gemspec +51 -0
- data/lib/cap.rb +32 -0
- data/lib/cap/client.rb +31 -0
- data/lib/cap/client/cap_client.rb +336 -0
- data/lib/cap/client/configuration.rb +55 -0
- data/lib/cap/configuration.rb +93 -0
- data/lib/cap/vivo.rb +34 -0
- data/lib/cap/vivo/configuration.rb +53 -0
- data/lib/cap/vivo/mapper.rb +123 -0
- data/lib/cap/vivo/version.rb +5 -0
- data/lib/cap/vivo/vivo-isf-public-1.6.owl +10220 -0
- metadata +302 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: e938fd82abdc61cc4cf0a92494f51fc7e74d485d
|
4
|
+
data.tar.gz: 1887b0969c45d7e05b394f1da3b518f6f9bc9c76
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1491afbafa6105146cf1c556038062e9f6a7ec09e82db208ee25ad1d0be320748b1cc7ce8682b271558fe2d274a4b4ca01d145fa0ba47297d47345e74965d8e0
|
7
|
+
data.tar.gz: fddf639dfebae49a33d939c2d8e9deee62605985a5b2e6a25fc1066ffc94c6a6cb61bad1765cf5225174032ffe9ef70bcf840999feb217bb23c5e503acee5fb3
|
data/.env_example
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# https://github.com/bkeepers/dotenv is used for
|
2
|
+
# default configuration options. The values in
|
3
|
+
# this file do not replace existing values in
|
4
|
+
# the shell ENV.
|
5
|
+
|
6
|
+
# Uncomment and set values as required. See used settings in
|
7
|
+
# lib/*/configuration.rb
|
8
|
+
|
9
|
+
export DEBUG=false
|
10
|
+
|
11
|
+
export CAP_VIVO_LOG_FILE='log/cap_vivo_mapper.log'
|
12
|
+
|
13
|
+
# true = convert all CAP profiles into VIVO linked data, replacing any
|
14
|
+
# existing VIVO records
|
15
|
+
# false = do not replace existing VIVO records, only convert CAP profiles
|
16
|
+
# that are not already VIVO records
|
17
|
+
export CAP_VIVO_REPLACE=false
|
18
|
+
|
19
|
+
# Configure the client service
|
20
|
+
export CAP_API_URL='https://cap.example.com'
|
21
|
+
export CAP_API_ORGS='/api/cap/v1/orgs'
|
22
|
+
export CAP_API_PROFILES='/api/profiles/v1'
|
23
|
+
export CAP_API_SCHEMA='/api/cap/v1/schemas'
|
24
|
+
export CAP_API_SEARCH='/api/cap/v1/search'
|
25
|
+
export CAP_API_SEARCH_AC='/api/cap/v1/search/autocomplete'
|
26
|
+
export CAP_API_SEARCH_KW='/api/cap/v1/search/keyword'
|
27
|
+
export CAP_API_LOG_FILE='log/cap_vivo_mapper.log'
|
28
|
+
|
29
|
+
# Parameters for client authentication
|
30
|
+
export CAP_TOKEN_URI='https://authz.example.com/oauth/token'
|
31
|
+
export CAP_TOKEN_USER=user
|
32
|
+
export CAP_TOKEN_PASS=pass
|
33
|
+
export CAP_TOKEN_ACCESS=xyz.etc
|
34
|
+
|
35
|
+
# Parameters for CAP and VIVO data persistence
|
36
|
+
export CAP_REPO_4STORE='http://localhost:9000'
|
37
|
+
export CAP_REPO_MONGO='mongodb://127.0.0.1:27017/cap'
|
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright 2015 The Board of Trustees of the Leland Stanford Junior University.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
data/README.md
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
# Cap::Vivo::Mapper
|
2
|
+
|
3
|
+
This utility maps Stanford CAP profiles to VIVO.
|
4
|
+
|
5
|
+
## Initial Objectives
|
6
|
+
|
7
|
+
* Identification of how CAP overlaps with the basics of the VIVO-ISF model
|
8
|
+
* specifically as relates to People and their relationships
|
9
|
+
* e.g. the LODE and eagle-i views, plus docs and examples on the wiki
|
10
|
+
* A test case transformation
|
11
|
+
* A document mapping CAP person data to VIVO-ISF
|
12
|
+
* A json transform utility to implement the mapping
|
13
|
+
* Retrieving CAP profile data from the CAP API
|
14
|
+
* Also investigate CAP publication data
|
15
|
+
* consider mapping to both VIVO-ISF and simple BibFrame
|
16
|
+
|
17
|
+
### Stanford CAP Resources
|
18
|
+
|
19
|
+
- https://cap.stanford.edu/cap-api/console
|
20
|
+
|
21
|
+
### VIVO Resources
|
22
|
+
|
23
|
+
- https://wiki.duraspace.org/display/VIVO/VIVO
|
24
|
+
- https://wiki.duraspace.org/display/VIVO/Major+concepts+in+VIVO+to+get+you+started
|
25
|
+
- https://wiki.duraspace.org/display/VIVO/VIVO-ISF+Ontology
|
26
|
+
- http://www.vivoweb.org/download
|
27
|
+
|
28
|
+
## Installation
|
29
|
+
|
30
|
+
Add this line to your application's Gemfile:
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
gem 'cap-vivo-mapper'
|
34
|
+
```
|
35
|
+
|
36
|
+
And then execute:
|
37
|
+
|
38
|
+
$ bundle
|
39
|
+
|
40
|
+
Or install it yourself as:
|
41
|
+
|
42
|
+
$ gem install cap-vivo-mapper
|
43
|
+
|
44
|
+
## Usage
|
45
|
+
|
46
|
+
### Setup
|
47
|
+
|
48
|
+
#### MongoDB
|
49
|
+
|
50
|
+
```sh
|
51
|
+
# The following worked on an Ubuntu desktop system
|
52
|
+
sudo apt-get install mongodb
|
53
|
+
```
|
54
|
+
|
55
|
+
#### 4store
|
56
|
+
|
57
|
+
```sh
|
58
|
+
# The following worked on an Ubuntu desktop system
|
59
|
+
sudo apt-get install 4store
|
60
|
+
sudo 4store status
|
61
|
+
sudo service 4store stop
|
62
|
+
sudo service 4store status
|
63
|
+
# Only setup the backend once (it erases existing data)
|
64
|
+
sudo 4s-backend-setup cap_vivo
|
65
|
+
sudo 4s-backend cap_vivo
|
66
|
+
sudo 4s-httpd -h # describes the options used below
|
67
|
+
sudo 4s-httpd -p 9000 -U -s -1 cap_vivo
|
68
|
+
```
|
69
|
+
|
70
|
+
4store should be running a SPARQL server on the `cap_vivo` knowledge base; take a look at http://localhost:9000/status/.
|
71
|
+
|
72
|
+
### Configure and Run Conversion
|
73
|
+
|
74
|
+
Use the example configuration in
|
75
|
+
https://github.com/sul-dlss/cap-vivo-mapper/blob/master/.env_example
|
76
|
+
|
77
|
+
```sh
|
78
|
+
mkdir -p ~/tmp/cap_vivo/log
|
79
|
+
cd ~/tmp/cap_vivo
|
80
|
+
project='https://raw.githubusercontent.com/sul-dlss/cap-vivo-mapper'
|
81
|
+
wget ${project}/master/.env_example
|
82
|
+
cp .env_example .env
|
83
|
+
vim .env # hopefully this file is self explanatory
|
84
|
+
# If it's not already installed, install the gem.
|
85
|
+
gem install cap-vivo-mapper
|
86
|
+
# Run it overnight, unless you have a high bandwidth connection to the
|
87
|
+
# CAP API and a fast system. So, watch it for any immediate failures;
|
88
|
+
# if it's running, then leave it overnight. The expected runtime is on
|
89
|
+
# the order of hours.
|
90
|
+
cap2vivo
|
91
|
+
```
|
92
|
+
|
93
|
+
|
94
|
+
## Development
|
95
|
+
|
96
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
97
|
+
|
98
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
99
|
+
|
100
|
+
## Contributing
|
101
|
+
|
102
|
+
Bug reports and pull requests are welcome on GitHub at
|
103
|
+
https://github.com/sul-dlss/cap-vivo-mapper.
|
104
|
+
|
105
|
+
## License
|
106
|
+
|
107
|
+
Copyright 2015 The Board of Trustees of the Leland Stanford Junior University.
|
108
|
+
|
109
|
+
The gem is available as open source under the terms of the [Apache 2 License](http://www.apache.org/licenses/LICENSE-2.0).
|
110
|
+
|
data/Rakefile
ADDED
data/bin/cap2vivo
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command line entry point: load CAP profiles into the local repo, map each
# one into VIVO linked data and save it to the configured RDF repository.
require 'rubygems'
require 'bundler/setup'

# The gemspec lists pry as a development dependency only, so an installed
# gem may not have it; it is only needed for `binding.pry` debug sessions.
begin
  require 'pry'
rescue LoadError
  warn 'pry is not installed; interactive debugging is unavailable'
end
require 'cap'
require 'cap/client'
require 'cap/vivo'

# init configs
config = Cap.configuration

# client will load profile data from a local repo; if it is empty, it
# can be populated from the CAP API using `client.get_profiles`.  The
# profile data is available in `client.profiles`.
client = Cap::Client::Client.new

# Register the cleanup before the long-running work starts, so the repo is
# also closed when the run is interrupted or aborts part way through.
at_exit {
  config.cap_repo.close if config.cap_repo.is_a? Daybreak::DB
  client = nil
  config = nil
  sleep 2 # give it a chance to close
}

client.get_profiles

# Convert all the CAP API profiles into VIVO linked data and
# store it in the CAP_REPO_4STORE triple store.
ids = client.profile_ids
if config.rdf_replace
  puts 'Replacing all VIVO linked data'
  config.rdf_repo.clear
else
  puts 'Updating VIVO linked data'
  q = 'SELECT ?person WHERE { ?person a <http://xmlns.com/foaf/0.1/Person> }'
  # the last path segment of each person URI is taken as the profile id
  vivo_ids = config.rdf_repo.client.query(q).map do |solution|
    solution[:person].to_s.split('/').last.to_i
  end
  # set difference instead of a linear `include?` scan per profile id
  ids -= vivo_ids
end
puts "Processing #{ids.length} CAP profiles"
count = 0
start = Time.now.to_i
ids.each do |id|
  count += 1
  profile = client.profile(id)
  begin
    mapper = Cap::Vivo::Mapper.new profile
    mapper.create_vivo
    mapper.save
    if count % 25 == 0
      # progress line with elapsed seconds after every 25th profile
      elapsed = Time.now.to_i - start
      printf "\nmapped %4d of %d (%4d sec)\n", count, ids.length, elapsed
    else
      printf "Mapping %4d profiles:\n", ids.length if count == 1
      printf '.'
    end
  rescue => e
    # report the failed profile and carry on with the remaining ones
    puts
    printf "FAILED profileId %d (%d of %d)\n", id, count, ids.length
    puts e.message
    puts
  end
end
puts
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for cap-vivo-mapper.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'cap/vivo/version'

Gem::Specification.new do |spec|
  spec.name        = 'cap-vivo-mapper'
  spec.version     = Cap::Vivo::VERSION
  spec.authors     = ['Darren L. Weber, Ph.D.']
  spec.email       = ['darren.weber@stanford.edu']

  spec.summary     = 'This utility maps Stanford CAP profiles to VIVO.'
  spec.description = 'This utility maps Stanford CAP profiles to VIVO.'
  spec.homepage    = 'https://github.com/sul-dlss/cap-vivo-mapper'
  spec.licenses    = ['Apache-2.0']

  # Runtime dependencies: configuration, local profile stores (daybreak is
  # a memory mapped file db), linked data tooling and the HTTP client.
  %w[
    dotenv
    daybreak
    mongo
    linkeddata
    rdf-4store
    faraday
    faraday_middleware
  ].each { |gem_name| spec.add_dependency gem_name }

  # Development dependencies; pry is used for console and debug config.
  [
    ['pry'],
    ['pry-doc'],
    ['bundler', '~> 1.10'],
    ['rake', '~> 10.0'],
    ['rspec'],
    ['vcr'],
    ['webmock'],
    ['coveralls'],
    ['guard'],
    ['guard-ctags-bundler']
  ].each { |requirement| spec.add_development_dependency(*requirement) }

  # Package everything git tracks except test directories, developer-only
  # scripts and repository dot files.
  tracked = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  excluded = %w[
    bin/console bin/ctags.rb bin/setup bin/test.rb
    .gitignore .travis.yml log/.gitignore
  ]

  spec.files         = tracked - excluded
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']
end
|
data/lib/cap.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'dotenv'
|
2
|
+
Dotenv.load
|
3
|
+
|
4
|
+
require 'linkeddata'
|
5
|
+
require 'rdf/4store'
|
6
|
+
require 'daybreak'
|
7
|
+
require 'mongo'
|
8
|
+
require_relative 'cap/configuration'
|
9
|
+
|
10
|
+
# This is a utility working with Stanford CAP and VIVO data mappings.
|
11
|
+
# https://github.com/sul-dlss/cap-vivo-mapper
|
12
|
+
module Cap
  # Module level configuration, following
  # http://brandonhilkert.com/blog/ruby-gem-configuration-patterns/
  class << self
    # Allows a prepared configuration object to be assigned directly.
    attr_writer :configuration

    # @return [Configuration] the current configuration, created on first use
    def configuration
      @configuration ||= Configuration.new
    end

    # Discard the current configuration and start from a new one.
    # @return [Configuration] the new configuration
    def reset
      @configuration = Configuration.new
    end

    # Yield the current configuration so the caller can adjust it.
    def configure
      yield configuration
    end
  end
end
|
data/lib/cap/client.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'dotenv'
|
2
|
+
Dotenv.load
|
3
|
+
|
4
|
+
require_relative 'client/configuration'
|
5
|
+
require_relative 'client/cap_client'
|
6
|
+
|
7
|
+
# This is a utility working with Stanford CAP.
|
8
|
+
# https://github.com/sul-dlss/cap-vivo-mapper
|
9
|
+
module Cap
  module Client
    # Module level configuration, following
    # http://brandonhilkert.com/blog/ruby-gem-configuration-patterns/
    class << self
      # Allows a prepared configuration object to be assigned directly.
      attr_writer :configuration

      # @return [Configuration] the current configuration, created on first use
      def configuration
        @configuration ||= Configuration.new
      end

      # Discard the current configuration and start from a new one.
      # @return [Configuration] the new configuration
      def reset
        @configuration = Configuration.new
      end

      # Yield the current configuration so the caller can adjust it.
      def configure
        yield configuration
      end
    end
  end
end
|
@@ -0,0 +1,336 @@
|
|
1
|
+
module Cap
|
2
|
+
module Client
|
3
|
+
|
4
|
+
require 'faraday'
|
5
|
+
require 'faraday_middleware'
|
6
|
+
|
7
|
+
# CAP Public Website https://profiles.stanford.edu
|
8
|
+
# Profiles API https://api.stanford.edu/profiles/v1
|
9
|
+
# Orgs API https://api.stanford.edu/cap/v1/orgs
|
10
|
+
# Search API https://api.stanford.edu/cap/v1/search
|
11
|
+
# Developer's API https://cap.stanford.edu/cap-api/console
|
12
|
+
|
13
|
+
class Client
|
14
|
+
|
15
|
+
JSON_CONTENT = 'application/json'
|
16
|
+
BSON_MAX = 16777216
|
17
|
+
|
18
|
+
attr_reader :config
|
19
|
+
attr_reader :cap_api
|
20
|
+
attr_reader :profiles
|
21
|
+
|
22
|
+
# Initialize a new client
|
23
|
+
# Selects the local profile store from Cap.configuration.cap_repo and
# builds the Faraday connections for the CAP API and the token service.
# Both service hosts are hard-coded below.
def initialize
  @config = Cap::Client.configuration

  repo = Cap.configuration.cap_repo
  case repo
  when Daybreak::DB
    # the Daybreak db itself is used as the profile store
    @profiles = repo
  when Mongo::Client
    # one collection per kind of record
    @profiles      = repo[:profiles]
    @presentations = repo[:presentations]
    @publications  = repo[:publications]
    @processed     = repo[:processed]
  end

  # CAP API
  @cap_uri      = 'https://api.stanford.edu'
  @cap_profiles = '/profiles/v1'
  @cap_orgs     = '/cap/v1/orgs'
  @cap_search   = '/cap/v1/search'
  @cap_api = Faraday.new(url: @cap_uri) do |conn|
    # Middleware left disabled, as before:
    # conn.use FaradayMiddleware::FollowRedirects, limit: 3
    # conn.use Faraday::Response::RaiseError # raise exceptions on 40x, 50x
    # conn.request :logger, @config.logger
    conn.request :json
    conn.response :json, :content_type => JSON_CONTENT
    conn.adapter Faraday.default_adapter
  end
  @cap_api.options.timeout = 90
  @cap_api.options.open_timeout = 10
  @cap_api.headers.merge!(json_payloads)

  # Authentication
  token_uri = 'https://authz.stanford.edu/oauth/token'
  @auth = Faraday.new(url: token_uri) do |conn|
    conn.request :url_encoded
    conn.response :json, :content_type => JSON_CONTENT
    conn.adapter Faraday.default_adapter
  end
  @auth.options.timeout = 30
  @auth.options.open_timeout = 10
  @auth.headers.merge!(json_payloads)
end
|
60
|
+
|
61
|
+
# Reset authentication
|
62
|
+
# Force a new token request: clearing the expiry makes `authenticate`
# treat any cached token as expired.
# @return whatever `authenticate` returns
def authenticate!
  @access_expiry = nil
  authenticate
end
|
66
|
+
|
67
|
+
# Obtain an OAuth bearer token for the CAP API, reusing the cached token
# until its expiry time has passed.
#
# On success the token is set as the Authorization header of the CAP API
# connection.
#
# @return [String, false] the "Bearer ..." header value; false when the
#   credentials are incomplete or the token request does not succeed
def authenticate
  if @access_expiry.to_i < Time.now.to_i
    # no token yet, or it expired: drop it and the headers built from it
    @access_code = nil
    @auth.headers.delete :Authorization
    @cap_api.headers.delete :Authorization
  end
  return @access_code if @access_code

  user = @config.token_user.to_s
  pass = @config.token_pass.to_s
  # Basic auth needs both parts (nil counts as missing); do not send a
  # request that cannot succeed.
  return false if user.empty? || pass.empty?

  # pack('m0') is base64 without line feeds, i.e. the same output as
  # Base64.strict_encode64, without depending on 'base64' being loaded.
  basic = 'Basic ' + ["#{user}:#{pass}"].pack('m0')
  @auth.headers.merge!({ Authorization: basic })
  response = @auth.get '?grant_type=client_credentials'
  return false unless response.status == 200

  access = response.body
  # the body is only a Hash when the response was parsed as JSON
  return false unless access.is_a?(Hash) && access['access_token']

  @access_code = "Bearer #{access['access_token']}"
  @access_expiry = Time.now.to_i + access['expires_in'].to_i
  @cap_api.headers[:Authorization] = @access_code
end
|
87
|
+
|
88
|
+
# Get profiles from CAP API and store into local repo
|
89
|
+
# Download every page of profiles from the CAP API (100 per page) and
# store them in the local repo, which is emptied first.
#
# With a Daybreak repo whole profiles are stored under their profileId.
# With Mongo collections the presentations and publications are split
# into their own collections to accommodate the 16Mb limit on mongodb
# docs: presentations lose their 'detail' field and publications keep
# only the DOI and Web of Science fields.
#
# Errors are logged rather than raised; `repo_commit` always runs once
# the download loop has been entered.
def get_profiles
  return @config.logger.error('Failed to authenticate') unless authenticate

  page = 1
  pages = 0
  total = 0
  pubs_fields = ['doiId', 'doiUrl', 'webOfScienceId', 'webOfScienceUrl']
  # insert one document; a failed insert is logged and does not stop the run
  save = lambda do |collection, doc, failure|
    begin
      collection.insert_one(doc)
    rescue
      @config.logger.error failure
    end
  end
  begin
    repo_clean
    loop do
      response = @cap_api.get "#{@cap_profiles}?p=#{page}&ps=100"
      unless response.status == 200
        msg = "Failed to GET profiles page #{page}: #{response.status}"
        @config.logger.error msg
        puts msg
        break
      end
      data = response.body
      if data['firstPage']
        # the totals are taken from the first page
        pages = data['totalPages']
        total = data['totalCount']
        puts "Retrieved #{page} of #{pages} pages (#{total} profiles)."
      else
        puts "Retrieved #{page} of #{pages} pages."
      end
      profiles = data['values']
      if @profiles.is_a? Daybreak::DB
        profiles.each { |profile| @profiles[profile['profileId']] = profile }
        @profiles.flush
      elsif @profiles.is_a? Mongo::Collection
        profiles.each do |profile|
          id = profile['profileId']
          presentations = profile.delete('presentations') || []
          presentations.each { |presentation| presentation.delete('detail') }
          save.call(@presentations,
                    { 'profileId' => id, 'presentations' => presentations },
                    "Profile #{id} presentations failed to save.")
          publications = profile.delete('publications') || []
          # Hash#keys ignores a block, so the fields must be filtered
          # explicitly to be trimmed at all.
          publications.each do |publication|
            publication.keep_if { |field, _| pubs_fields.include?(field) }
          end
          save.call(@publications,
                    { 'profileId' => id, 'publications' => publications },
                    "Profile #{id} publications failed to save.")
          save.call(@profiles, profile, "Profile #{id} failed to save.")
        end
      end
      page += 1
      break if data['lastPage']
    end
  rescue => e
    msg = e.message
    binding.pry if @config.debug
    @config.logger.error msg
  ensure
    repo_commit(total)
  end
rescue => e
  # reached for failures in authenticate or repo_commit
  msg = e.message
  binding.pry if @config.debug
  @config.logger.error(msg)
end
|
176
|
+
|
177
|
+
# def update_profiles
|
178
|
+
# # profile['profileId']
|
179
|
+
# # => 42005
|
180
|
+
# # [13] pry(main)> profile['profileId'].class
|
181
|
+
# # => Fixnum
|
182
|
+
# # [14] pry(main)> profile['lastModified']
|
183
|
+
# # => "2015-08-17T10:55:46.772-07:00"
|
184
|
+
# end
|
185
|
+
|
186
|
+
# @return ids [Array<Integer>] profile ids from local repo
|
187
|
+
# List the profile ids held in the local repo.
# @return [Array<Integer>, nil] nil when the repo is neither a Daybreak db
#   nor a Mongo collection
def profile_ids
  case @profiles
  when Daybreak::DB
    # the keys are converted back to integers
    @profiles.keys.map(&:to_i)
  when Mongo::Collection
    id_docs = @profiles.find.projection({profileId:1})
    id_docs.map { |doc| doc['profileId'] }
  end
end
|
194
|
+
|
195
|
+
# return profile data from local repo
|
196
|
+
# @param id [Integer] A profileId number
|
197
|
+
# @return profile [Hash|nil]
|
198
|
+
# Look up one profile in the local repo.
# @param id [Integer] A profileId number
# @return [Hash, nil] the stored profile, nil when it is not found
def profile(id)
  case @profiles
  when Daybreak::DB
    # the Daybreak lookup uses the id as a string key
    @profiles[id.to_s]
  when Mongo::Collection
    matches = @profiles.find({profileId: id})
    matches.first
  end
end
|
205
|
+
|
206
|
+
# return presentation data from local repo
|
207
|
+
# @param id [Integer] A profileId number
|
208
|
+
# @return presentations [Array<Hash>|nil]
|
209
|
+
# Presentation data for a profile, read from the local repo.
#
# The two stores return different shapes: Daybreak yields the value of
# the profile's 'presentations' field, Mongo yields the first matching
# document from the presentations collection.
#
# @param id [Integer] A profileId number
# @return [Object, nil] nil when nothing is found or the Daybreak lookup fails
def presentation(id)
  case @profiles
  when Daybreak::DB
    begin
      @profiles[id.to_s]['presentations']
    rescue StandardError
      # e.g. no profile is stored for this id
      nil
    end
  when Mongo::Collection
    @presentations.find({profileId: id}).first
  end
end
|
220
|
+
|
221
|
+
# return publication data from local repo
|
222
|
+
# @param id [Integer] A profileId number
|
223
|
+
# Publication data for a profile, read from the local repo.
#
# The two stores return different shapes: Daybreak yields the value of
# the profile's 'publications' field, Mongo yields the first matching
# document from the publications collection.
#
# @param id [Integer] A profileId number
# @return [Object, nil] nil when nothing is found or the Daybreak lookup fails
def publication(id)
  case @profiles
  when Daybreak::DB
    begin
      @profiles[id.to_s]['publications']
    rescue StandardError
      # e.g. no profile is stored for this id
      nil
    end
  when Mongo::Collection
    @publications.find({profileId: id}).first
  end
end
|
234
|
+
|
235
|
+
# A profile's processing data.
|
236
|
+
# @param id [Integer] A profileId number
|
237
|
+
# A profile's processing data.
#
# With a Daybreak repo this is the 'processed' field of the stored
# profile; otherwise it is the first matching document of the Mongo
# `processed` collection.
#
# @param id [Integer] A profileId number
# @return [Object, nil] nil when nothing is found or the Daybreak lookup fails
def processed(id)
  if @profiles.is_a?(Daybreak::DB)
    begin
      return @profiles[id.to_s]['processed']
    rescue StandardError
      # e.g. no profile is stored for this id
      return nil
    end
  end
  return nil unless @processed.is_a?(Mongo::Collection)

  @processed.find({profileId: id}).first
end
|
248
|
+
|
249
|
+
# Update a profile record with processing data.
|
250
|
+
# @param id [Integer] A profileId number
|
251
|
+
# @param data [Hash] Optional processing information
|
252
|
+
# Update a profile record with processing data.
#
# Daybreak: the processing document is set on the stored profile and the
# profile is assigned back to its key.
# NOTE(review): assumes Daybreak only persists values written through
# `[]=`, so a mutation of the fetched Hash alone would be lost - confirm.
#
# Mongo: the document for this profileId is replaced, or inserted when
# absent. `repo_commit` puts a unique index on profileId, so a plain
# insert would fail on the second update of the same profile.
#
# @param id [Integer] A profileId number
# @param data [Hash] Optional processing information
# @return [Hash, Object, nil] the processing document (Daybreak) or the
#   result of the Mongo write; nil when neither store is available
# @raise [NoMethodError] with a Daybreak repo that has no profile for id
def process_update(id, data=nil)
  if @profiles.is_a? Daybreak::DB
    process_doc = {
      lastModified: Time.now.to_i,
      data: data
    }
    key = id.to_s
    profile = @profiles[key]
    profile['processed'] = process_doc
    @profiles[key] = profile # write the modified profile back
    process_doc
  elsif @processed.is_a? Mongo::Collection
    process_doc = {
      profileId: id,
      lastModified: Time.now.to_i,
      data: data
    }
    @processed.replace_one({ profileId: id }, process_doc, upsert: true)
  end
end
|
268
|
+
|
269
|
+
private
|
270
|
+
|
271
|
+
# Empty the local repo before a fresh download: a Daybreak db is cleared,
# the Mongo collections are dropped and re-created one after another.
def repo_clean
  case @profiles
  when Daybreak::DB
    @profiles.clear
  when Mongo::Collection
    [@profiles, @presentations, @publications, @processed].each do |collection|
      collection.drop
      collection.create
    end
  end
end
|
285
|
+
|
286
|
+
# Finish a download: flush, compact and reload a Daybreak db, or create a
# unique profileId index on each Mongo collection, then print how many
# profiles were stored and where.
# @param total [Integer] the profile count to report alongside the stored count
def repo_commit(total)
  case @profiles
  when Daybreak::DB
    @profiles.flush
    @profiles.compact
    @profiles.load
    puts "Stored #{@profiles.size} of #{total} profiles."
    puts "Stored profiles to #{@profiles.class} at: #{@profiles.file}."
  when Mongo::Collection
    index_spec = {"profileId" => 1}
    [@profiles, @presentations, @publications, @processed].each do |collection|
      collection.indexes.create_one( index_spec, :unique => true )
    end
    puts "Stored #{@profiles.find.count} of #{total} profiles."
    puts "Stored profiles to #{@profiles.class} at: #{@profiles.namespace}."
  end
end
|
303
|
+
|
304
|
+
# Migrate CAP API profile data from a Daybreak::DB into mongodb
|
305
|
+
# Migrate CAP API profile data from a Daybreak::DB into mongodb.
#
# The mongo `profiles` collection is dropped, every Daybreak record is
# inserted, a unique profileId index is created and the copy is checked
# with `daybreak_matches_mongo?`.
#
# @return [Boolean] the result of `daybreak_matches_mongo?`
def profiles_daybreak_to_mongo
  mongo = Cap.configuration.cap_repo_mongo
  mongo[:profiles].drop
  db = Cap.configuration.cap_repo_daybreak
  # `keys` ignores a block, so iterate with `each`; read each record from
  # the Daybreak source rather than from the client's current repo.
  db.keys.each do |id|
    mongo[:profiles].insert_one(db[id])
  end
  mongo[:profiles].indexes.create_one({profileId:1}, :unique => true )
  daybreak_matches_mongo?
end
|
316
|
+
|
317
|
+
# Validate a daybreak to mongo data transfer
|
318
|
+
# Validate a daybreak to mongo data transfer.
#
# Each Daybreak record is compared with the mongo document that has the
# same profileId, ignoring the document's "_id". A record without a mongo
# document counts as a mismatch, and checking stops at the first mismatch.
#
# @return [Boolean] true when every Daybreak profile has an equal mongo
#   document (also true when the Daybreak repo is empty)
def daybreak_matches_mongo?
  mongo = Cap.configuration.cap_repo_mongo
  profiles = Cap.configuration.cap_repo_daybreak
  profiles.keys.all? do |id|
    mongo_profile = mongo[:profiles].find(:profileId => id.to_i).first
    next false if mongo_profile.nil? # not transferred

    mongo_profile.delete("_id")
    mongo_profile == profiles[id]
  end
end
|
329
|
+
|
330
|
+
# Request headers that declare JSON for both the accepted and the sent
# content type.
# @return [Hash{Symbol => String}]
def json_payloads
  Hash[%i[accept content_type].map { |header| [header, JSON_CONTENT] }]
end
|
333
|
+
|
334
|
+
end
|
335
|
+
end
|
336
|
+
end
|