cap-vivo-mapper 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e938fd82abdc61cc4cf0a92494f51fc7e74d485d
4
+ data.tar.gz: 1887b0969c45d7e05b394f1da3b518f6f9bc9c76
5
+ SHA512:
6
+ metadata.gz: 1491afbafa6105146cf1c556038062e9f6a7ec09e82db208ee25ad1d0be320748b1cc7ce8682b271558fe2d274a4b4ca01d145fa0ba47297d47345e74965d8e0
7
+ data.tar.gz: fddf639dfebae49a33d939c2d8e9deee62605985a5b2e6a25fc1066ffc94c6a6cb61bad1765cf5225174032ffe9ef70bcf840999feb217bb23c5e503acee5fb3
@@ -0,0 +1,37 @@
1
+ # https://github.com/bkeepers/dotenv is used for
2
+ # default configuration options. The values in
3
+ # this file do not replace existing values in
4
+ # the shell ENV.
5
+
6
+ # Uncomment and set values as required. See used settings in
7
+ # lib/*/configuration.rb
8
+
9
+ export DEBUG=false
10
+
11
+ export CAP_VIVO_LOG_FILE='log/cap_vivo_mapper.log'
12
+
13
+ # true = convert all CAP profiles into VIVO linked data, replacing any
14
+ # existing VIVO records
15
+ # false = do not replace existing VIVO records, only convert CAP profiles
16
+ # that are not already VIVO records
17
+ export CAP_VIVO_REPLACE=false
18
+
19
+ # Configure the client service
20
+ export CAP_API_URL='https://cap.example.com'
21
+ export CAP_API_ORGS='/api/cap/v1/orgs'
22
+ export CAP_API_PROFILES='/api/profiles/v1'
23
+ export CAP_API_SCHEMA='/api/cap/v1/schemas'
24
+ export CAP_API_SEARCH='/api/cap/v1/search'
25
+ export CAP_API_SEARCH_AC='/api/cap/v1/search/autocomplete'
26
+ export CAP_API_SEARCH_KW='/api/cap/v1/search/keyword'
27
+ export CAP_API_LOG_FILE='log/cap_vivo_mapper.log'
28
+
29
+ # Parameters for client authentication
30
+ export CAP_TOKEN_URI='https://authz.example.com/oauth/token'
31
+ export CAP_TOKEN_USER=user
32
+ export CAP_TOKEN_PASS=pass
33
+ export CAP_TOKEN_ACCESS=xyz.etc
34
+
35
+ # Parameters for CAP and VIVO data persistence
36
+ export CAP_REPO_4STORE='http://localhost:9000'
37
+ export CAP_REPO_MONGO='mongodb://127.0.0.1:27017/cap'
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in cap-vivo-mapper.gemspec
4
+ gemspec
@@ -0,0 +1,13 @@
1
+ Copyright 2015 The Board of Trustees of the Leland Stanford Junior University.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
@@ -0,0 +1,110 @@
1
+ # Cap::Vivo::Mapper
2
+
3
+ This utility maps Stanford CAP profiles to VIVO.
4
+
5
+ ## Initial Objectives
6
+
7
+ * Identification of how CAP overlaps with the basics of the VIVO-ISF model
8
+ * specifically as relates to People and their relationships
9
+ * e.g. the LODE and eagle-i views, plus docs and examples on the wiki
10
+ * A test case transformation
11
+ * A document mapping CAP person data to VIVO-ISF
12
+ * A json transform utility to implement the mapping
13
+ * Retrieving CAP profile data from the CAP API
14
+ * Also investigate CAP publication data
15
+ * consider mapping to both VIVO-ISF and simple BibFrame
16
+
17
+ ### Stanford CAP Resources
18
+
19
+ - https://cap.stanford.edu/cap-api/console
20
+
21
+ ### VIVO Resources
22
+
23
+ - https://wiki.duraspace.org/display/VIVO/VIVO
24
+ - https://wiki.duraspace.org/display/VIVO/Major+concepts+in+VIVO+to+get+you+started
25
+ - https://wiki.duraspace.org/display/VIVO/VIVO-ISF+Ontology
26
+ - http://www.vivoweb.org/download
27
+
28
+ ## Installation
29
+
30
+ Add this line to your application's Gemfile:
31
+
32
+ ```ruby
33
+ gem 'cap-vivo-mapper'
34
+ ```
35
+
36
+ And then execute:
37
+
38
+ $ bundle
39
+
40
+ Or install it yourself as:
41
+
42
+ $ gem install cap-vivo-mapper
43
+
44
+ ## Usage
45
+
46
+ ### Setup
47
+
48
+ #### MongoDB
49
+
50
+ ```sh
51
+ # The following worked on an Ubuntu desktop system
52
+ sudo apt-get install mongodb
53
+ ```
54
+
55
+ #### 4store
56
+
57
+ ```sh
58
+ # The following worked on an Ubuntu desktop system
59
+ sudo apt-get install 4store
60
+ sudo 4store status
61
+ sudo service 4store stop
62
+ sudo service 4store status
63
+ # Only setup the backend once (it erases existing data)
64
+ sudo 4s-backend-setup cap_vivo
65
+ sudo 4s-backend cap_vivo
66
+ sudo 4s-httpd -h # describes the options used below
67
+ sudo 4s-httpd -p 9000 -U -s -1 cap_vivo
68
+ ```
69
+
70
+ 4store should be running a SPARQL server on the `cap_vivo` knowledge base; take a look at http://localhost:9000/status/.
71
+
72
+ ### Configure and Run Conversion
73
+
74
+ Use the example configuration in
75
+ https://github.com/sul-dlss/cap-vivo-mapper/blob/master/.env_example
76
+
77
+ ```sh
78
+ mkdir -p ~/tmp/cap_vivo/log
79
+ cd ~/tmp/cap_vivo
80
+ project='https://raw.githubusercontent.com/sul-dlss/cap-vivo-mapper'
81
+ wget ${project}/master/.env_example
82
+ cp .env_example .env
83
+ vim .env # hopefully this file is self-explanatory
84
+ # If it's not already installed, install the gem.
85
+ gem install cap-vivo-mapper
86
+ # Run it overnight, unless you have a high bandwidth connection to the
87
+ # CAP API and a fast system. So, watch it for any immediate failures;
88
+ # if it's running, then leave it overnight. The expected runtime is on
89
+ # the order of hours.
90
+ cap2vivo
91
+ ```
92
+
93
+
94
+ ## Development
95
+
96
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
97
+
98
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
99
+
100
+ ## Contributing
101
+
102
+ Bug reports and pull requests are welcome on GitHub at
103
+ https://github.com/sul-dlss/cap-vivo-mapper.
104
+
105
+ ## License
106
+
107
+ Copyright 2015 The Board of Trustees of the Leland Stanford Junior University.
108
+
109
+ The gem is available as open source under the terms of the [Apache 2 License](http://www.apache.org/licenses/LICENSE-2.0).
110
+
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
# Define the `spec` task, which runs the RSpec suite.
RSpec::Core::RakeTask.new :spec

# Running `rake` with no arguments runs the specs.
task default: :spec
@@ -0,0 +1,67 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'bundler/setup'
4
+
5
+ require 'pry'
6
+ require 'cap'
7
+ require 'cap/client'
8
+ require 'cap/vivo'
9
+
# init configs
config = Cap.configuration

# client will load profile data from a local repo; if it is empty, it
# can be populated from the CAP API using `client.get_profiles`.  The
# profile data is available in `client.profiles`.
client = Cap::Client::Client.new

# Register the cleanup hook before any long-running work starts, so the
# local repo is still closed when the run aborts or is interrupted.
at_exit do
  config.cap_repo.close if config.cap_repo.is_a? Daybreak::DB
  client = nil
  config = nil
  sleep 2 # give it a chance to close
end

client.get_profiles

# Convert all the CAP API profiles into VIVO linked data and
# store it in the CAP_REPO_4STORE triple store.
ids = client.profile_ids
if config.rdf_replace
  puts 'Replacing all VIVO linked data'
  config.rdf_repo.clear
else
  puts 'Updating VIVO linked data'
  query = 'SELECT ?person WHERE { ?person a <http://xmlns.com/foaf/0.1/Person> }'
  solutions = config.rdf_repo.client.query(query)
  # The profile id is taken from the last path segment of each person URI.
  vivo_ids = solutions.map { |s| s[:person].to_s.split('/').last.to_i }
  # Array difference drops every id already present in the triple store
  # without scanning vivo_ids once per profile id.
  ids -= vivo_ids
end

puts "Processing #{ids.length} CAP profiles"
start = Time.now.to_i
ids.each_with_index do |id, index|
  count = index + 1
  begin
    # The profile lookup is inside the begin block so that one failed
    # lookup is reported like any other failure instead of ending the run.
    mapper = Cap::Vivo::Mapper.new client.profile(id)
    mapper.create_vivo
    mapper.save
    printf "Mapping %4d profiles:\n", ids.length if count == 1
    if (count % 25).zero?
      # Elapsed whole seconds since the loop started, passed as an Integer.
      printf "\nmapped %4d of %d (%4d sec)\n", count, ids.length, Time.now.to_i - start
    else
      printf '.'
    end
  rescue => e
    puts
    printf "FAILED profileId %d (%d of %d)\n", id, count, ids.length
    puts e.message
    puts
  end
end
puts
@@ -0,0 +1,51 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'cap/vivo/version'
5
+
# Package description for the cap-vivo-mapper gem.
Gem::Specification.new do |gem|
  gem.name    = 'cap-vivo-mapper'
  gem.version = Cap::Vivo::VERSION
  gem.authors = ['Darren L. Weber, Ph.D.']
  gem.email   = ['darren.weber@stanford.edu']

  gem.summary     = 'This utility maps Stanford CAP profiles to VIVO.'
  gem.description = 'This utility maps Stanford CAP profiles to VIVO.'
  gem.homepage    = 'https://github.com/sul-dlss/cap-vivo-mapper'
  gem.licenses    = ['Apache-2.0']

  # Runtime dependencies, in declaration order:
  #   dotenv                      - configuration from .env
  #   daybreak                    - memory mapped file db
  #   mongo                       - MongoDB driver
  #   linkeddata, rdf-4store      - RDF and the 4store repository
  #   faraday, faraday_middleware - HTTP client
  runtime_gems = %w(
    dotenv
    daybreak
    mongo
    linkeddata
    rdf-4store
    faraday
    faraday_middleware
  )
  runtime_gems.each { |name| gem.add_dependency name }

  # Development dependencies as [name, *version requirements]; pry is used
  # for console and debug config.
  development_gems = [
    ['pry'],
    ['pry-doc'],
    ['bundler', '~> 1.10'],
    ['rake', '~> 10.0'],
    ['rspec'],
    ['vcr'],
    ['webmock'],
    ['coveralls'],
    ['guard'],
    ['guard-ctags-bundler']
  ]
  development_gems.each do |name, *requirements|
    gem.add_development_dependency name, *requirements
  end

  # Package every git-tracked file outside the test directories, minus the
  # development-only scripts and dot files listed here.
  test_dirs = %r{^(test|spec|features)/}
  tracked   = `git ls-files -z`.split("\x0").reject { |path| path.match(test_dirs) }
  dev_bins  = %w(bin/console bin/ctags.rb bin/setup bin/test.rb)
  dot_files = %w(.gitignore .travis.yml log/.gitignore)

  gem.files         = tracked - (dev_bins + dot_files)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(test_dirs)
  gem.require_paths = ['lib']
end
@@ -0,0 +1,32 @@
1
+ require 'dotenv'
2
+ Dotenv.load
3
+
4
+ require 'linkeddata'
5
+ require 'rdf/4store'
6
+ require 'daybreak'
7
+ require 'mongo'
8
+ require_relative 'cap/configuration'
9
+
# This is a utility working with Stanford CAP and VIVO data mappings.
# https://github.com/sul-dlss/cap-vivo-mapper
module Cap
  # Configuration at the module level, see
  # http://brandonhilkert.com/blog/ruby-gem-configuration-patterns/
  class << self
    attr_writer :configuration

    # @return [Configuration] the module configuration, built on first access
    def configuration
      @configuration ||= Configuration.new
    end

    # Discard the current configuration and install a new one.
    # @return [Configuration] the new configuration
    def reset
      self.configuration = Configuration.new
    end

    # Yield the module configuration to the given block.
    def configure
      yield configuration
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'dotenv'
2
+ Dotenv.load
3
+
4
+ require_relative 'client/configuration'
5
+ require_relative 'client/cap_client'
6
+
7
+ # This is a utility working with Stanford CAP.
8
+ # https://github.com/sul-dlss/cap-vivo-mapper
9
+ module Cap
10
+ module Client
11
+
12
+ # Configuration at the module level, see
13
+ # http://brandonhilkert.com/blog/ruby-gem-configuration-patterns/
14
+ class << self
15
+ attr_writer :configuration
16
+ end
17
+
18
+ def self.configuration
19
+ @configuration ||= Configuration.new
20
+ end
21
+
22
+ def self.reset
23
+ @configuration = Configuration.new
24
+ end
25
+
26
+ def self.configure
27
+ yield(configuration)
28
+ end
29
+
30
+ end
31
+ end
@@ -0,0 +1,336 @@
module Cap
  module Client

    require 'faraday'
    require 'faraday_middleware'

    # CAP Public Website  https://profiles.stanford.edu
    # Profiles API        https://api.stanford.edu/profiles/v1
    # Orgs API            https://api.stanford.edu/cap/v1/orgs
    # Search API          https://api.stanford.edu/cap/v1/search
    # Developer's API     https://cap.stanford.edu/cap-api/console

    # Client for the CAP API.  It authenticates against the token service,
    # downloads profile pages and stores them in the local repo taken from
    # `Cap.configuration.cap_repo` (a Daybreak::DB or a Mongo::Client).
    class Client

      JSON_CONTENT = 'application/json'
      BSON_MAX = 16777216

      # The only publication fields kept when publications are split out of
      # a profile for storage in mongodb.
      PUBLICATION_FIELDS = %w(doiId doiUrl webOfScienceId webOfScienceUrl).freeze

      attr_reader :config
      attr_reader :cap_api
      attr_reader :profiles

      # Initialize a new client: select the local repo collections and build
      # the Faraday connections for the CAP API and the token service.
      def initialize
        @config = Cap::Client.configuration
        repo = Cap.configuration.cap_repo
        if repo.is_a? Daybreak::DB
          @profiles = repo
        elsif repo.is_a? Mongo::Client
          @profiles = repo[:profiles]
          @presentations = repo[:presentations]
          @publications = repo[:publications]
          @processed = repo[:processed]
        end
        # CAP API
        @cap_uri = 'https://api.stanford.edu'
        @cap_profiles = '/profiles/v1'
        @cap_orgs = '/cap/v1/orgs'
        @cap_search = '/cap/v1/search'
        @cap_api = Faraday.new(url: @cap_uri) do |f|
          # f.use FaradayMiddleware::FollowRedirects, limit: 3
          # f.use Faraday::Response::RaiseError # raise exceptions on 40x, 50x
          # f.request :logger, @config.logger
          f.request :json
          f.response :json, :content_type => JSON_CONTENT
          f.adapter Faraday.default_adapter
        end
        @cap_api.options.timeout = 90
        @cap_api.options.open_timeout = 10
        @cap_api.headers.merge!(json_payloads)
        # Authentication
        auth_uri = 'https://authz.stanford.edu/oauth/token'
        @auth = Faraday.new(url: auth_uri) do |f|
          f.request :url_encoded
          f.response :json, :content_type => JSON_CONTENT
          f.adapter Faraday.default_adapter
        end
        @auth.options.timeout = 30
        @auth.options.open_timeout = 10
        @auth.headers.merge!(json_payloads)
      end

      # Reset authentication: forget the current token expiry and request a
      # new access token.
      # @return [String, false] see #authenticate
      def authenticate!
        @access_expiry = nil
        authenticate
      end

      # Ensure the CAP API connection carries a current access token.  A
      # token that has not expired is reused; otherwise a new one is
      # requested using the configured token user and password.
      # @return [String, false] the "Bearer ..." authorization value, or
      #   false when both credentials are empty, the token request does not
      #   return 200, or the response has no access token
      def authenticate
        if @access_expiry.to_i < Time.now.to_i
          @access_code = nil
          @auth.headers.delete :Authorization
          @cap_api.headers.delete :Authorization
        end
        return @access_code if @access_code
        return false if @config.token_user.empty? && @config.token_pass.empty?
        credentials = "#{@config.token_user}:#{@config.token_pass}"
        # Array#pack('m0') produces base64 without line feeds, so no
        # `require 'base64'` is needed for the Basic credentials.
        auth_code = 'Basic ' + [credentials].pack('m0')
        @auth.headers.merge!({ Authorization: auth_code })
        response = @auth.get "?grant_type=client_credentials"
        return false unless response.status == 200
        access = response.body
        return false if access['access_token'].nil?
        @access_code = "Bearer #{access['access_token']}"
        @access_expiry = Time.now.to_i + access['expires_in'].to_i
        @cap_api.headers[:Authorization] = @access_code
      end

      # Get profiles from CAP API and store into local repo.  The local repo
      # is cleaned first, then profile pages (100 profiles per request) are
      # retrieved until the API reports the last page or a request fails.
      # Errors are logged rather than raised; the repo is always committed.
      def get_profiles
        unless authenticate
          @config.logger.error "Failed to authenticate"
          return
        end
        page = 1
        pages = 0
        total = 0
        begin
          repo_clean
          loop do
            response = @cap_api.get "#{@cap_profiles}?p=#{page}&ps=100"
            unless response.status == 200
              msg = "Failed to GET profiles page #{page}: #{response.status}"
              @config.logger.error msg
              puts msg
              break
            end
            data = response.body
            if data['firstPage']
              # Only the first page is used to record the overall totals.
              pages = data['totalPages']
              total = data['totalCount']
              puts "Retrieved #{page} of #{pages} pages (#{total} profiles)."
            else
              puts "Retrieved #{page} of #{pages} pages."
            end
            store_profiles(data['values'])
            page += 1
            break if data['lastPage']
          end
        rescue => e
          binding.pry if @config.debug
          @config.logger.error e.message
        ensure
          repo_commit(total)
        end
      rescue => e
        binding.pry if @config.debug
        @config.logger.error(e.message)
      end

      # def update_profiles
      #   # profile['profileId']
      #   # => 42005
      #   # [13] pry(main)> profile['profileId'].class
      #   # => Fixnum
      #   # [14] pry(main)> profile['lastModified']
      #   # => "2015-08-17T10:55:46.772-07:00"
      # end

      # @return ids [Array<Integer>] profile ids from local repo
      def profile_ids
        if @profiles.is_a? Daybreak::DB
          @profiles.keys.map { |key| key.to_i }
        elsif @profiles.is_a? Mongo::Collection
          @profiles.find.projection({profileId:1}).map { |doc| doc['profileId'] }
        end
      end

      # return profile data from local repo
      # @param id [Integer] A profileId number
      # @return profile [Hash|nil]
      def profile(id)
        if @profiles.is_a? Daybreak::DB
          @profiles[id.to_s]
        elsif @profiles.is_a? Mongo::Collection
          @profiles.find({profileId: id}).first
        end
      end

      # return presentation data from local repo
      # @param id [Integer] A profileId number
      # @return the profile's 'presentations' value (Daybreak), the first
      #   matching document of the presentations collection (mongodb), or nil
      def presentation(id)
        if @profiles.is_a? Daybreak::DB
          begin
            @profiles[id.to_s]['presentations']
          rescue
            nil
          end
        elsif @profiles.is_a? Mongo::Collection
          @presentations.find({profileId: id}).first
        end
      end

      # return publication data from local repo
      # @param id [Integer] A profileId number
      # @return the profile's 'publications' value (Daybreak), the first
      #   matching document of the publications collection (mongodb), or nil
      def publication(id)
        if @profiles.is_a? Daybreak::DB
          begin
            @profiles[id.to_s]['publications']
          rescue
            nil
          end
        elsif @profiles.is_a? Mongo::Collection
          @publications.find({profileId: id}).first
        end
      end

      # A profile's processing data.
      # @param id [Integer] A profileId number
      # @return the profile's 'processed' value (Daybreak), the first
      #   matching document of the processed collection (mongodb), or nil
      def processed(id)
        if @profiles.is_a? Daybreak::DB
          begin
            @profiles[id.to_s]['processed']
          rescue
            nil
          end
        elsif @processed.is_a? Mongo::Collection
          @processed.find({profileId: id}).first
        end
      end

      # Update a profile record with processing data.
      # @param id [Integer] A profileId number
      # @param data [Hash] Optional processing information
      def process_update(id, data=nil)
        if @profiles.is_a? Daybreak::DB
          process_doc = {
            lastModified: Time.now.to_i,
            data: data
          }
          @profiles[id.to_s]['processed'] = process_doc
        elsif @processed.is_a? Mongo::Collection
          process_doc = {
            profileId: id,
            lastModified: Time.now.to_i,
            data: data
          }
          @processed.insert_one(process_doc)
        end
      end

      private

      # Store one page of profiles into the local repo.
      # @param profiles [Array<Hash>] the 'values' of a profiles API page
      def store_profiles(profiles)
        if @profiles.is_a? Daybreak::DB
          profiles.each do |profile|
            @profiles[profile["profileId"]] = profile
          end
          @profiles.flush
        elsif @profiles.is_a? Mongo::Collection
          profiles.each { |profile| store_profile_mongo(profile) }
        end
      end

      # Store one profile into mongodb.  The presentation and publication
      # data are split out into their own collections to accommodate the
      # 16Mb limit on mongodb docs.
      # @param profile [Hash] a profile; its 'presentations' and
      #   'publications' entries are removed from it
      def store_profile_mongo(profile)
        id = profile['profileId']

        presentations = profile.delete('presentations') || []
        presentations.each { |presentation| presentation.delete('detail') }
        insert_or_log(@presentations,
                      {'profileId' => id, 'presentations' => presentations},
                      "Profile #{id} presentations failed to save.")

        publications = profile.delete('publications') || []
        # Hash#keys ignores a block, so the fields are pruned with keep_if.
        publications.each do |publication|
          publication.keep_if { |field, _| PUBLICATION_FIELDS.include? field }
        end
        insert_or_log(@publications,
                      {'profileId' => id, 'publications' => publications},
                      "Profile #{id} publications failed to save.")

        insert_or_log(@profiles, profile, "Profile #{id} failed to save.")
      end

      # Insert a document into a mongodb collection; when the insert raises,
      # log the failure message instead of propagating the error.
      # @param collection [Mongo::Collection]
      # @param doc [Hash]
      # @param failure_msg [String] logged as an error when the insert fails
      def insert_or_log(collection, doc, failure_msg)
        collection.insert_one(doc)
      rescue
        @config.logger.error failure_msg
      end

      # Empty the local repo before a new download.
      def repo_clean
        if @profiles.is_a? Daybreak::DB
          @profiles.clear
        elsif @profiles.is_a? Mongo::Collection
          [@profiles, @presentations, @publications, @processed].each do |collection|
            collection.drop
            collection.create
          end
        end
      end

      # Finish a download: flush/compact the Daybreak file or create the
      # unique profileId indexes in mongodb, then report what was stored.
      # @param total [Integer] number of profiles reported by the CAP API
      def repo_commit(total)
        if @profiles.is_a? Daybreak::DB
          @profiles.flush
          @profiles.compact
          @profiles.load
          puts "Stored #{@profiles.size} of #{total} profiles."
          puts "Stored profiles to #{@profiles.class} at: #{@profiles.file}."
        elsif @profiles.is_a? Mongo::Collection
          indexes = {"profileId" => 1}
          [@profiles, @presentations, @publications, @processed].each do |collection|
            collection.indexes.create_one( indexes, :unique => true )
          end
          puts "Stored #{@profiles.find.count} of #{total} profiles."
          puts "Stored profiles to #{@profiles.class} at: #{@profiles.namespace}."
        end
      end

      # Migrate CAP API profile data from a Daybreak::DB into mongodb
      # @return [Boolean] result of #daybreak_matches_mongo?
      def profiles_daybreak_to_mongo
        mongo = Cap.configuration.cap_repo_mongo
        mongo[:profiles].drop
        db = Cap.configuration.cap_repo_daybreak
        # Iterate explicitly (Hash-style #keys ignores a block) and read each
        # profile from the Daybreak source rather than from @profiles.
        db.keys.each do |id|
          mongo[:profiles].insert_one(db[id])
        end
        mongo[:profiles].indexes.create_one({profileId:1}, :unique => true )
        daybreak_matches_mongo?
      end

      # Validate a daybreak to mongo data transfer
      # @return [Boolean] true when every Daybreak profile has an equal
      #   mongodb document (ignoring the mongodb "_id" field)
      def daybreak_matches_mongo?
        mongo = Cap.configuration.cap_repo_mongo
        profiles = Cap.configuration.cap_repo_daybreak
        profiles.keys.all? do |id|
          mongo_profile = mongo[:profiles].find(:profileId => id.to_i).first
          # A profile missing from mongodb is a mismatch, not an error.
          next false if mongo_profile.nil?
          mongo_profile.delete("_id")
          mongo_profile == profiles[id]
        end
      end

      # @return [Hash] headers requesting and declaring JSON payloads
      def json_payloads
        { accept: JSON_CONTENT, content_type: JSON_CONTENT }
      end

    end
  end
end