cap-vivo-mapper 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e938fd82abdc61cc4cf0a92494f51fc7e74d485d
4
+ data.tar.gz: 1887b0969c45d7e05b394f1da3b518f6f9bc9c76
5
+ SHA512:
6
+ metadata.gz: 1491afbafa6105146cf1c556038062e9f6a7ec09e82db208ee25ad1d0be320748b1cc7ce8682b271558fe2d274a4b4ca01d145fa0ba47297d47345e74965d8e0
7
+ data.tar.gz: fddf639dfebae49a33d939c2d8e9deee62605985a5b2e6a25fc1066ffc94c6a6cb61bad1765cf5225174032ffe9ef70bcf840999feb217bb23c5e503acee5fb3
@@ -0,0 +1,37 @@
1
+ # https://github.com/bkeepers/dotenv is used for
2
+ # default configuration options. The values in
3
+ # this file do not replace existing values in
4
+ # the shell ENV.
5
+
6
+ # Uncomment and set values as required. See used settings in
7
+ # lib/*/configuration.rb
8
+
9
+ export DEBUG=false
10
+
11
+ export CAP_VIVO_LOG_FILE='log/cap_vivo_mapper.log'
12
+
13
+ # true = convert all CAP profiles into VIVO linked data, replacing any
14
+ # existing VIVO records
15
+ # false = do not replace existing VIVO records, only convert CAP profiles
16
+ # that are not already VIVO records
17
+ export CAP_VIVO_REPLACE=false
18
+
19
+ # Configure the client service
20
+ export CAP_API_URL='https://cap.example.com'
21
+ export CAP_API_ORGS='/api/cap/v1/orgs'
22
+ export CAP_API_PROFILES='/api/profiles/v1'
23
+ export CAP_API_SCHEMA='/api/cap/v1/schemas'
24
+ export CAP_API_SEARCH='/api/cap/v1/search'
25
+ export CAP_API_SEARCH_AC='/api/cap/v1/search/autocomplete'
26
+ export CAP_API_SEARCH_KW='/api/cap/v1/search/keyword'
27
+ export CAP_API_LOG_FILE='log/cap_vivo_mapper.log'
28
+
29
+ # Parameters for client authentication
30
+ export CAP_TOKEN_URI='https://authz.example.com/oauth/token'
31
+ export CAP_TOKEN_USER=user
32
+ export CAP_TOKEN_PASS=pass
33
+ export CAP_TOKEN_ACCESS=xyz.etc
34
+
35
+ # Parameters for CAP and VIVO data persistence
36
+ export CAP_REPO_4STORE='http://localhost:9000'
37
+ export CAP_REPO_MONGO='mongodb://127.0.0.1:27017/cap'
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in cap-vivo-mapper.gemspec
4
+ gemspec
@@ -0,0 +1,13 @@
1
+ Copyright 2015 The Board of Trustees of the Leland Stanford Junior University.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
@@ -0,0 +1,110 @@
1
+ # Cap::Vivo::Mapper
2
+
3
+ This utility maps Stanford CAP profiles to VIVO.
4
+
5
+ ## Initial Objectives
6
+
7
+ * Identification of how CAP overlaps with the basics of the VIVO-ISF model
8
+ * specifically as relates to People and their relationships
9
+ * e.g. the LODE and eagle-i views, plus docs and examples on the wiki
10
+ * A test case transformation
11
+ * A document mapping CAP person data to VIVO-ISF
12
+ * A json transform utility to implement the mapping
13
+ * Retrieving CAP profile data from the CAP API
14
+ * Also investigate CAP publication data
15
+ * consider mapping to both VIVO-ISF and simple BibFrame
16
+
17
+ ### Stanford CAP Resources
18
+
19
+ - https://cap.stanford.edu/cap-api/console
20
+
21
+ ### VIVO Resources
22
+
23
+ - https://wiki.duraspace.org/display/VIVO/VIVO
24
+ - https://wiki.duraspace.org/display/VIVO/Major+concepts+in+VIVO+to+get+you+started
25
+ - https://wiki.duraspace.org/display/VIVO/VIVO-ISF+Ontology
26
+ - http://www.vivoweb.org/download
27
+
28
+ ## Installation
29
+
30
+ Add this line to your application's Gemfile:
31
+
32
+ ```ruby
33
+ gem 'cap-vivo-mapper'
34
+ ```
35
+
36
+ And then execute:
37
+
38
+ $ bundle
39
+
40
+ Or install it yourself as:
41
+
42
+ $ gem install cap-vivo-mapper
43
+
44
+ ## Usage
45
+
46
+ ### Setup
47
+
48
+ #### MongoDB
49
+
50
+ ```sh
51
+ # The following worked on an Ubuntu desktop system
52
+ sudo apt-get install mongodb
53
+ ```
54
+
55
+ #### 4store
56
+
57
+ ```sh
58
+ # The following worked on an Ubuntu desktop system
59
+ sudo apt-get install 4store
60
+ sudo 4store status
61
+ sudo service 4store stop
62
+ sudo service 4store status
63
+ # Only setup the backend once (it erases existing data)
64
+ sudo 4s-backend-setup cap_vivo
65
+ sudo 4s-backend cap_vivo
66
+ sudo 4s-httpd -h # describes the options used below
67
+ sudo 4s-httpd -p 9000 -U -s -1 cap_vivo
68
+ ```
69
+
70
+ 4store should be running a SPARQL server on the `cap_vivo` knowledge base; take a look at http://localhost:9000/status/.
71
+
72
+ ### Configure and Run Conversion
73
+
74
+ Use the example configuration in
75
+ https://github.com/sul-dlss/cap-vivo-mapper/blob/master/.env_example
76
+
77
+ ```sh
78
+ mkdir -p ~/tmp/cap_vivo/log
79
+ cd ~/tmp/cap_vivo
80
+ project='https://raw.githubusercontent.com/sul-dlss/cap-vivo-mapper'
81
+ wget ${project}/master/.env_example
82
+ cp .env_example .env
83
+ vim .env # hopefully this file is self explanatory
84
+ # If it's not already installed, install the gem.
85
+ gem install cap-vivo-mapper
86
+ # Run it overnight, unless you have a high bandwidth connection to the
87
+ # CAP API and a fast system. So, watch it for any immediate failures;
88
+ # if it's running, then leave it overnight. The expected runtime is on
89
+ # the order of hours.
90
+ cap2vivo
91
+ ```
92
+
93
+
94
+ ## Development
95
+
96
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
97
+
98
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
99
+
100
+ ## Contributing
101
+
102
+ Bug reports and pull requests are welcome on GitHub at
103
+ https://github.com/sul-dlss/cap-vivo-mapper.
104
+
105
+ ## License
106
+
107
+ Copyright 2015 The Board of Trustees of the Leland Stanford Junior University.
108
+
109
+ The gem is available as open source under the terms of the [Apache 2 License](http://www.apache.org/licenses/LICENSE-2.0).
110
+
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
# Define the `spec` rake task, which runs the RSpec suite.
RSpec::Core::RakeTask.new(:spec)

# A bare `rake` invocation runs the specs.
task default: :spec
@@ -0,0 +1,67 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'bundler/setup'
4
+
5
+ require 'pry'
6
+ require 'cap'
7
+ require 'cap/client'
8
+ require 'cap/vivo'
9
+
10
require 'set'

# init configs
config = Cap.configuration

# client will load profile data from a local repo; if it is empty, it
# can be populated from the CAP API using `client.get_profiles`.  The
# profile data is available in `client.profiles`.
client = Cap::Client::Client.new

# Register the cleanup hook before any long-running work starts, so the
# Daybreak file is also closed when the run is interrupted or aborts
# part-way through.
at_exit {
  config.cap_repo.close if config.cap_repo.is_a? Daybreak::DB
  client = nil
  config = nil
  sleep 2 # give it a chance to close
}

client.get_profiles

# Convert all the CAP API profiles into VIVO linked data and
# store it in the CAP_REPO_4STORE triple store.
# `profile_ids` returns nil when the repo is neither Daybreak nor Mongo.
ids = client.profile_ids || []
if config.rdf_replace
  puts 'Replacing all VIVO linked data'
  config.rdf_repo.clear
else
  puts 'Updating VIVO linked data'
  q = 'SELECT ?person WHERE { ?person a <http://xmlns.com/foaf/0.1/Person> }'
  r = config.rdf_repo.client.query(q)
  # The last path segment of each person URI is taken as the profile id.
  # A Set keeps the membership test below O(1) per profile.
  vivo_ids = Set.new(r.map { |s| s[:person].to_s.split('/').last.to_i })
  ids.delete_if { |id| vivo_ids.include? id }
end

puts "Processing #{ids.length} CAP profiles"
start = Time.now.to_i
ids.each_with_index do |id, index|
  count = index + 1
  begin
    # Fetching the profile is inside the rescue so one unreadable record
    # is reported as a failure instead of aborting the whole run.
    profile = client.profile(id)
    mapper = Cap::Vivo::Mapper.new profile
    mapper.create_vivo
    mapper.save
    if count % 25 == 0
      delay = Time.now.to_i - start
      printf "\nmapped %4d of %d (%4d sec)\n", count, ids.length, delay
    else
      printf "Mapping %4d profiles:\n", ids.length if count == 1
      printf '.'
    end
  rescue => e
    puts
    printf "FAILED profileId %d (%d of %d)\n", id, count, ids.length
    puts e.message
    puts
  end
end
puts
@@ -0,0 +1,51 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'cap/vivo/version'
5
+
6
# Gem specification for cap-vivo-mapper.
Gem::Specification.new do |spec|
  spec.name    = 'cap-vivo-mapper'
  spec.version = Cap::Vivo::VERSION
  spec.authors = ['Darren L. Weber, Ph.D.']
  spec.email   = ['darren.weber@stanford.edu']

  spec.summary     = 'This utility maps Stanford CAP profiles to VIVO.'
  spec.description = 'This utility maps Stanford CAP profiles to VIVO.'
  spec.homepage    = 'https://github.com/sul-dlss/cap-vivo-mapper'
  spec.licenses    = ['Apache-2.0']

  # Runtime dependencies: configuration (dotenv), local profile stores
  # (daybreak is a memory mapped file db, mongo), linked data
  # (linkeddata, rdf-4store) and the HTTP client (faraday).
  %w[
    dotenv
    daybreak
    mongo
    linkeddata
    rdf-4store
    faraday
    faraday_middleware
  ].each { |gem_name| spec.add_dependency gem_name }

  # Development dependencies; pry is used for console and debug config.
  [
    ['pry'],
    ['pry-doc'],
    ['bundler', '~> 1.10'],
    ['rake', '~> 10.0'],
    ['rspec'],
    ['vcr'],
    ['webmock'],
    ['coveralls'],
    ['guard'],
    ['guard-ctags-bundler']
  ].each { |dependency| spec.add_development_dependency(*dependency) }

  # Package the git-tracked files, leaving out the test directories,
  # the developer-only bin scripts and the listed dot files.
  tracked  = `git ls-files -z`.split("\x0")
  packaged = tracked.reject { |path| path.match(%r{^(test|spec|features)/}) }
  excluded = %w[bin/console bin/ctags.rb bin/setup bin/test.rb] +
             %w[.gitignore .travis.yml log/.gitignore]

  spec.files         = packaged - excluded
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']
end
@@ -0,0 +1,32 @@
1
+ require 'dotenv'
2
+ Dotenv.load
3
+
4
+ require 'linkeddata'
5
+ require 'rdf/4store'
6
+ require 'daybreak'
7
+ require 'mongo'
8
+ require_relative 'cap/configuration'
9
+
10
# Utilities for working with Stanford CAP and VIVO data mappings.
# https://github.com/sul-dlss/cap-vivo-mapper
module Cap
  # Module-level configuration, following
  # http://brandonhilkert.com/blog/ruby-gem-configuration-patterns/
  class << self
    attr_writer :configuration

    # @return the current configuration, created on first access
    def configuration
      @configuration ||= Configuration.new
    end

    # Replace the current configuration with a freshly built one.
    # @return the new configuration
    def reset
      @configuration = Configuration.new
    end

    # Yield the current configuration to the caller's block.
    def configure
      yield(configuration)
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'dotenv'
2
+ Dotenv.load
3
+
4
+ require_relative 'client/configuration'
5
+ require_relative 'client/cap_client'
6
+
7
# Utilities for working with Stanford CAP.
# https://github.com/sul-dlss/cap-vivo-mapper
module Cap
  module Client
    # Module-level configuration, following
    # http://brandonhilkert.com/blog/ruby-gem-configuration-patterns/
    class << self
      attr_writer :configuration

      # @return the current client configuration, created on first access
      def configuration
        @configuration ||= Configuration.new
      end

      # Replace the current client configuration with a freshly built one.
      # @return the new configuration
      def reset
        @configuration = Configuration.new
      end

      # Yield the current client configuration to the caller's block.
      def configure
        yield(configuration)
      end
    end
  end
end
@@ -0,0 +1,336 @@
1
+ module Cap
2
+ module Client
3
+
4
+ require 'faraday'
5
+ require 'faraday_middleware'
6
+
7
+ # CAP Public Website https://profiles.stanford.edu
8
+ # Profiles API https://api.stanford.edu/profiles/v1
9
+ # Orgs API https://api.stanford.edu/cap/v1/orgs
10
+ # Search API https://api.stanford.edu/cap/v1/search
11
+ # Developer's API https://cap.stanford.edu/cap-api/console
12
+
13
+ class Client
14
+
15
+ JSON_CONTENT = 'application/json'
16
+ BSON_MAX = 16777216
17
+
18
+ attr_reader :config
19
+ attr_reader :cap_api
20
+ attr_reader :profiles
21
+
22
# Build a client: select the local repo collections and create the
# Faraday connections for the CAP API and the token endpoint.
#
# The local repo comes from `Cap.configuration.cap_repo`.  A Daybreak::DB
# is used directly as the profile store; a Mongo::Client provides the
# :profiles, :presentations, :publications and :processed collections.
# The API and token URLs are hard-coded in this method.
def initialize
  @config = Cap::Client.configuration

  repo = Cap.configuration.cap_repo
  case repo
  when Daybreak::DB
    @profiles = repo
  when Mongo::Client
    @profiles      = repo[:profiles]
    @presentations = repo[:presentations]
    @publications  = repo[:publications]
    @processed     = repo[:processed]
  end

  # CAP API
  @cap_uri      = 'https://api.stanford.edu'
  @cap_profiles = '/profiles/v1'
  @cap_orgs     = '/cap/v1/orgs'
  @cap_search   = '/cap/v1/search'
  @cap_api = Faraday.new(url: @cap_uri) do |conn|
    # conn.use FaradayMiddleware::FollowRedirects, limit: 3
    # conn.use Faraday::Response::RaiseError # raise exceptions on 40x, 50x
    # conn.request :logger, @config.logger
    conn.request :json
    conn.response :json, content_type: JSON_CONTENT
    conn.adapter Faraday.default_adapter
  end
  @cap_api.options.timeout = 90
  @cap_api.options.open_timeout = 10
  @cap_api.headers.merge!(json_payloads)

  # Authentication
  token_uri = 'https://authz.stanford.edu/oauth/token'
  @auth = Faraday.new(url: token_uri) do |conn|
    conn.request :url_encoded
    conn.response :json, content_type: JSON_CONTENT
    conn.adapter Faraday.default_adapter
  end
  @auth.options.timeout = 30
  @auth.options.open_timeout = 10
  @auth.headers.merge!(json_payloads)
end
60
+
61
# Force re-authentication: forget the stored token expiry so that
# `authenticate` treats the current token as expired, then call it.
# @return whatever `authenticate` returns
def authenticate!
  @access_expiry = nil
  authenticate
end
66
+
67
# Obtain (or reuse) an access token for the CAP API.
#
# When the stored expiry time has passed (or was never set), the cached
# token and the Authorization headers on both connections are dropped.
# A cached token is returned as-is; otherwise the configured user and
# password are sent as HTTP Basic credentials to the token endpoint and
# the resulting bearer token is installed on the CAP API connection.
#
# @return [String, false] the "Bearer ..." header value, or false when
#   both credentials are blank, the response status is not 200, or the
#   response has no 'access_token'
def authenticate
  if @access_expiry.to_i < Time.now.to_i
    @access_code = nil
    @auth.headers.delete :Authorization
    @cap_api.headers.delete :Authorization
  end
  return @access_code if @access_code

  # to_s makes unset (nil) credentials behave like empty ones instead
  # of raising NoMethodError.
  user = @config.token_user.to_s
  pass = @config.token_pass.to_s
  return false if user.empty? && pass.empty?

  # pack('m0') is strict base64 (no line feeds); it avoids relying on
  # the Base64 module, which this file never requires.
  credentials = ["#{user}:#{pass}"].pack('m0')
  @auth.headers.merge!({ Authorization: "Basic #{credentials}" })
  response = @auth.get '?grant_type=client_credentials'
  return false unless response.status == 200

  access = response.body
  # The body is only a Hash when the JSON response middleware parsed it.
  return false unless access.is_a?(Hash) && !access['access_token'].nil?

  @access_code = "Bearer #{access['access_token']}"
  @access_expiry = Time.now.to_i + access['expires_in'].to_i
  @cap_api.headers[:Authorization] = @access_code
end
87
+
88
# Get profiles from CAP API and store into local repo.
#
# After authenticating, the local repo is cleared (`repo_clean`) and the
# profiles endpoint is read 100 records per page until a page reports
# 'lastPage' or a request returns a non-200 status.  Each page is stored
# as it arrives and `repo_commit` always runs afterwards.  Errors are
# logged through `@config.logger` rather than raised; when
# `@config.debug` is set a pry session is opened first.
def get_profiles
  if authenticate
    page = 1
    pages = 0
    total = 0
    begin
      repo_clean
      loop do
        response = @cap_api.get "#{@cap_profiles}?p=#{page}&ps=100"
        unless response.status == 200
          msg = "Failed to GET profiles page #{page}: #{response.status}"
          @config.logger.error msg
          puts msg
          break
        end
        data = response.body
        if data['firstPage']
          # The totals are only read from the first page.
          pages = data['totalPages']
          total = data['totalCount']
          puts "Retrieved #{page} of #{pages} pages (#{total} profiles)."
        else
          puts "Retrieved #{page} of #{pages} pages."
        end
        cap_store_profiles(data['values'])
        page += 1
        break if data['lastPage']
      end
    rescue => e
      binding.pry if @config.debug
      @config.logger.error e.message
    ensure
      repo_commit(total)
    end
  else
    @config.logger.error 'Failed to authenticate'
  end
rescue => e
  binding.pry if @config.debug
  @config.logger.error(e.message)
end

# Store one page of profiles in whichever local repo is active.
def cap_store_profiles(profiles)
  if @profiles.is_a? Daybreak::DB
    profiles.each { |profile| @profiles[profile['profileId']] = profile }
    @profiles.flush
  elsif @profiles.is_a? Mongo::Collection
    profiles.each { |profile| cap_store_profile_mongo(profile) }
  end
end

# Store one profile in mongodb, splitting presentations and publications
# into their own collections to accommodate the 16Mb limit on mongodb docs.
def cap_store_profile_mongo(profile)
  pubs_fields = ['doiId', 'doiUrl', 'webOfScienceId', 'webOfScienceUrl']
  id = profile['profileId']

  presentations = profile.delete('presentations') || []
  presentations.each { |presentation| presentation.delete('detail') }
  cap_insert_doc(@presentations,
                 { 'profileId' => id, 'presentations' => presentations },
                 "Profile #{id} presentations failed to save.")

  publications = profile.delete('publications') || []
  # Keep only the identifier fields.  (Hash#keys ignores a block, so
  # iterating with `keys { ... }` never removed anything.)
  publications.each do |publication|
    publication.keep_if { |field, _| pubs_fields.include? field }
  end
  cap_insert_doc(@publications,
                 { 'profileId' => id, 'publications' => publications },
                 "Profile #{id} publications failed to save.")

  cap_insert_doc(@profiles, profile, "Profile #{id} failed to save.")
end

# Insert a document, logging `failure_msg` instead of raising on error.
def cap_insert_doc(collection, doc, failure_msg)
  collection.insert_one(doc)
rescue
  @config.logger.error failure_msg
end

private :cap_store_profiles, :cap_store_profile_mongo, :cap_insert_doc
176
+
177
+ # def update_profiles
178
+ # # profile['profileId']
179
+ # # => 42005
180
+ # # [13] pry(main)> profile['profileId'].class
181
+ # # => Fixnum
182
+ # # [14] pry(main)> profile['lastModified']
183
+ # # => "2015-08-17T10:55:46.772-07:00"
184
+ # end
185
+
186
# List the profile ids held in the local repo.
# @return ids [Array<Integer>, nil] Daybreak keys converted to integers,
#   or the 'profileId' of every mongodb profile document; nil when the
#   repo is neither kind
def profile_ids
  case @profiles
  when Daybreak::DB
    @profiles.keys.map(&:to_i)
  when Mongo::Collection
    @profiles.find.projection({ profileId: 1 }).map { |doc| doc['profileId'] }
  end
end
194
+
195
# Look up one profile in the local repo.
# @param id [Integer] A profileId number
# @return profile [Hash|nil] the Daybreak value stored under the id's
#   string form, or the first mongodb document with that profileId
def profile(id)
  case @profiles
  when Daybreak::DB then @profiles[id.to_s]
  when Mongo::Collection then @profiles.find({ profileId: id }).first
  end
end
205
+
206
# Look up presentation data for a profile in the local repo.
# @param id [Integer] A profileId number
# @return [Object, nil] with Daybreak, the value under the profile's
#   'presentations' key (nil on any error, e.g. an unknown id); with
#   mongodb, the first document in the presentations collection that
#   has this profileId
def presentation(id)
  case @profiles
  when Daybreak::DB
    begin
      record = @profiles[id.to_s]
      record['presentations']
    rescue
      nil
    end
  when Mongo::Collection
    @presentations.find({ profileId: id }).first
  end
end
220
+
221
# Look up publication data for a profile in the local repo.
# @param id [Integer] A profileId number
# @return [Object, nil] with Daybreak, the value under the profile's
#   'publications' key (nil on any error, e.g. an unknown id); with
#   mongodb, the first document in the publications collection that
#   has this profileId
def publication(id)
  case @profiles
  when Daybreak::DB
    begin
      record = @profiles[id.to_s]
      record['publications']
    rescue
      nil
    end
  when Mongo::Collection
    @publications.find({ profileId: id }).first
  end
end
234
+
235
# A profile's processing data.
# @param id [Integer] A profileId number
# @return [Object, nil] with Daybreak, the value under the profile's
#   'processed' key (nil on any error, e.g. an unknown id); otherwise,
#   when the processed store is a mongodb collection, its first document
#   with this profileId
def processed(id)
  if @profiles.is_a? Daybreak::DB
    begin
      record = @profiles[id.to_s]
      return record['processed']
    rescue
      return nil
    end
  end
  return nil unless @processed.is_a? Mongo::Collection
  @processed.find({ profileId: id }).first
end
248
+
249
# Update a profile record with processing data.
#
# With Daybreak the processing document is attached to the stored
# profile under 'processed' and the profile is written back through
# `[]=`; an unknown id raises NoMethodError, as before.  Otherwise, when
# the processed store is a mongodb collection, a new document is
# inserted.
# NOTE(review): repo_commit creates a unique profileId index on the
# processed collection, so a second insert for the same id would
# presumably be rejected by mongodb - confirm whether an upsert is wanted.
#
# @param id [Integer] A profileId number
# @param data [Hash] Optional processing information
# @return the processing document (Daybreak) or the insert result (mongodb)
def process_update(id, data=nil)
  if @profiles.is_a? Daybreak::DB
    process_doc = {
      lastModified: Time.now.to_i,
      data: data
    }
    key = id.to_s
    profile = @profiles[key]
    profile['processed'] = process_doc
    # Re-assign the record: changing the fetched hash in place only
    # alters the in-memory copy and is never written through the store.
    @profiles[key] = profile
    process_doc
  elsif @processed.is_a? Mongo::Collection
    process_doc = {
      profileId: id,
      lastModified: Time.now.to_i,
      data: data
    }
    @processed.insert_one(process_doc)
  end
end
268
+
269
+ private
270
+
271
# Empty the local repo: clear the Daybreak store, or drop and re-create
# the profiles, presentations, publications and processed collections
# (in that order).
def repo_clean
  case @profiles
  when Daybreak::DB
    @profiles.clear
  when Mongo::Collection
    collections = [@profiles, @presentations, @publications, @processed]
    # `.last` keeps the return value equal to the final `create` call.
    collections.map do |collection|
      collection.drop
      collection.create
    end.last
  end
end
285
+
286
# Finalize the local repo after a download and report what was stored.
#
# Daybreak: flush, compact and reload the store.  mongodb: create a
# unique index on profileId for each of the four collections.  Both
# print the number of stored profiles and where they were stored.
# @param total the profile count reported by the CAP API, used only in
#   the printed summary
def repo_commit(total)
  case @profiles
  when Daybreak::DB
    @profiles.flush
    @profiles.compact
    @profiles.load
    puts "Stored #{@profiles.size} of #{total} profiles."
    puts "Stored profiles to #{@profiles.class} at: #{@profiles.file}."
  when Mongo::Collection
    index_spec = { "profileId" => 1 }
    [@profiles, @presentations, @publications, @processed].each do |collection|
      collection.indexes.create_one(index_spec, :unique => true)
    end
    puts "Stored #{@profiles.find.count} of #{total} profiles."
    puts "Stored profiles to #{@profiles.class} at: #{@profiles.namespace}."
  end
end
303
+
304
# Migrate CAP API profile data from a Daybreak::DB into mongodb.
#
# Drops the mongodb :profiles collection, inserts every record of the
# configured Daybreak repo, creates a unique profileId index and then
# validates the copy.
# @return the result of `daybreak_matches_mongo?`
def profiles_daybreak_to_mongo
  mongo = Cap.configuration.cap_repo_mongo
  mongo[:profiles].drop
  db = Cap.configuration.cap_repo_daybreak
  # `keys` takes no block, so iterate explicitly; read each record from
  # the Daybreak repo being migrated rather than the client's active repo.
  db.keys.each do |id|
    mongo[:profiles].insert_one(db[id])
  end
  mongo[:profiles].indexes.create_one({ profileId: 1 }, :unique => true)
  daybreak_matches_mongo?
end
316
+
317
# Validate a daybreak to mongo data transfer.
#
# Every Daybreak record must have a mongodb profile document (looked up
# by integer profileId) that equals it once mongodb's "_id" field is
# removed.  A record that is missing from mongodb counts as a mismatch.
# @return [Boolean] true when all records match (also when there are none)
def daybreak_matches_mongo?
  mongo = Cap.configuration.cap_repo_mongo
  profiles = Cap.configuration.cap_repo_daybreak
  profiles.keys.all? do |id|
    mongo_profile = mongo[:profiles].find({ profileId: id.to_i }).first
    # A missing document is a failed transfer, not an error.
    next false if mongo_profile.nil?
    mongo_profile.delete("_id")
    mongo_profile == profiles[id]
  end
end
329
+
330
# Default request headers: accept and send JSON.
# @return [Hash] :accept and :content_type, both set to JSON_CONTENT
def json_payloads
  headers = {}
  headers[:accept] = JSON_CONTENT
  headers[:content_type] = JSON_CONTENT
  headers
end
333
+
334
+ end
335
+ end
336
+ end