cdm_migrator 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/MIT-LICENSE +20 -0
- data/README.md +40 -0
- data/Rakefile +38 -0
- data/app/assets/config/cdm_migrator_manifest.js +2 -0
- data/app/assets/javascripts/cdm_migrator/application.js +13 -0
- data/app/assets/stylesheets/cdm_migrator/application.css +15 -0
- data/app/controllers/cdm_migrator/application_controller.rb +5 -0
- data/app/controllers/cdm_migrator/cdm_controller.rb +137 -0
- data/app/controllers/cdm_migrator/csv_controller.rb +156 -0
- data/app/helpers/cdm_migrator/application_helper.rb +4 -0
- data/app/jobs/cdm_migrator/application_job.rb +4 -0
- data/app/jobs/csv_upload_job.rb +123 -0
- data/app/mailers/cdm_migrator/application_mailer.rb +6 -0
- data/app/models/cdm_migrator/application_record.rb +5 -0
- data/app/views/cdm_migrator/cdm/collection.html.erb +9 -0
- data/app/views/cdm_migrator/cdm/mappings.html.erb +33 -0
- data/app/views/cdm_migrator/csv/upload.html.erb +10 -0
- data/app/views/layouts/cdm_migrator/application.html.erb +14 -0
- data/config/routes.rb +9 -0
- data/lib/cdm_migrator.rb +5 -0
- data/lib/cdm_migrator/engine.rb +22 -0
- data/lib/cdm_migrator/version.rb +3 -0
- data/lib/generators/cdm_migrator/install/install_generator.rb +13 -0
- data/lib/generators/cdm_migrator/install/templates/config/cdm_migrator.yml +2 -0
- data/lib/tasks/cdm_migrator_tasks.rake +4 -0
- metadata +141 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 2cb6d14f6b72fcb888ba38ef4c24f9dc96969c8e
|
4
|
+
data.tar.gz: bf6639ced340661bac4ab51b2b3d929abea5fb14
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f0e73d559e13093202e19db2f3802fc2195d7314b0a844445d1eeba588f460c028e109bd01640f03b6523807e0f74ebe361efd69a69a6ae6dd9e89ca6cef5fd1
|
7
|
+
data.tar.gz: c07586b7eb329bc7646cfec6ad418ff1d669b88263139999d78f66c59a417c00f5639551ddf875b6cee056b06cbea151bf5b36ad40aaf3575b9259d616a84210
|
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright 2017 sephirothkod
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# CdmMigrator
|
2
|
+
This migrator is designed to export individual ContentDM collections to a CSV for refining. It then allows batch importing into Hyrax via a CSV upload.
|
3
|
+
|
4
|
+
## Installation
|
5
|
+
Add this line to your application's Gemfile:
|
6
|
+
|
7
|
+
```ruby
|
8
|
+
gem 'cdm_migrator'
|
9
|
+
```
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
```bash
|
13
|
+
$ bundle
|
14
|
+
```
|
15
|
+
|
16
|
+
Or install it yourself as:
|
17
|
+
```bash
|
18
|
+
$ gem install cdm_migrator
|
19
|
+
```
|
20
|
+
|
21
|
+
Finally, run:
|
22
|
+
```bash
|
23
|
+
$ rails g cdm_migrator:install
|
24
|
+
```
|
25
|
+
|
26
|
+
## Usage
|
27
|
+
1. Add your ContentDM url and api port to the cdm_migrator.yml file.
|
28
|
+
2. Navigate to the *cdm_migrator/cdm/collection* url to select your ContentDM collection and the type of work you want to export it to, then click "choose mappings".
|
29
|
+
3. Map the ContentDM fields to your Hyrax work and file fields\* and click "generate CSV".
|
30
|
+
4. Refine the CSV as you see fit.
|
31
|
+
5. Navigate to the *cdm_migrator/csv/upload* url; choose your multi-value separator (default is |) and upload your CSV file.
|
32
|
+
6. Done.
|
33
|
+
|
34
|
+
\* cdm_migrator uses the generated Hyrax forms (ex. Hyrax::Forms::GenericWorkForm) in your host application to obtain its terms for mapping. If you have added terms to your FileSet model, extend the Hyrax::Forms::FileSetEditForm with FileSetForm in your host application so that the changes will be detected by the migrator.
|
35
|
+
|
36
|
+
## Contributing
|
37
|
+
Contribution directions go here.
|
38
|
+
|
39
|
+
## License
|
40
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
begin
|
2
|
+
require 'bundler/setup'
|
3
|
+
rescue LoadError
|
4
|
+
puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
|
5
|
+
end
|
6
|
+
|
7
|
+
require 'engine_cart/rake_task'
|
8
|
+
require 'rdoc/task'
|
9
|
+
|
10
|
+
RDoc::Task.new(:rdoc) do |rdoc|
|
11
|
+
rdoc.rdoc_dir = 'rdoc'
|
12
|
+
rdoc.title = 'CdmMigrator'
|
13
|
+
rdoc.options << '--line-numbers'
|
14
|
+
rdoc.rdoc_files.include('README.md')
|
15
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
16
|
+
end
|
17
|
+
|
18
|
+
#APP_RAKEFILE = File.expand_path("../test/dummy/Rakefile", __FILE__)
|
19
|
+
#load 'rails/tasks/engine.rake'
|
20
|
+
|
21
|
+
|
22
|
+
load 'rails/tasks/statistics.rake'
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
require 'bundler/gem_tasks'
|
27
|
+
|
28
|
+
require 'rake/testtask'
|
29
|
+
|
30
|
+
Rake::TestTask.new(:test) do |t|
|
31
|
+
t.libs << 'lib'
|
32
|
+
t.libs << 'test'
|
33
|
+
t.pattern = 'test/**/*_test.rb'
|
34
|
+
t.verbose = false
|
35
|
+
end
|
36
|
+
|
37
|
+
|
38
|
+
task default: :test
|
@@ -0,0 +1,13 @@
|
|
1
|
+
// This is a manifest file that'll be compiled into application.js, which will include all the files
|
2
|
+
// listed below.
|
3
|
+
//
|
4
|
+
// Any JavaScript/Coffee file within this directory, lib/assets/javascripts, vendor/assets/javascripts,
|
5
|
+
// or any plugin's vendor/assets/javascripts directory can be referenced here using a relative path.
|
6
|
+
//
|
7
|
+
// It's not advisable to add code directly here, but if you do, it'll appear at the bottom of the
|
8
|
+
// compiled file. JavaScript code in this file should be added after the last require_* statement.
|
9
|
+
//
|
10
|
+
// Read Sprockets README (https://github.com/rails/sprockets#sprockets-directives) for details
|
11
|
+
// about supported directives.
|
12
|
+
//
|
13
|
+
//= require_tree .
|
@@ -0,0 +1,15 @@
|
|
1
|
+
/*
|
2
|
+
* This is a manifest file that'll be compiled into application.css, which will include all the files
|
3
|
+
* listed below.
|
4
|
+
*
|
5
|
+
* Any CSS and SCSS file within this directory, lib/assets/stylesheets, vendor/assets/stylesheets,
|
6
|
+
* or any plugin's vendor/assets/stylesheets directory can be referenced here using a relative path.
|
7
|
+
*
|
8
|
+
* You're free to add application-wide styles to this file and they'll appear at the bottom of the
|
9
|
+
* compiled file so the styles you add here take precedence over styles defined in any other CSS/SCSS
|
10
|
+
* files in this directory. Styles in this file should be added after the last require_* statement.
|
11
|
+
* It is generally better to create a new file per style scope.
|
12
|
+
*
|
13
|
+
*= require_tree .
|
14
|
+
*= require_self
|
15
|
+
*/
|
@@ -0,0 +1,137 @@
|
|
1
|
+
module CdmMigrator
|
2
|
+
class CdmController < ApplicationController
|
3
|
+
|
4
|
+
def initialize
|
5
|
+
super
|
6
|
+
@cdm_url = CdmMigrator::Engine.config["cdm_url"]
|
7
|
+
@cdm_port = CdmMigrator::Engine.config["cdm_port"]
|
8
|
+
@cdm_dirs = CdmMigrator::Engine.config["cdm_dirs"]
|
9
|
+
end
|
10
|
+
|
11
|
+
before_action :set_exclusive_fields, only: [:generate, :mappings]
|
12
|
+
|
13
|
+
def secondary_terms file_form
|
14
|
+
file_form.terms - file_form.required_fields -
|
15
|
+
[:visibility_during_embargo, :embargo_release_date,
|
16
|
+
:visibility_after_embargo, :visibility_during_lease,
|
17
|
+
:lease_expiration_date, :visibility_after_lease, :visibility,
|
18
|
+
:thumbnail_id, :representative_id, :ordered_member_ids,
|
19
|
+
:collection_ids, :in_works_ids, :admin_set_id]
|
20
|
+
end
|
21
|
+
|
22
|
+
def set_exclusive_fields
|
23
|
+
#Module.const_get "Hyrax::GenericWorkForm" rescue false #.split('::').inject(Object) {|o,c| o.const_get c}
|
24
|
+
file_form = Module.const_get("Hyrax::FileSetForm") rescue nil || Module.const_get("Hyrax::Forms::FileSetEditForm")
|
25
|
+
work_form = Module.const_get("Hyrax::#{params[:work]}Form") rescue nil || Module.const_get("Hyrax::Forms::WorkForm")
|
26
|
+
@terms = file_form.required_fields + secondary_terms(file_form)
|
27
|
+
@work_only = work_form.required_fields + work_form.new(params[:work].constantize.new,nil,nil).secondary_terms - @terms
|
28
|
+
end
|
29
|
+
|
30
|
+
# Exports one ContentDM collection as CSV and renders it as text/csv.
#
# Reads params[:mappings] (entries carrying 'cdm', 'hydra' and 'hydrac'
# keys), params[:collection] and params[:file_system]. Fills @c_to_h and
# @h_to_c; create_line uses @h_to_c to place ContentDM values under the
# mapped columns. Emits one "GenericWork" line per record followed by one
# "File" line per file (one per page for compound "cpd" records).
def generate
  @h_to_c = {}
  @c_to_h = {}
  params[:mappings].each do |_key, mapping|
    # 'hydra' takes precedence over 'hydrac' when both are filled in.
    target = [mapping['hydra'], mapping['hydrac']].find { |term| !term.empty? }
    next if target.nil?

    @c_to_h[mapping['cdm']] = target
    (@h_to_c[target] ||= []) << mapping['cdm']
  end

  collection = params[:collection]
  # Every dmwebservices request shares the same base URL; only the query differs.
  fetch_json = lambda do |query|
    endpoint = "#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=#{query}"
    JSON.parse(Net::HTTP.get_response(URI.parse(endpoint)).body)
  end

  page_size = 1024
  json = fetch_json.call("dmQuery/#{collection}/0/0/filetype/#{page_size}/0/0/0/0/0/1/0/json")
  total_recs = json["pager"]["total"].to_i
  if total_recs > page_size
    records = []
    # Round up so a partial last page is fetched without issuing an extra
    # request when the total is an exact multiple of the page size.
    page_count = (total_recs + page_size - 1) / page_size
    (0...page_count).each do |index|
      start = (index * page_size) + 1
      page = fetch_json.call("dmQuery/#{collection}/0/0/filetype/#{page_size}/#{start}/0/0/0/0/1/0/json")
      # concat keeps records a flat list of [pointer, filetype] pairs.
      records.concat(page["records"].map { |rec| [rec['pointer'], rec['filetype']] })
    end
  else
    records = json["records"].map { |rec| [rec['pointer'], rec['filetype']] }
  end

  use_local_files = params[:file_system] == "true"
  csv_lines = [CSV.generate_line(['object_type', 'url'] + @terms + @work_only)]
  records.each do |pointer, filetype|
    item = fetch_json.call("dmGetItemInfo/#{collection}/#{pointer}/json")
    # NOTE(review): the work type is hard-coded although params[:work] is
    # available; confirm whether the selected work type should be used here.
    csv_lines << create_line("GenericWork", "", item)
    if filetype == "cpd"
      compound = fetch_json.call("dmGetCompoundObjectInfo/#{collection}/#{pointer}/json")
      pages = compound['page'] || compound['node']['page']
      pages.each do |child|
        child_json = fetch_json.call("dmGetItemInfo/#{collection}/#{child['pageptr']}/json")
        # Only touch the filesystem when local storage was requested.
        url = if use_local_files
                "file://#{file_path(child['pageptr'])}"
              else
                "#{@cdm_url}/utils/getfile/collection/#{collection}/id/#{pointer}/filename/#{child['pageptr']}.#{child['find']}"
              end
        csv_lines << create_line("File", url, child_json)
      end
    else
      url = if use_local_files
              "file://#{file_path(pointer)}"
            else
              "#{@cdm_url}/utils/getfile/collection/#{collection}/id/#{pointer}/filename/#{pointer}.#{filetype}"
            end
      csv_lines << create_line("File", url, {})
    end
  end
  render plain: csv_lines.join, content_type: 'text/csv'
end
|
82
|
+
|
83
|
+
def mappings
|
84
|
+
json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetCollectionFieldInfo/"+params['collection']+'/json')).body)
|
85
|
+
@cdm_terms = json.collect { |c| [c['name'],c['nick']] }
|
86
|
+
get_dirs
|
87
|
+
end
|
88
|
+
|
89
|
+
def collection
|
90
|
+
json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetCollectionList/json")).body)
|
91
|
+
@collections = json.collect { |c| [c['name'],c['secondary_alias']] }
|
92
|
+
@available_concerns = Hyrax.config.curation_concerns.map { |c| [c.to_s, c.to_s]}
|
93
|
+
end
|
94
|
+
|
95
|
+
protected
|
96
|
+
|
97
|
+
# Builds one CSV line: the object type, the url, then one cell per term in
# @terms + @work_only.
#
# For each term, the ContentDM fields mapped to it in @h_to_c are looked up
# in json; nil and empty values are dropped and the rest are joined with '|'.
# Terms with no mapping or no usable value produce an empty cell.
#
# @param type [String] value for the first column
# @param url [String] value for the second column
# @param json [Hash] item metadata keyed by ContentDM field
# @return [String] the generated CSV line
def create_line type, url, json
  cells = (@terms + @work_only).map do |term|
    cdm_fields = Array(@h_to_c[term.to_s])
    values = cdm_fields.map { |cdm_term| json[cdm_term] }.compact.reject(&:empty?)
    values.join('|')
  end
  CSV.generate_line([type, url] + cells)
end
|
116
|
+
|
117
|
+
# Looks for a local file belonging to a ContentDM pointer.
#
# Searches recursively under params['mappings_url'] for a file named
# "<pointer>_*<ext>", trying the extensions tif, jpg, mp4 and mp3 in that
# order, and returns the first match.
#
# NOTE(review): params['mappings_url'] comes from the request and is used
# directly as the glob root; consider restricting it to the configured
# cdm_dirs.
#
# @param pointer [String, Integer] ContentDM item pointer
# @return [String, nil] path of the first matching file, or nil when none
#   exists (previously the internal list of glob results leaked out)
def file_path pointer
  %w[tif jpg mp4 mp3].each do |type|
    match = Dir.glob("#{params['mappings_url']}/**/#{pointer}_*#{type}").first
    return match if match
  end
  nil
end
|
127
|
+
|
128
|
+
# Collects the immediate sub-directories of every directory in @cdm_dirs
# into @dirs as ["<name>/<subdir>", "<dir>/<subdir>"] pairs.
def get_dirs
  @dirs = []
  @cdm_dirs.each do |name, dir|
    subdirs = Dir.entries(dir).select do |entry|
      File.directory?(File.join(dir, entry)) && entry != '.' && entry != '..'
    end
    @dirs.concat(subdirs.map { |subdir| ["#{name}/#{subdir}", "#{dir}/#{subdir}"] })
  end
end
|
136
|
+
end
|
137
|
+
end
|
@@ -0,0 +1,156 @@
|
|
1
|
+
module CdmMigrator
|
2
|
+
class CsvController < ApplicationController
|
3
|
+
|
4
|
+
# Renders a CSV template containing only a header row.
#
# The header starts with 'type' and 'url', followed by every attribute of a
# new GenericWork prefixed with "work_" and every attribute of a new FileSet
# prefixed with "file_", leaving out the attributes listed in skip.
def generate
  skip = ["id", "head", "tail", "depositor", "date_uploaded", "date_modified", "import_url", "thumbnail_id", "embargo_id", "lease_id", "access_control_id", "representative_id"]
  work_columns = GenericWork.new.attributes.keys.reject { |key| skip.include?(key) }.map { |key| "work_#{key}" }
  file_columns = FileSet.new.attributes.keys.reject { |key| skip.include?(key) }.map { |key| "file_#{key}" }
  headers = ['type', 'url'] + work_columns + file_columns
  render(plain: CSV.generate { |csv| csv << headers }, content_type: 'text/csv')
end
|
16
|
+
|
17
|
+
def upload
|
18
|
+
#byebug
|
19
|
+
end
|
20
|
+
|
21
|
+
def create
|
22
|
+
#byebug
|
23
|
+
csv = CSV.parse(File.read(params[:csv_import][:csv_file].path), headers: true, encoding: 'utf-8')
|
24
|
+
CsvUploadJob.perform_later(params[:csv_import][:csv_file].path, params[:csv_import][:mvs], @current_user)
|
25
|
+
#perform(params[:csv_import][:csv_file].path, params[:csv_import][:mvs], @current_user)
|
26
|
+
flash[:notice] = "csv successfully uploaded"
|
27
|
+
redirect_to "/csv/upload"
|
28
|
+
end
|
29
|
+
|
30
|
+
# Parses the CSV at the given path, groups its rows into works and their
# files, then calls create_works.
#
# The first column of each row decides its kind: a value containing "Work"
# starts a new work, a value containing "File" attaches the row to the most
# recent work, and rows with an empty first column are skipped.
#
# @param csv [String] path of the CSV file to read
# @param mvs [String] multi-value separator, stored in @mvs
# @param current_user [Object] stored in @current_user
# @raise [ArgumentError] when a File row appears before any Work row
def perform(csv, mvs, current_user)
  @csv = CSV.parse(File.read(csv), headers: true, encoding: 'utf-8').map(&:to_hash)
  @mvs = mvs
  @current_user = current_user
  @works = []
  @files = {}
  @csv.each do |row|
    type = row.first.last
    next if type.nil?

    if type.include?("Work")
      @works << row
      # Files are keyed by the 1-based position of their work.
      @files[@works.length] = []
    elsif type.include?("File")
      # Without a preceding work there is no bucket to append to; fail with
      # a clear message instead of a NoMethodError on nil.
      raise ArgumentError, "File row found before any Work row in #{csv}" if @works.empty?

      row.delete("object_type")
      @files[@works.length] << row
    end
  end
  create_works
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
def create_file_from_url(url, file_name, work, file_data)
|
54
|
+
::FileSet.new(import_url: url, label: file_name) do |fs|
|
55
|
+
fs.save
|
56
|
+
actor = Hyrax::Actors::FileSetActor.new(fs, @current_user)
|
57
|
+
actor.create_metadata#(work, visibility: work.visibility)
|
58
|
+
actor.attach_file_to_work(work)
|
59
|
+
#byebug
|
60
|
+
fs.attributes = file_data
|
61
|
+
fs.save!
|
62
|
+
uri = URI.parse(url.gsub(' ','%20'))
|
63
|
+
if uri.scheme == 'file'
|
64
|
+
IngestLocalFileJob.perform_later(fs, uri.path.gsub('%20',' '), @current_user)
|
65
|
+
else
|
66
|
+
ImportUrlJob.perform_later(fs, log(actor.user))
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
#
|
71
|
+
def load_metadata(fs, file_array)
|
72
|
+
file_array.each do |line|
|
73
|
+
fileset = fs
|
74
|
+
index = -1
|
75
|
+
line.each do |data|
|
76
|
+
index = index + 1
|
77
|
+
next if index==0
|
78
|
+
if @csv.headers[index] == "visibility"
|
79
|
+
fileset.visibility = data
|
80
|
+
elsif @csv.headers[index] == "depositor"
|
81
|
+
fileset.depositor = data
|
82
|
+
else
|
83
|
+
data_arr = data.split @mvs
|
84
|
+
fileset[@csv.headers[index]] = data_arr
|
85
|
+
end
|
86
|
+
end
|
87
|
+
fileset.save
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
def create_works
|
92
|
+
index = 1
|
93
|
+
@works.each do |work_data|
|
94
|
+
work = Object.const_get(work_data.first.last).new#delete("object_type")).new
|
95
|
+
status_after, embargo_date, lease_date = nil, nil, nil
|
96
|
+
final_work_data = create_data work_data, "Hyrax::GenericWorkForm", work
|
97
|
+
work.apply_depositor_metadata(@current_user)
|
98
|
+
work.attributes = final_work_data
|
99
|
+
work.save
|
100
|
+
create_files(work, index)
|
101
|
+
index+=1
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
def create_data data, type, object
|
106
|
+
final_data = {}
|
107
|
+
accepted_terms = Object.const_get(type).required_fields + Object.const_get(type).secondary_terms
|
108
|
+
data.each do |key, att|
|
109
|
+
if(att.nil? || att.empty? || key.to_s.include?("object_type") || !accepted_terms.include?(key.to_sym) )
|
110
|
+
next
|
111
|
+
elsif(object.send(key).nil?)
|
112
|
+
final_data[key] = att
|
113
|
+
else
|
114
|
+
final_data[key] = att.split @mvs
|
115
|
+
end
|
116
|
+
end
|
117
|
+
final_data
|
118
|
+
end
|
119
|
+
|
120
|
+
# Creates and saves a lease from the given values.
#
# The status_after and date arguments are now used directly; previously they
# were ignored in favour of @status_after / @lease_date.
#
# @param visibility [String] visibility while the lease is active
# @param status_after [String] visibility once the lease has expired
# @param date [Object] lease expiration date
# @return [Object] the result of saving the lease
def create_lease visibility, status_after, date
  lease = Hydra::AccessControls::Lease.new(visibility_during_lease: visibility,
                                           visibility_after_lease: status_after,
                                           lease_expiration_date: date)
  lease.save
end
|
125
|
+
|
126
|
+
def create_embargo visibility
|
127
|
+
embargo = Hydra::AccessControls::Embargo.new
|
128
|
+
embargo.visibility_during_embargo = visibility
|
129
|
+
embargo.visibility_after_embargo = @status_after
|
130
|
+
embargo.embargo_release_date = @embargo_date
|
131
|
+
embargo.save
|
132
|
+
end
|
133
|
+
|
134
|
+
def create_files(work, index)
|
135
|
+
file = FileSet.new
|
136
|
+
@files[index].each do |file_data|
|
137
|
+
url = file_data.delete('url')
|
138
|
+
title = file_data.delete('title')
|
139
|
+
final_file_data = create_data file_data, "Hyrax::FileSetForm", file
|
140
|
+
create_file_from_url(url, title, work, final_file_data)
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
# def log(user)
|
145
|
+
# CurationConcerns::Operation.create!(user: user,
|
146
|
+
# operation_type: "Attach Remote File")
|
147
|
+
# end
|
148
|
+
|
149
|
+
|
150
|
+
|
151
|
+
def log(user)
|
152
|
+
Hyrax::Operation.create!(user: user,
|
153
|
+
operation_type: "Attach Remote File")
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
class CsvUploadJob < ActiveJob::Base
|
2
|
+
queue_as Hyrax.config.ingest_queue_name
|
3
|
+
|
4
|
+
|
5
|
+
# Parses the CSV at the given path, groups its rows into works and their
# files, then calls create_works.
#
# The first column of each row decides its kind: a value containing "Work"
# starts a new work, a value containing "File" attaches the row to the most
# recent work, and rows with an empty first column are skipped.
#
# @param csv [String] path of the CSV file to read
# @param mvs [String] multi-value separator, stored in @mvs
# @param current_user [Object] stored in @current_user
# @raise [ArgumentError] when a File row appears before any Work row
def perform(csv, mvs, current_user)
  @csv = CSV.parse(File.read(csv), headers: true, encoding: 'utf-8').map(&:to_hash)
  @mvs = mvs
  @current_user = current_user
  @works = []
  @files = {}
  @csv.each do |row|
    type = row.first.last
    next if type.nil?

    if type.include?("Work")
      @works << row
      # Files are keyed by the 1-based position of their work.
      @files[@works.length] = []
    elsif type.include?("File")
      # Without a preceding work there is no bucket to append to; fail with
      # a clear message instead of a NoMethodError on nil.
      raise ArgumentError, "File row found before any Work row in #{csv}" if @works.empty?

      row.delete("object_type")
      @files[@works.length] << row
    end
  end
  create_works
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def create_file_from_url(url, file_name, work, file_data)
|
29
|
+
::FileSet.new(import_url: url, label: file_name) do |fs|
|
30
|
+
fs.save
|
31
|
+
actor = Hyrax::Actors::FileSetActor.new(fs, @current_user)
|
32
|
+
actor.create_metadata#(work, visibility: work.visibility)
|
33
|
+
actor.attach_file_to_work(work)
|
34
|
+
#byebug
|
35
|
+
fs.attributes = file_data
|
36
|
+
fs.save!
|
37
|
+
uri = URI.parse(url.gsub(' ','%20'))
|
38
|
+
if uri.scheme == 'file'
|
39
|
+
IngestLocalFileJob.perform_later(fs, uri.path.gsub('%20',' '), @current_user)
|
40
|
+
else
|
41
|
+
ImportUrlJob.perform_later(fs, log(actor.user))
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
#
|
46
|
+
def load_metadata(fs, file_array)
|
47
|
+
file_array.each do |line|
|
48
|
+
fileset = fs
|
49
|
+
index = -1
|
50
|
+
line.each do |data|
|
51
|
+
index = index + 1
|
52
|
+
next if index==0
|
53
|
+
if @csv.headers[index] == "visibility"
|
54
|
+
fileset.visibility = data
|
55
|
+
elsif @csv.headers[index] == "depositor"
|
56
|
+
fileset.depositor = data
|
57
|
+
else
|
58
|
+
data_arr = data.split @mvs
|
59
|
+
fileset[@csv.headers[index]] = data_arr
|
60
|
+
end
|
61
|
+
end
|
62
|
+
fileset.save
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
def create_works
|
67
|
+
index = 1
|
68
|
+
@works.each do |work_data|
|
69
|
+
work = Object.const_get(work_data.first.last).new#delete("object_type")).new
|
70
|
+
status_after, embargo_date, lease_date = nil, nil, nil
|
71
|
+
final_work_data = create_data work_data, "Hyrax::GenericWorkForm", work
|
72
|
+
work.apply_depositor_metadata(@current_user)
|
73
|
+
work.attributes = final_work_data
|
74
|
+
work.save
|
75
|
+
create_files(work, index)
|
76
|
+
index+=1
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
def create_data data, type, object
|
81
|
+
final_data = {}
|
82
|
+
accepted_terms = Object.const_get(type).required_fields + Object.const_get(type).secondary_terms
|
83
|
+
data.each do |key, att|
|
84
|
+
if(att.nil? || att.empty? || key.to_s.include?("object_type") || !accepted_terms.include?(key.to_sym) )
|
85
|
+
next
|
86
|
+
elsif(object.send(key).nil?)
|
87
|
+
final_data[key] = att
|
88
|
+
else
|
89
|
+
final_data[key] = att.split @mvs
|
90
|
+
end
|
91
|
+
end
|
92
|
+
final_data
|
93
|
+
end
|
94
|
+
|
95
|
+
# Creates and saves a lease from the given values.
#
# The status_after and date arguments are now used directly; previously they
# were ignored in favour of @status_after / @lease_date.
#
# @param visibility [String] visibility while the lease is active
# @param status_after [String] visibility once the lease has expired
# @param date [Object] lease expiration date
# @return [Object] the result of saving the lease
def create_lease visibility, status_after, date
  lease = Hydra::AccessControls::Lease.new(visibility_during_lease: visibility,
                                           visibility_after_lease: status_after,
                                           lease_expiration_date: date)
  lease.save
end
|
100
|
+
|
101
|
+
def create_embargo visibility
|
102
|
+
embargo = Hydra::AccessControls::Embargo.new
|
103
|
+
embargo.visibility_during_embargo = visibility
|
104
|
+
embargo.visibility_after_embargo = @status_after
|
105
|
+
embargo.embargo_release_date = @embargo_date
|
106
|
+
embargo.save
|
107
|
+
end
|
108
|
+
|
109
|
+
def create_files(work, index)
|
110
|
+
file = FileSet.new
|
111
|
+
@files[index].each do |file_data|
|
112
|
+
url = file_data.delete('url')
|
113
|
+
title = file_data.delete('title')
|
114
|
+
final_file_data = create_data file_data, "Hyrax::FileSetForm", file
|
115
|
+
create_file_from_url(url, title, work, final_file_data)
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
def log(user)
|
120
|
+
Hyrax::Operation.create!(user: user,
|
121
|
+
operation_type: "Attach Remote File")
|
122
|
+
end
|
123
|
+
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<%= form_tag cdm_mappings_path, method: "get" do %>
|
2
|
+
<%= select_tag 'collection', options_for_select(@collections) %>
|
3
|
+
<br />
|
4
|
+
<%= select_tag 'work', options_for_select(@available_concerns) %>
|
5
|
+
<br />
|
6
|
+
Use Local File Storage: <%= check_box_tag 'file_system', 'true', false %>
|
7
|
+
<br />
|
8
|
+
<%= submit_tag 'choose mappings' %>
|
9
|
+
<% end %>
|
@@ -0,0 +1,33 @@
|
|
1
|
+
<style type="text/css" media="screen">
|
2
|
+
|
3
|
+
table{
|
4
|
+
border-collapse:collapse;
|
5
|
+
border:1px solid black;
|
6
|
+
}
|
7
|
+
|
8
|
+
table td{
|
9
|
+
border:1px solid black;
|
10
|
+
}
|
11
|
+
</style>
|
12
|
+
<%= form_tag cdm_generate_path+".csv" do %>
|
13
|
+
<%= hidden_field_tag "collection", params[:collection] %>
|
14
|
+
<%= hidden_field_tag "file_system", params[:file_system] %>
|
15
|
+
<table>
|
16
|
+
<% @cdm_terms.each_with_index do |t,i| %>
|
17
|
+
<tr>
|
18
|
+
<td>
|
19
|
+
<%= label_tag t.first %>
|
20
|
+
<%= hidden_field_tag "mappings[#{i}][cdm]", t.last %>
|
21
|
+
</td>
|
22
|
+
<td>
|
23
|
+
<h3>Choose one</h3>
|
24
|
+
<%= label_tag 'applies to boths compound objects and children:' %><%= select_tag "mappings[#{i}][hydra]", options_for_select(@terms), include_blank: true %><br>
|
25
|
+
<%= label_tag 'applies only to compound objects: ' %><%= select_tag "mappings[#{i}][hydrac]", options_for_select(@work_only), include_blank: true %>
|
26
|
+
</td>
|
27
|
+
</tr>
|
28
|
+
<% end %>
|
29
|
+
</table>
|
30
|
+
<%= select_tag "mappings_url", options_for_select(@dirs) %>
|
31
|
+
<%= hidden_field_tag "work", params[:work] %>
|
32
|
+
<%= submit_tag 'generate csv' %>
|
33
|
+
<% end %>
|
@@ -0,0 +1,10 @@
|
|
1
|
+
<%= form_for :csv_import, url: "/csv/upload" do |f| %>
|
2
|
+
<%= f.label 'Multi-value Separator:' %>
|
3
|
+
<%= f.text_field 'mvs' %>
|
4
|
+
<br />
|
5
|
+
<%= f.file_field 'csv_file' %>
|
6
|
+
<br />
|
7
|
+
<%= f.submit 'Save' %>
|
8
|
+
<% end %>
|
9
|
+
<br />
|
10
|
+
<%= link_to "template", "/csv/generate.csv" %>
|
@@ -0,0 +1,14 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<title>Cdm migrator</title>
|
5
|
+
<%= stylesheet_link_tag "cdm_migrator/application", media: "all" %>
|
6
|
+
<%= javascript_include_tag "cdm_migrator/application" %>
|
7
|
+
<%= csrf_meta_tags %>
|
8
|
+
</head>
|
9
|
+
<body>
|
10
|
+
|
11
|
+
<%= yield %>
|
12
|
+
|
13
|
+
</body>
|
14
|
+
</html>
|
data/config/routes.rb
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
CdmMigrator::Engine.routes.draw do
|
2
|
+
get '/csv/upload', to: 'csv#upload'
|
3
|
+
post '/csv/upload', to: 'csv#create'
|
4
|
+
get '/csv/generate', to: 'csv#generate'
|
5
|
+
|
6
|
+
get 'cdm/collection', to: 'cdm#collection'
|
7
|
+
get 'cdm/mappings/', to: 'cdm#mappings', as: 'cdm_mappings'
|
8
|
+
post 'cdm/generate/', to: 'cdm#generate', as: 'cdm_generate'
|
9
|
+
end
|
data/lib/cdm_migrator.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'hyrax'
|
2
|
+
|
3
|
+
module CdmMigrator
|
4
|
+
class Engine < ::Rails::Engine
|
5
|
+
|
6
|
+
isolate_namespace CdmMigrator
|
7
|
+
class << self
|
8
|
+
|
9
|
+
# Returns the engine configuration, loading it from
# config/cdm_migrator.yml under Rails.root on first use.
#
# The file is read only when no configuration is memoized yet, and
# File.read closes the handle itself; previously a new handle was opened
# on every call and never closed.
#
# @return [Object] the parsed YAML content (memoized in @config)
def config
  @config ||= YAML.safe_load(File.read(File.join(::Rails.root, "/config/cdm_migrator.yml")))
end
|
13
|
+
# loads a yml file with the configuration options
|
14
|
+
#
|
15
|
+
# @param file [String] path to the yml file
|
16
|
+
#
|
17
|
+
def load_config(file)
|
18
|
+
@config = YAML.load_file(file)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
class CdmMigrator::InstallGenerator < Rails::Generators::Base
|
2
|
+
source_root File.expand_path('../templates', __FILE__)
|
3
|
+
|
4
|
+
def inject_routes
|
5
|
+
insert_into_file "config/routes.rb", after: ".draw do" do
|
6
|
+
%(\n mount CdmMigrator::Engine => '/cdm_migrator'\n)
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
def inject_content_dm_yml
|
11
|
+
copy_file "config/cdm_migrator.yml", "config/cdm_migrator.yml"
|
12
|
+
end
|
13
|
+
end
|
metadata
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cdm_migrator
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- sephirothkod
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-08-08 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rails
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '5.0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '5.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: hyrax
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.0.0.rc1
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.0.0.rc1
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: engine_cart
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.1'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.1'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: therubyracer
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec-rails
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
description: Pulls ContentDM metadata and object links into a CSV. Then allows you
|
84
|
+
to upload that CSV into Hyrax for automatic ingest. The CSV intermediate step is
|
85
|
+
to allow for data refining or upload to another system.
|
86
|
+
email:
|
87
|
+
- bjustice@uvic.ca
|
88
|
+
executables: []
|
89
|
+
extensions: []
|
90
|
+
extra_rdoc_files: []
|
91
|
+
files:
|
92
|
+
- MIT-LICENSE
|
93
|
+
- README.md
|
94
|
+
- Rakefile
|
95
|
+
- app/assets/config/cdm_migrator_manifest.js
|
96
|
+
- app/assets/javascripts/cdm_migrator/application.js
|
97
|
+
- app/assets/stylesheets/cdm_migrator/application.css
|
98
|
+
- app/controllers/cdm_migrator/application_controller.rb
|
99
|
+
- app/controllers/cdm_migrator/cdm_controller.rb
|
100
|
+
- app/controllers/cdm_migrator/csv_controller.rb
|
101
|
+
- app/helpers/cdm_migrator/application_helper.rb
|
102
|
+
- app/jobs/cdm_migrator/application_job.rb
|
103
|
+
- app/jobs/csv_upload_job.rb
|
104
|
+
- app/mailers/cdm_migrator/application_mailer.rb
|
105
|
+
- app/models/cdm_migrator/application_record.rb
|
106
|
+
- app/views/cdm_migrator/cdm/collection.html.erb
|
107
|
+
- app/views/cdm_migrator/cdm/mappings.html.erb
|
108
|
+
- app/views/cdm_migrator/csv/upload.html.erb
|
109
|
+
- app/views/layouts/cdm_migrator/application.html.erb
|
110
|
+
- config/routes.rb
|
111
|
+
- lib/cdm_migrator.rb
|
112
|
+
- lib/cdm_migrator/engine.rb
|
113
|
+
- lib/cdm_migrator/version.rb
|
114
|
+
- lib/generators/cdm_migrator/install/install_generator.rb
|
115
|
+
- lib/generators/cdm_migrator/install/templates/config/cdm_migrator.yml
|
116
|
+
- lib/tasks/cdm_migrator_tasks.rake
|
117
|
+
homepage: https://github.com/UVicLibrary/cdm_migrator
|
118
|
+
licenses:
|
119
|
+
- MIT
|
120
|
+
metadata: {}
|
121
|
+
post_install_message:
|
122
|
+
rdoc_options: []
|
123
|
+
require_paths:
|
124
|
+
- lib
|
125
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
126
|
+
requirements:
|
127
|
+
- - ">="
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: '0'
|
130
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
131
|
+
requirements:
|
132
|
+
- - ">="
|
133
|
+
- !ruby/object:Gem::Version
|
134
|
+
version: '0'
|
135
|
+
requirements: []
|
136
|
+
rubyforge_project:
|
137
|
+
rubygems_version: 2.4.5
|
138
|
+
signing_key:
|
139
|
+
specification_version: 4
|
140
|
+
summary: ContentDM to Hyrax migrator.
|
141
|
+
test_files: []
|