cdm_migrator 3.2.1 → 3.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/MIT-LICENSE +20 -20
- data/README.md +41 -41
- data/Rakefile +38 -38
- data/app/assets/config/cdm_migrator_manifest.js +2 -2
- data/app/assets/javascripts/cdm_migrator/application.js +13 -13
- data/app/assets/stylesheets/cdm_migrator/application.css +15 -15
- data/app/assets/stylesheets/cdm_migrator/csv_checker.css +36 -36
- data/app/controllers/cdm_migrator/application_controller.rb +10 -10
- data/app/controllers/cdm_migrator/cdm_controller.rb +216 -213
- data/app/controllers/cdm_migrator/csv_controller.rb +428 -408
- data/app/helpers/cdm_migrator/application_helper.rb +4 -4
- data/app/jobs/cdm_migrator/application_job.rb +4 -4
- data/app/jobs/cdm_migrator/batch_create_files_job.rb +32 -20
- data/app/jobs/cdm_migrator/batch_create_files_with_ordered_members_job.rb +45 -0
- data/app/jobs/cdm_migrator/batch_create_works_job.rb +20 -14
- data/app/jobs/cdm_migrator/cdm_ingest_files_job.rb +35 -35
- data/app/jobs/cdm_migrator/create_work_job.rb +36 -25
- data/app/jobs/cdm_migrator/restart_upload_from_middle_job.rb +36 -0
- data/app/jobs/cdm_migrator/update_object_job.rb +10 -10
- data/app/mailers/cdm_migrator/application_mailer.rb +6 -6
- data/app/models/cdm_migrator/application_record.rb +5 -5
- data/app/models/cdm_migrator/batch_ingest.rb +33 -33
- data/app/models/cdm_migrator/ingest_work.rb +7 -16
- data/app/views/cdm_migrator/cdm/collection.html.erb +11 -11
- data/app/views/cdm_migrator/cdm/mappings.html.erb +54 -53
- data/app/views/cdm_migrator/csv/_batches_list.html.erb +4 -4
- data/app/views/cdm_migrator/csv/_default_group.html.erb +17 -17
- data/app/views/cdm_migrator/csv/_error_list.html.erb +20 -20
- data/app/views/cdm_migrator/csv/_list_batches.html.erb +21 -21
- data/app/views/cdm_migrator/csv/_tabs.html.erb +8 -8
- data/app/views/cdm_migrator/csv/csv_checker.html.erb +45 -45
- data/app/views/cdm_migrator/csv/edit.html.erb +17 -18
- data/app/views/cdm_migrator/csv/index.html.erb +19 -19
- data/app/views/cdm_migrator/csv/upload.html.erb +18 -18
- data/app/views/layouts/cdm_migrator/application.html.erb +14 -14
- data/config/routes.rb +19 -19
- data/db/migrate/20191211193859_create_batch_ingests.rb +19 -19
- data/db/migrate/20191212192315_create_ingest_works.rb +18 -18
- data/lib/cdm_migrator/engine.rb +29 -29
- data/lib/cdm_migrator/version.rb +3 -3
- data/lib/cdm_migrator.rb +5 -5
- data/lib/generators/cdm_migrator/install/install_generator.rb +79 -79
- data/lib/generators/cdm_migrator/install/templates/config/cdm_migrator.yml +53 -50
- data/lib/generators/cdm_migrator/install/templates/presenters/hyku/menu_presenter.rb +47 -47
- data/lib/generators/cdm_migrator/install/templates/presenters/hyrax/menu_presenter.rb +66 -66
- data/lib/generators/cdm_migrator/install/templates/sidebar/_tasks.html.erb +55 -55
- data/lib/tasks/cdm_migrator_tasks.rake +4 -4
- metadata +7 -5
@@ -1,213 +1,216 @@
|
|
1
|
-
module CdmMigrator
|
2
|
-
class CdmController < ApplicationController
|
3
|
-
helper_method :default_page_title, :admin_host?, :available_translations, :available_works
|
4
|
-
layout 'hyrax/dashboard' if Hyrax
|
5
|
-
require 'csv'
|
6
|
-
|
7
|
-
before_action :load_yaml
|
8
|
-
before_action :set_exclusive_fields, only: [:generate, :mappings]
|
9
|
-
skip_before_action :verify_authenticity_token
|
10
|
-
|
11
|
-
def generate
|
12
|
-
@h_to_c = {}
|
13
|
-
@c_to_h = {}
|
14
|
-
params[:mappings].each do |key, mapping|
|
15
|
-
if !mapping['hydra'].empty?
|
16
|
-
@c_to_h[mapping['cdm']] = mapping['hydra']
|
17
|
-
@h_to_c[mapping['hydra']] ||= []
|
18
|
-
@h_to_c[mapping['hydra']] << mapping['cdm']
|
19
|
-
elsif !mapping['hydrac'].empty?
|
20
|
-
@c_to_h[mapping['cdm']] = mapping['hydrac']
|
21
|
-
@h_to_c[mapping['hydrac']] ||= []
|
22
|
-
@h_to_c[mapping['hydrac']] << mapping['cdm']
|
23
|
-
end
|
24
|
-
end
|
25
|
-
json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmQuery#{params[:collection]}/0/0/filetype/1024/0/0/0/0/0/1/0/json")).body)
|
26
|
-
total_recs = json['pager']['total'].to_i
|
27
|
-
if total_recs > 1024
|
28
|
-
start = 1
|
29
|
-
records = []
|
30
|
-
[0..(total_recs/1024)].each do |index|
|
31
|
-
start = (index*1024) + 1
|
32
|
-
json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmQuery#{params[:collection]}/0/0/filetype/1024/#{start}/0/0/0/0/1/0/json")).body)
|
33
|
-
records << json['records'].map { |rec| [rec['pointer'], rec['filetype']] }
|
34
|
-
end
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
rec_pages
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
@
|
116
|
-
@
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
unless
|
142
|
-
@available_concerns +=
|
143
|
-
end
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
:
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
@terms =
|
169
|
-
@work_only =
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
end
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
end
|
1
|
+
module CdmMigrator
|
2
|
+
class CdmController < ApplicationController
|
3
|
+
# Controller methods that the views may call as helpers.
helper_method :default_page_title, :admin_host?, :available_translations, :available_works
# Use the Hyrax dashboard layout only when Hyrax is loaded. A bare `Hyrax`
# reference raises NameError while the class body is evaluated if the
# constant is missing, whereas #standalone explicitly tolerates that case.
layout 'hyrax/dashboard' if defined?(Hyrax)
require 'csv'

# Per-request setup: tenant/API settings first, then the CSV column sets
# needed by the two actions that build or display mappings.
before_action :load_yaml
before_action :set_exclusive_fields, only: [:generate, :mappings]
# NOTE(review): this disables CSRF token verification for every action in
# this controller — confirm that is intended for all of them.
skip_before_action :verify_authenticity_token
|
10
|
+
|
11
|
+
# Builds a CSV export of a CONTENTdm collection and renders it as text/csv.
#
# Reads params[:mappings] (entries with 'cdm', 'hydra' and 'hydrac' keys),
# params[:collection] and params[:work]. Fills @c_to_h (CDM field => target
# field) and @h_to_c (target field => list of CDM fields), which #create_line
# consults. For every record it emits one work row followed by its file
# row(s): one per page for compound ('cpd') records, otherwise a single one.
def generate
  @h_to_c = {}
  @c_to_h = {}
  params[:mappings].each do |_key, mapping|
    # 'hydra' takes precedence over 'hydrac'; missing or empty values are skipped.
    target = [mapping['hydra'], mapping['hydrac']].find { |field| field && !field.empty? }
    next unless target

    @c_to_h[mapping['cdm']] = target
    (@h_to_c[target] ||= []) << mapping['cdm']
  end

  page_size = 1024
  # Performs one dmwebservices call and returns the parsed JSON body.
  fetch = lambda do |query|
    JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=#{query}")).body)
  end
  list_query = ->(start) { "dmQuery#{params[:collection]}/0/0/filetype/#{page_size}/#{start}/0/0/0/0/1/0/json" }
  to_pairs = ->(page) { page['records'].map { |rec| [rec['pointer'], rec['filetype']] } }

  first_page = fetch.call(list_query.call(0))
  total_recs = first_page['pager']['total'].to_i
  records =
    if total_recs > page_size
      # Ceiling division: an exact multiple of page_size must not trigger a
      # request for a page that starts past the last record.
      page_count = (total_recs + page_size - 1) / page_size
      (0...page_count).flat_map do |index|
        to_pairs.call(fetch.call(list_query.call((index * page_size) + 1)))
      end
    else
      to_pairs.call(first_page)
    end

  csv_lines = [::CSV.generate_line(['object_type', 'url'] + @terms + @work_only)]
  records.each do |rec|
    item = fetch.call("dmGetItemInfo#{params[:collection]}/#{rec.first}/json")
    csv_lines << create_line(params[:work], '', item)

    if rec.last == 'cpd'
      compound = fetch.call("dmGetCompoundObjectInfo#{params[:collection]}/#{rec.first}/json")
      pages = compound['page'] || compound.dig('node', 'page')
      # A compound object with a single page comes back as a hash rather than
      # an array of hashes; one with no pages at all yields nil.
      pages = [pages] if pages.is_a?(Hash)
      Array(pages).each do |child|
        child_json = fetch.call("dmGetItemInfo#{params[:collection]}/#{child['pageptr']}/json")
        csv_lines << create_line('File', api_check(rec, child), child_json)
      end
    else
      csv_lines << create_line('File', api_check(rec), {})
    end
  end

  render plain: csv_lines.join, content_type: 'text/csv'
end
|
64
|
+
|
65
|
+
# Loads the field list of the selected CONTENTdm collection into @cdm_terms
# as [name, nick] pairs, refreshes the directory list when @cdm_dirs is set,
# and reads an uploaded mapping template into @yaml when one was submitted.
def mappings
  endpoint = "#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetCollectionFieldInfo" + params['collection'] + '/json'
  field_info = JSON.parse(Net::HTTP.get_response(URI.parse(endpoint)).body)
  @cdm_terms = field_info.map { |field| [field['name'], field['nick']] }
  get_dirs if @cdm_dirs
  return unless params.has_key? 'template'

  # NOTE(review): YAML.load_file parses an uploaded file; on Psych versions
  # where load_file is not safe by default this can instantiate arbitrary
  # objects — consider a safe loader.
  @yaml = YAML.load_file(params['template'].tempfile)
end
|
71
|
+
|
72
|
+
# Fetches the list of CONTENTdm collections into @collections as
# [name, alias] pairs and then populates the selectable work types.
def collection
  list_url = URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetCollectionList/json")
  listing = JSON.parse(Net::HTTP.get_response(list_url).body)
  @collections = listing.map { |entry| [entry['name'], entry['alias']] }
  load_concerns
end
|
77
|
+
|
78
|
+
# Renders the submitted mappings as a YAML template keyed by CDM field,
# each entry holding its 'hydra' and 'hydrac' selections.
def template
  submitted = params[:mappings].permit!.to_h
  by_cdm_field = submitted.values.each_with_object({}) do |entry, acc|
    acc[entry['cdm']] = { 'hydra' => entry['hydra'], 'hydrac' => entry['hydrac'] }
  end
  render plain: by_cdm_field.to_yaml, content_type: 'text/yaml'
end
|
86
|
+
|
87
|
+
protected
|
88
|
+
|
89
|
+
# Work models the current user is authorized for, memoized in
# @available_works after the first lookup.
def available_works
  return @available_works if @available_works

  @available_works = Hyrax::QuickClassificationQuery.new(current_user).authorized_models
end
|
92
|
+
|
93
|
+
# Page title exposed to the views through helper_method.
def default_page_title
  'CDM Mapping'
end
|
96
|
+
|
97
|
+
# Returns false when Settings.multitenancy.enabled is falsy, and nil when it
# is truthy or when looking it up raises. The result is therefore never truthy.
# NOTE(review): presumably intentional (this controller is never treated as
# being on the admin host) — confirm.
def admin_host?
  Settings.multitenancy.enabled ? nil : false
rescue StandardError
  nil
end
|
100
|
+
|
101
|
+
# Locale code => display name pairs exposed to the views.
def available_translations
  { 'en' => 'English', 'fr' => 'French' }
end
|
107
|
+
|
108
|
+
# Copies the CONTENTdm API settings for the current host into instance
# variables. The request's base URL, with scheme and port removed, is looked
# up in the engine's 'tenant_settings'; hosts without an entry use 'default'.
# Sets @cdm_url, @cdm_port, @cdm_dirs (false when not configured), @cdm_api
# and @default_fields.
def load_yaml
  host = request.base_url.dup.gsub(/https?:\/\//, '').gsub(/:[0-9]*/,'')
  engine_config = CdmMigrator::Engine.config
  tenants = engine_config['tenant_settings']
  tenant_key = tenants.key?(host) ? host : 'default'
  api_settings = tenants[tenant_key]['cdm_api']

  @cdm_url = api_settings['url']
  @cdm_port = api_settings['port']
  @cdm_dirs = api_settings['dirs'] || false
  @cdm_api = api_settings['type']
  @default_fields = engine_config['default_fields']
end
|
121
|
+
|
122
|
+
# Builds the download location for a record or, when +child+ is given, for
# one page of a compound record.
#
# rec   - [pointer, filetype] pair
# child - optional hash with 'pageptr' and 'pagefile'
#
# Returns a file:// path when params[:file_system] is 'true', a showfile.exe
# URL when @cdm_api is 'server', and a utils/getfile URL otherwise.
def api_check(rec, child = nil)
  if child
    cisoptr = child['pageptr']
    filename = child['pagefile']
  else
    cisoptr = rec.first
    filename = "#{rec.first}.#{rec.last}"
  end

  return "file://#{file_path(cisoptr)}" if params[:file_system]=='true'
  return "#{@cdm_url}:#{@cdm_port}/cgi-bin/showfile.exe?CISOROOT=#{params[:collection]}&CISOPTR=#{cisoptr}" if @cdm_api == 'server'

  "#{@cdm_url}/utils/getfile/collection#{params[:collection]}/id/#{cisoptr}/filename/#{filename}"
end
|
134
|
+
|
135
|
+
# Returns the Hyrax constant, or nil when referencing it raises (for
# example because Hyrax is not loaded).
def standalone
  Hyrax
rescue StandardError
  nil
end
|
138
|
+
|
139
|
+
# Fills @available_concerns with [label, value] pairs: 'DefaultWork' when
# default fields are configured, followed by every Hyrax curation concern
# when #standalone finds Hyrax.
def load_concerns
  @available_concerns = []
  @available_concerns += [['DefaultWork', 'DefaultWork']] if !@default_fields.nil?
  return if standalone.nil?

  @available_concerns += Hyrax.config.curation_concerns.map do |concern|
    label = concern.to_s
    [label, concern.to_s]
  end
end
|
148
|
+
|
149
|
+
# Resolves the form class for the requested work type, i.e. the constant
# "Hyrax::<params[:work]>Form". When that lookup raises, falls back to
# Hyrax::Forms::WorkForm.
# NOTE(review): the constant name is built from request input — confirm the
# set of reachable constants is acceptable.
def work_form
  Module.const_get("Hyrax::#{params[:work]}Form")
rescue StandardError
  Module.const_get('Hyrax::Forms::WorkForm')
end
|
152
|
+
|
153
|
+
# Resolves the file set form class: Hyrax::FileSetForm when that lookup
# succeeds, otherwise Hyrax::Forms::FileSetEditForm.
def file_form
  Module.const_get('Hyrax::FileSetForm')
rescue StandardError
  Module.const_get('Hyrax::Forms::FileSetEditForm')
end
|
156
|
+
|
157
|
+
# Returns the form's terms minus its required fields and minus the
# embargo, lease, visibility and relationship fields listed below.
#
# form_name - an object responding to #terms and #required_fields
def secondary_terms(form_name)
  excluded = [
    :visibility_during_embargo, :embargo_release_date,
    :visibility_after_embargo, :visibility_during_lease,
    :lease_expiration_date, :visibility_after_lease, :visibility,
    :thumbnail_id, :representative_id, :ordered_member_ids,
    :collection_ids, :in_works_ids, :admin_set_id, :files, :source, :member_of_collection_ids
  ]
  all_terms = form_name.terms
  optional_terms = all_terms - form_name.required_fields
  optional_terms - excluded
end
|
165
|
+
|
166
|
+
# Chooses the CSV columns. For 'DefaultWork', @terms is the configured
# default field list and @work_only is empty. Otherwise @terms is the file
# form's required fields plus its secondary terms, and @work_only holds the
# work form's secondary terms that are not already in @terms.
def set_exclusive_fields
  if params[:work] == 'DefaultWork'
    @terms = @default_fields
    @work_only = []
  else
    file_fields = file_form
    @terms = file_fields.required_fields + secondary_terms(file_fields)
    @work_only = secondary_terms(work_form) - @terms
  end
end
|
175
|
+
|
176
|
+
# Builds one CSV row: +type+, +url+, then one cell per entry of
# @terms + @work_only.
#
# For each term, the CDM fields mapped to it in @h_to_c are looked up in
# +json+; nil and empty values are dropped and the rest are joined with '|'.
# A term with no mapping or no remaining values produces an empty cell.
#
# type - value for the first column
# url  - value for the second column
# json - hash of CDM field => value for the record
#
# Returns the row as a CSV-formatted string.
def create_line(type, url, json)
  cells = (@terms + @work_only).map do |term|
    Array(@h_to_c[term.to_s])
      .map { |cdm_term| json[cdm_term] }
      # Values that do not respond to empty? (e.g. numbers) are kept instead
      # of raising NoMethodError.
      .reject { |value| value.nil? || (value.respond_to?(:empty?) && value.empty?) }
      .join('|')
  end
  ::CSV.generate_line([type, url] + cells)
end
|
195
|
+
|
196
|
+
# Finds the on-disk file for a CONTENTdm pointer under params['mappings_url'].
#
# Searches recursively for "<pointer>_*<ext>", trying tif, jpg, mp4 and mp3
# in that order, and returns the first match. Returns nil when nothing
# matches; previously the array of empty glob results was returned, which the
# caller interpolated into the file:// URL.
#
# NOTE(review): the search root comes straight from request params — confirm
# it is restricted to the intended directories.
def file_path(pointer)
  %w[tif jpg mp4 mp3].each do |type|
    match = Dir.glob("#{params['mappings_url']}/**/#{pointer}_*#{type}").first
    return match if match
  end
  nil
end
|
206
|
+
|
207
|
+
# Collects the immediate subdirectories of every configured directory in
# @cdm_dirs (name => path) into @dirs as ["name/sub", "path/sub"] pairs.
def get_dirs
  @dirs = []
  @cdm_dirs.each do |name, dir|
    subdirs = Dir.entries(dir).select do |entry|
      next false if entry == '.' || entry == '..'

      File.directory?(File.join(dir, entry))
    end
    @dirs += subdirs.map { |sub| ["#{name}/#{sub}", "#{dir}/#{sub}"] }
  end
end
|
215
|
+
end
|
216
|
+
end
|