cdm_migrator 3.2.1 → 3.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/MIT-LICENSE +20 -20
  3. data/README.md +41 -41
  4. data/Rakefile +38 -38
  5. data/app/assets/config/cdm_migrator_manifest.js +2 -2
  6. data/app/assets/javascripts/cdm_migrator/application.js +13 -13
  7. data/app/assets/stylesheets/cdm_migrator/application.css +15 -15
  8. data/app/assets/stylesheets/cdm_migrator/csv_checker.css +36 -36
  9. data/app/controllers/cdm_migrator/application_controller.rb +10 -10
  10. data/app/controllers/cdm_migrator/cdm_controller.rb +216 -213
  11. data/app/controllers/cdm_migrator/csv_controller.rb +428 -408
  12. data/app/helpers/cdm_migrator/application_helper.rb +4 -4
  13. data/app/jobs/cdm_migrator/application_job.rb +4 -4
  14. data/app/jobs/cdm_migrator/batch_create_files_job.rb +32 -20
  15. data/app/jobs/cdm_migrator/batch_create_files_with_ordered_members_job.rb +45 -0
  16. data/app/jobs/cdm_migrator/batch_create_works_job.rb +20 -14
  17. data/app/jobs/cdm_migrator/cdm_ingest_files_job.rb +35 -35
  18. data/app/jobs/cdm_migrator/create_work_job.rb +36 -25
  19. data/app/jobs/cdm_migrator/restart_upload_from_middle_job.rb +36 -0
  20. data/app/jobs/cdm_migrator/update_object_job.rb +10 -10
  21. data/app/mailers/cdm_migrator/application_mailer.rb +6 -6
  22. data/app/models/cdm_migrator/application_record.rb +5 -5
  23. data/app/models/cdm_migrator/batch_ingest.rb +33 -33
  24. data/app/models/cdm_migrator/ingest_work.rb +7 -16
  25. data/app/views/cdm_migrator/cdm/collection.html.erb +11 -11
  26. data/app/views/cdm_migrator/cdm/mappings.html.erb +54 -53
  27. data/app/views/cdm_migrator/csv/_batches_list.html.erb +4 -4
  28. data/app/views/cdm_migrator/csv/_default_group.html.erb +17 -17
  29. data/app/views/cdm_migrator/csv/_error_list.html.erb +20 -20
  30. data/app/views/cdm_migrator/csv/_list_batches.html.erb +21 -21
  31. data/app/views/cdm_migrator/csv/_tabs.html.erb +8 -8
  32. data/app/views/cdm_migrator/csv/csv_checker.html.erb +45 -45
  33. data/app/views/cdm_migrator/csv/edit.html.erb +17 -18
  34. data/app/views/cdm_migrator/csv/index.html.erb +19 -19
  35. data/app/views/cdm_migrator/csv/upload.html.erb +18 -18
  36. data/app/views/layouts/cdm_migrator/application.html.erb +14 -14
  37. data/config/routes.rb +19 -19
  38. data/db/migrate/20191211193859_create_batch_ingests.rb +21 -19
  39. data/db/migrate/20191212192315_create_ingest_works.rb +18 -18
  40. data/lib/cdm_migrator/engine.rb +29 -29
  41. data/lib/cdm_migrator/version.rb +3 -3
  42. data/lib/cdm_migrator.rb +5 -5
  43. data/lib/generators/cdm_migrator/install/install_generator.rb +79 -79
  44. data/lib/generators/cdm_migrator/install/templates/config/cdm_migrator.yml +53 -50
  45. data/lib/generators/cdm_migrator/install/templates/presenters/hyku/menu_presenter.rb +47 -47
  46. data/lib/generators/cdm_migrator/install/templates/presenters/hyrax/menu_presenter.rb +66 -66
  47. data/lib/generators/cdm_migrator/install/templates/sidebar/_tasks.html.erb +55 -55
  48. data/lib/tasks/cdm_migrator_tasks.rake +4 -4
  49. metadata +7 -5
@@ -1,213 +1,216 @@
1
- module CdmMigrator
2
- class CdmController < ApplicationController
3
- helper_method :default_page_title, :admin_host?, :available_translations, :available_works
4
- layout 'hyrax/dashboard' if Hyrax
5
- require 'csv'
6
-
7
- before_action :load_yaml
8
- before_action :set_exclusive_fields, only: [:generate, :mappings]
9
- skip_before_action :verify_authenticity_token
10
-
11
- def generate
12
- @h_to_c = {}
13
- @c_to_h = {}
14
- params[:mappings].each do |key, mapping|
15
- if !mapping['hydra'].empty?
16
- @c_to_h[mapping['cdm']] = mapping['hydra']
17
- @h_to_c[mapping['hydra']] ||= []
18
- @h_to_c[mapping['hydra']] << mapping['cdm']
19
- elsif !mapping['hydrac'].empty?
20
- @c_to_h[mapping['cdm']] = mapping['hydrac']
21
- @h_to_c[mapping['hydrac']] ||= []
22
- @h_to_c[mapping['hydrac']] << mapping['cdm']
23
- end
24
- end
25
- json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmQuery#{params[:collection]}/0/0/filetype/1024/0/0/0/0/0/1/0/json")).body)
26
- total_recs = json['pager']['total'].to_i
27
- if total_recs > 1024
28
- start = 1
29
- records = []
30
- [0..(total_recs/1024)].each do |index|
31
- start = (index*1024) + 1
32
- json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmQuery#{params[:collection]}/0/0/filetype/1024/#{start}/0/0/0/0/1/0/json")).body)
33
- records << json['records'].map { |rec| [rec['pointer'], rec['filetype']] }
34
- end
35
- else
36
- records = json['records'].map { |rec| [rec['pointer'], rec['filetype']] }
37
- end
38
- headers = ::CSV.generate_line (['object_type','url']+@terms+@work_only)
39
- csv_lines = [] << headers
40
- records.each do |rec|
41
- if rec.last == 'cpd'
42
- json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetItemInfo#{params[:collection]}/#{rec.first}/json")).body)
43
- csv_lines << create_line(params[:work],'',json)
44
- json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetCompoundObjectInfo#{params[:collection]}/#{rec.first}/json")).body)
45
- rec_pages = json['page'] || json['node']['page']
46
- rec_pages.each do |child|
47
- child_json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetItemInfo#{params[:collection]}/#{child['pageptr']}/json")).body)
48
- url = api_check rec, child
49
- csv_lines << create_line('File',url,child_json)
50
- end
51
- else
52
- json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetItemInfo#{params[:collection]}/#{rec.first}/json")).body)
53
- csv_lines << create_line(params[:work],'',json)
54
- url = api_check rec
55
- csv_lines << create_line('File',url,{})
56
- end
57
- end
58
- render plain: csv_lines.join, content_type: 'text/csv'
59
-
60
- end
61
-
62
- def mappings
63
- json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetCollectionFieldInfo"+params['collection']+'/json')).body)
64
- @cdm_terms = json.collect { |c| [c['name'],c['nick']] }
65
- get_dirs if @cdm_dirs
66
- @yaml = YAML.load_file(params['template'].tempfile) if params.has_key? 'template'
67
- end
68
-
69
- def collection
70
- json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetCollectionList/json")).body)
71
- @collections = json.collect { |c| [c['name'],c['alias']] }
72
- load_concerns
73
- end
74
-
75
- def template
76
- hashed = params[:mappings].permit!.to_h
77
- template = {}
78
- hashed.each do |k,v|
79
- template[v['cdm']] = {'hydra' => v['hydra'], 'hydrac' => v['hydrac']}
80
- end
81
- render plain: template.to_yaml, content_type: 'text/yaml'
82
- end
83
-
84
- protected
85
-
86
- def available_works
87
- @available_works ||= Hyrax::QuickClassificationQuery.new(current_user).authorized_models
88
- end
89
-
90
- def default_page_title
91
- 'CDM Mapping'
92
- end
93
-
94
- def admin_host?
95
- false unless Settings.multitenancy.enabled rescue nil
96
- end
97
-
98
- def available_translations
99
- {
100
- 'en' => 'English',
101
- 'fr' => 'French'
102
- }
103
- end
104
-
105
- def load_yaml
106
- stripped_url = request.base_url.dup.gsub(/https?:\/\//, '').gsub(/:[0-9]*/,'')
107
- if CdmMigrator::Engine.config['tenant_settings'].key? stripped_url
108
- tenant = CdmMigrator::Engine.config['tenant_settings'][stripped_url]['cdm_api']
109
- else
110
- tenant = CdmMigrator::Engine.config['tenant_settings']['default']['cdm_api']
111
- end
112
- @cdm_url = tenant['url']
113
- @cdm_port = tenant['port']
114
- @cdm_dirs = tenant['dirs'] || false
115
- @cdm_api = tenant['type']
116
- @default_fields = CdmMigrator::Engine.config['default_fields']
117
- end
118
-
119
- def api_check rec, child=nil
120
- cisoptr = child ? child['pageptr'] : rec.first
121
- filename = child ? child['pagefile'] : "#{rec.first}.#{rec.last}"
122
-
123
- if params[:file_system]=='true'
124
- "file://#{file_path(cisoptr)}"
125
- elsif @cdm_api == 'server'
126
- "#{@cdm_url}:#{@cdm_port}/cgi-bin/showfile.exe?CISOROOT=#{params[:collection]}&CISOPTR=#{cisoptr}"
127
- else
128
- "#{@cdm_url}/utils/getfile/collection#{params[:collection]}/id/#{cisoptr}/filename/#{filename}"
129
- end
130
- end
131
-
132
- def standalone
133
- Hyrax rescue nil
134
- end
135
-
136
- def load_concerns
137
- @available_concerns = []
138
- unless @default_fields.nil?
139
- @available_concerns += [['DefaultWork', 'DefaultWork']]
140
- end
141
- unless standalone.nil?
142
- @available_concerns += Hyrax.config.curation_concerns.map { |c| [c.to_s, c.to_s]}
143
- end
144
- end
145
-
146
- def work_form
147
- Module.const_get("Hyrax::#{params[:work]}Form") rescue nil || Module.const_get('Hyrax::Forms::WorkForm')
148
- end
149
-
150
- def file_form
151
- Module.const_get('Hyrax::FileSetForm') rescue nil || Module.const_get('Hyrax::Forms::FileSetEditForm')
152
- end
153
-
154
- def secondary_terms form_name
155
- form_name.terms - form_name.required_fields -
156
- [:visibility_during_embargo, :embargo_release_date,
157
- :visibility_after_embargo, :visibility_during_lease,
158
- :lease_expiration_date, :visibility_after_lease, :visibility,
159
- :thumbnail_id, :representative_id, :ordered_member_ids,
160
- :collection_ids, :in_works_ids, :admin_set_id, :files, :source, :member_of_collection_ids]
161
- end
162
-
163
- def set_exclusive_fields
164
- if params[:work] != 'DefaultWork'
165
- @terms = file_form.required_fields + secondary_terms(file_form)
166
- @work_only = (secondary_terms work_form) - @terms
167
- else
168
- @terms = @default_fields
169
- @work_only = []
170
- end
171
- end
172
-
173
- def create_line type, url, json
174
- line = [] << type
175
- line << url
176
- (@terms+@work_only).each do |term|
177
- content = []
178
- unless @h_to_c[term.to_s].nil?
179
- @h_to_c[term.to_s].each do |cdm_term|
180
- content << json[cdm_term] unless json[cdm_term].nil?
181
- end
182
- content.delete_if(&:empty?)
183
- end
184
- if content.nil? || content.empty? || content == [{}]
185
- line << ''
186
- else
187
- line << content.join('|')
188
- end
189
- end
190
- ::CSV.generate_line line
191
- end
192
-
193
- def file_path pointer
194
- file_types = ['tif','jpg','mp4','mp3']
195
- files = []
196
- file_types.each do |type|
197
- files << Dir.glob("#{params['mappings_url']}/**/#{pointer}_*#{type}")
198
- end
199
- files.each do |file|
200
- return file.first if file.count > 0
201
- end
202
- end
203
-
204
- def get_dirs
205
- @dirs = []
206
- @cdm_dirs.each do |name, dir|
207
- ent = Dir.entries(dir).select {|entry| File.directory? File.join(dir,entry) and !(entry =='.' || entry == '..') }
208
- ent = ent.map { |url| ["#{name}/#{url}", "#{dir}/#{url}"] }
209
- @dirs += ent
210
- end
211
- end
212
- end
213
- end
1
+ module CdmMigrator
2
+ class CdmController < ApplicationController
3
+ helper_method :default_page_title, :admin_host?, :available_translations, :available_works
4
+ layout 'hyrax/dashboard' if Hyrax
5
+ require 'csv'
6
+
7
+ before_action :load_yaml
8
+ before_action :set_exclusive_fields, only: [:generate, :mappings]
9
+ skip_before_action :verify_authenticity_token
10
+
11
+ def generate
12
+ @h_to_c = {}
13
+ @c_to_h = {}
14
+ params[:mappings].each do |key, mapping|
15
+ if !mapping['hydra'].empty?
16
+ @c_to_h[mapping['cdm']] = mapping['hydra']
17
+ @h_to_c[mapping['hydra']] ||= []
18
+ @h_to_c[mapping['hydra']] << mapping['cdm']
19
+ elsif !mapping['hydrac'].empty?
20
+ @c_to_h[mapping['cdm']] = mapping['hydrac']
21
+ @h_to_c[mapping['hydrac']] ||= []
22
+ @h_to_c[mapping['hydrac']] << mapping['cdm']
23
+ end
24
+ end
25
+ json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmQuery#{params[:collection]}/0/0/filetype/1024/0/0/0/0/0/1/0/json")).body)
26
+ total_recs = json['pager']['total'].to_i
27
+ if total_recs > 1024
28
+ start = 1
29
+ records = []
30
+ [*0..(total_recs/1024)].each do |index|
31
+ start = (index*1024) + 1
32
+ json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmQuery#{params[:collection]}/0/0/filetype/1024/#{start}/0/0/0/0/1/0/json")).body)
33
+ records << json['records'].map { |rec| [rec['pointer'], rec['filetype']] }
34
+ end
35
+ records = records.flatten(1)
36
+ else
37
+ records = json['records'].map { |rec| [rec['pointer'], rec['filetype']] }
38
+ end
39
+ headers = ::CSV.generate_line (['object_type','url']+@terms+@work_only)
40
+ csv_lines = [] << headers
41
+ records.each do |rec|
42
+ if rec.last == 'cpd'
43
+ json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetItemInfo#{params[:collection]}/#{rec.first}/json")).body)
44
+ csv_lines << create_line(params[:work],'',json)
45
+ json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetCompoundObjectInfo#{params[:collection]}/#{rec.first}/json")).body)
46
+ rec_pages = json['page'] || json['node']['page']
47
+ # Fix bug for compound objects with a single page/child: Cdm Migrator expects an array and gets a hash instead, so we wrap the hash in an array
48
+ rec_pages = Array.wrap(rec_pages) if rec_pages.class == Hash
49
+ rec_pages.each do |child|
50
+ child_json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetItemInfo#{params[:collection]}/#{child['pageptr']}/json")).body)
51
+ url = api_check rec, child
52
+ csv_lines << create_line('File',url,child_json)
53
+ end
54
+ else
55
+ json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetItemInfo#{params[:collection]}/#{rec.first}/json")).body)
56
+ csv_lines << create_line(params[:work],'',json)
57
+ url = api_check rec
58
+ csv_lines << create_line('File',url,{})
59
+ end
60
+ end
61
+ render plain: csv_lines.join, content_type: 'text/csv'
62
+
63
+ end
64
+
65
+ def mappings
66
+ json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetCollectionFieldInfo"+params['collection']+'/json')).body)
67
+ @cdm_terms = json.collect { |c| [c['name'],c['nick']] }
68
+ get_dirs if @cdm_dirs
69
+ @yaml = YAML.load_file(params['template'].tempfile) if params.has_key? 'template'
70
+ end
71
+
72
+ def collection
73
+ json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetCollectionList/json")).body)
74
+ @collections = json.collect { |c| [c['name'],c['alias']] }
75
+ load_concerns
76
+ end
77
+
78
+ def template
79
+ hashed = params[:mappings].permit!.to_h
80
+ template = {}
81
+ hashed.each do |k,v|
82
+ template[v['cdm']] = {'hydra' => v['hydra'], 'hydrac' => v['hydrac']}
83
+ end
84
+ render plain: template.to_yaml, content_type: 'text/yaml'
85
+ end
86
+
87
+ protected
88
+
89
+ def available_works
90
+ @available_works ||= Hyrax::QuickClassificationQuery.new(current_user).authorized_models
91
+ end
92
+
93
+ def default_page_title
94
+ 'CDM Mapping'
95
+ end
96
+
97
+ def admin_host?
98
+ false unless Settings.multitenancy.enabled rescue nil
99
+ end
100
+
101
+ def available_translations
102
+ {
103
+ 'en' => 'English',
104
+ 'fr' => 'French'
105
+ }
106
+ end
107
+
108
+ def load_yaml
109
+ stripped_url = request.base_url.dup.gsub(/https?:\/\//, '').gsub(/:[0-9]*/,'')
110
+ if CdmMigrator::Engine.config['tenant_settings'].key? stripped_url
111
+ tenant = CdmMigrator::Engine.config['tenant_settings'][stripped_url]['cdm_api']
112
+ else
113
+ tenant = CdmMigrator::Engine.config['tenant_settings']['default']['cdm_api']
114
+ end
115
+ @cdm_url = tenant['url']
116
+ @cdm_port = tenant['port']
117
+ @cdm_dirs = tenant['dirs'] || false
118
+ @cdm_api = tenant['type']
119
+ @default_fields = CdmMigrator::Engine.config['default_fields']
120
+ end
121
+
122
+ def api_check rec, child=nil
123
+ cisoptr = child ? child['pageptr'] : rec.first
124
+ filename = child ? child['pagefile'] : "#{rec.first}.#{rec.last}"
125
+
126
+ if params[:file_system]=='true'
127
+ "file://#{file_path(cisoptr)}"
128
+ elsif @cdm_api == 'server'
129
+ "#{@cdm_url}:#{@cdm_port}/cgi-bin/showfile.exe?CISOROOT=#{params[:collection]}&CISOPTR=#{cisoptr}"
130
+ else
131
+ "#{@cdm_url}/utils/getfile/collection#{params[:collection]}/id/#{cisoptr}/filename/#{filename}"
132
+ end
133
+ end
134
+
135
+ def standalone
136
+ Hyrax rescue nil
137
+ end
138
+
139
+ def load_concerns
140
+ @available_concerns = []
141
+ unless @default_fields.nil?
142
+ @available_concerns += [['DefaultWork', 'DefaultWork']]
143
+ end
144
+ unless standalone.nil?
145
+ @available_concerns += Hyrax.config.curation_concerns.map { |c| [c.to_s, c.to_s]}
146
+ end
147
+ end
148
+
149
+ def work_form
150
+ Module.const_get("Hyrax::#{params[:work]}Form") rescue nil || Module.const_get('Hyrax::Forms::WorkForm')
151
+ end
152
+
153
+ def file_form
154
+ Module.const_get('Hyrax::FileSetForm') rescue nil || Module.const_get('Hyrax::Forms::FileSetEditForm')
155
+ end
156
+
157
+ def secondary_terms form_name
158
+ form_name.terms - form_name.required_fields -
159
+ [:visibility_during_embargo, :embargo_release_date,
160
+ :visibility_after_embargo, :visibility_during_lease,
161
+ :lease_expiration_date, :visibility_after_lease, :visibility,
162
+ :thumbnail_id, :representative_id, :ordered_member_ids,
163
+ :collection_ids, :in_works_ids, :admin_set_id, :files, :source, :member_of_collection_ids]
164
+ end
165
+
166
+ def set_exclusive_fields
167
+ if params[:work] != 'DefaultWork'
168
+ @terms = file_form.required_fields + secondary_terms(file_form)
169
+ @work_only = (secondary_terms work_form) - @terms
170
+ else
171
+ @terms = @default_fields
172
+ @work_only = []
173
+ end
174
+ end
175
+
176
+ def create_line type, url, json
177
+ line = [] << type
178
+ line << url
179
+ (@terms+@work_only).each do |term|
180
+ content = []
181
+ unless @h_to_c[term.to_s].nil?
182
+ @h_to_c[term.to_s].each do |cdm_term|
183
+ content << json[cdm_term] unless json[cdm_term].nil?
184
+ end
185
+ content.delete_if(&:empty?)
186
+ end
187
+ if content.nil? || content.empty? || content == [{}]
188
+ line << ''
189
+ else
190
+ line << content.join('|')
191
+ end
192
+ end
193
+ ::CSV.generate_line line
194
+ end
195
+
196
+ def file_path pointer
197
+ file_types = ['tif','jpg','mp4','mp3']
198
+ files = []
199
+ file_types.each do |type|
200
+ files << Dir.glob("#{params['mappings_url']}/**/#{pointer}_*#{type}")
201
+ end
202
+ files.each do |file|
203
+ return file.first if file.count > 0
204
+ end
205
+ end
206
+
207
+ def get_dirs
208
+ @dirs = []
209
+ @cdm_dirs.each do |name, dir|
210
+ ent = Dir.entries(dir).select {|entry| File.directory? File.join(dir,entry) and !(entry =='.' || entry == '..') }
211
+ ent = ent.map { |url| ["#{name}/#{url}", "#{dir}/#{url}"] }
212
+ @dirs += ent
213
+ end
214
+ end
215
+ end
216
+ end