cdm_migrator 3.2.1 → 3.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/MIT-LICENSE +20 -20
  3. data/README.md +41 -41
  4. data/Rakefile +38 -38
  5. data/app/assets/config/cdm_migrator_manifest.js +2 -2
  6. data/app/assets/javascripts/cdm_migrator/application.js +13 -13
  7. data/app/assets/stylesheets/cdm_migrator/application.css +15 -15
  8. data/app/assets/stylesheets/cdm_migrator/csv_checker.css +36 -36
  9. data/app/controllers/cdm_migrator/application_controller.rb +10 -10
  10. data/app/controllers/cdm_migrator/cdm_controller.rb +216 -213
  11. data/app/controllers/cdm_migrator/csv_controller.rb +428 -408
  12. data/app/helpers/cdm_migrator/application_helper.rb +4 -4
  13. data/app/jobs/cdm_migrator/application_job.rb +4 -4
  14. data/app/jobs/cdm_migrator/batch_create_files_job.rb +32 -20
  15. data/app/jobs/cdm_migrator/batch_create_files_with_ordered_members_job.rb +45 -0
  16. data/app/jobs/cdm_migrator/batch_create_works_job.rb +20 -14
  17. data/app/jobs/cdm_migrator/cdm_ingest_files_job.rb +35 -35
  18. data/app/jobs/cdm_migrator/create_work_job.rb +36 -25
  19. data/app/jobs/cdm_migrator/restart_upload_from_middle_job.rb +36 -0
  20. data/app/jobs/cdm_migrator/update_object_job.rb +10 -10
  21. data/app/mailers/cdm_migrator/application_mailer.rb +6 -6
  22. data/app/models/cdm_migrator/application_record.rb +5 -5
  23. data/app/models/cdm_migrator/batch_ingest.rb +33 -33
  24. data/app/models/cdm_migrator/ingest_work.rb +7 -16
  25. data/app/views/cdm_migrator/cdm/collection.html.erb +11 -11
  26. data/app/views/cdm_migrator/cdm/mappings.html.erb +54 -53
  27. data/app/views/cdm_migrator/csv/_batches_list.html.erb +4 -4
  28. data/app/views/cdm_migrator/csv/_default_group.html.erb +17 -17
  29. data/app/views/cdm_migrator/csv/_error_list.html.erb +20 -20
  30. data/app/views/cdm_migrator/csv/_list_batches.html.erb +21 -21
  31. data/app/views/cdm_migrator/csv/_tabs.html.erb +8 -8
  32. data/app/views/cdm_migrator/csv/csv_checker.html.erb +45 -45
  33. data/app/views/cdm_migrator/csv/edit.html.erb +17 -18
  34. data/app/views/cdm_migrator/csv/index.html.erb +19 -19
  35. data/app/views/cdm_migrator/csv/upload.html.erb +18 -18
  36. data/app/views/layouts/cdm_migrator/application.html.erb +14 -14
  37. data/config/routes.rb +19 -19
  38. data/db/migrate/20191211193859_create_batch_ingests.rb +19 -19
  39. data/db/migrate/20191212192315_create_ingest_works.rb +18 -18
  40. data/lib/cdm_migrator/engine.rb +29 -29
  41. data/lib/cdm_migrator/version.rb +3 -3
  42. data/lib/cdm_migrator.rb +5 -5
  43. data/lib/generators/cdm_migrator/install/install_generator.rb +79 -79
  44. data/lib/generators/cdm_migrator/install/templates/config/cdm_migrator.yml +53 -50
  45. data/lib/generators/cdm_migrator/install/templates/presenters/hyku/menu_presenter.rb +47 -47
  46. data/lib/generators/cdm_migrator/install/templates/presenters/hyrax/menu_presenter.rb +66 -66
  47. data/lib/generators/cdm_migrator/install/templates/sidebar/_tasks.html.erb +55 -55
  48. data/lib/tasks/cdm_migrator_tasks.rake +4 -4
  49. metadata +7 -5
@@ -1,213 +1,216 @@
1
- module CdmMigrator
2
- class CdmController < ApplicationController
3
- helper_method :default_page_title, :admin_host?, :available_translations, :available_works
4
- layout 'hyrax/dashboard' if Hyrax
5
- require 'csv'
6
-
7
- before_action :load_yaml
8
- before_action :set_exclusive_fields, only: [:generate, :mappings]
9
- skip_before_action :verify_authenticity_token
10
-
11
- def generate
12
- @h_to_c = {}
13
- @c_to_h = {}
14
- params[:mappings].each do |key, mapping|
15
- if !mapping['hydra'].empty?
16
- @c_to_h[mapping['cdm']] = mapping['hydra']
17
- @h_to_c[mapping['hydra']] ||= []
18
- @h_to_c[mapping['hydra']] << mapping['cdm']
19
- elsif !mapping['hydrac'].empty?
20
- @c_to_h[mapping['cdm']] = mapping['hydrac']
21
- @h_to_c[mapping['hydrac']] ||= []
22
- @h_to_c[mapping['hydrac']] << mapping['cdm']
23
- end
24
- end
25
- json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmQuery#{params[:collection]}/0/0/filetype/1024/0/0/0/0/0/1/0/json")).body)
26
- total_recs = json['pager']['total'].to_i
27
- if total_recs > 1024
28
- start = 1
29
- records = []
30
- [0..(total_recs/1024)].each do |index|
31
- start = (index*1024) + 1
32
- json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmQuery#{params[:collection]}/0/0/filetype/1024/#{start}/0/0/0/0/1/0/json")).body)
33
- records << json['records'].map { |rec| [rec['pointer'], rec['filetype']] }
34
- end
35
- else
36
- records = json['records'].map { |rec| [rec['pointer'], rec['filetype']] }
37
- end
38
- headers = ::CSV.generate_line (['object_type','url']+@terms+@work_only)
39
- csv_lines = [] << headers
40
- records.each do |rec|
41
- if rec.last == 'cpd'
42
- json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetItemInfo#{params[:collection]}/#{rec.first}/json")).body)
43
- csv_lines << create_line(params[:work],'',json)
44
- json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetCompoundObjectInfo#{params[:collection]}/#{rec.first}/json")).body)
45
- rec_pages = json['page'] || json['node']['page']
46
- rec_pages.each do |child|
47
- child_json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetItemInfo#{params[:collection]}/#{child['pageptr']}/json")).body)
48
- url = api_check rec, child
49
- csv_lines << create_line('File',url,child_json)
50
- end
51
- else
52
- json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetItemInfo#{params[:collection]}/#{rec.first}/json")).body)
53
- csv_lines << create_line(params[:work],'',json)
54
- url = api_check rec
55
- csv_lines << create_line('File',url,{})
56
- end
57
- end
58
- render plain: csv_lines.join, content_type: 'text/csv'
59
-
60
- end
61
-
62
- def mappings
63
- json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetCollectionFieldInfo"+params['collection']+'/json')).body)
64
- @cdm_terms = json.collect { |c| [c['name'],c['nick']] }
65
- get_dirs if @cdm_dirs
66
- @yaml = YAML.load_file(params['template'].tempfile) if params.has_key? 'template'
67
- end
68
-
69
- def collection
70
- json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetCollectionList/json")).body)
71
- @collections = json.collect { |c| [c['name'],c['alias']] }
72
- load_concerns
73
- end
74
-
75
- def template
76
- hashed = params[:mappings].permit!.to_h
77
- template = {}
78
- hashed.each do |k,v|
79
- template[v['cdm']] = {'hydra' => v['hydra'], 'hydrac' => v['hydrac']}
80
- end
81
- render plain: template.to_yaml, content_type: 'text/yaml'
82
- end
83
-
84
- protected
85
-
86
- def available_works
87
- @available_works ||= Hyrax::QuickClassificationQuery.new(current_user).authorized_models
88
- end
89
-
90
- def default_page_title
91
- 'CDM Mapping'
92
- end
93
-
94
- def admin_host?
95
- false unless Settings.multitenancy.enabled rescue nil
96
- end
97
-
98
- def available_translations
99
- {
100
- 'en' => 'English',
101
- 'fr' => 'French'
102
- }
103
- end
104
-
105
- def load_yaml
106
- stripped_url = request.base_url.dup.gsub(/https?:\/\//, '').gsub(/:[0-9]*/,'')
107
- if CdmMigrator::Engine.config['tenant_settings'].key? stripped_url
108
- tenant = CdmMigrator::Engine.config['tenant_settings'][stripped_url]['cdm_api']
109
- else
110
- tenant = CdmMigrator::Engine.config['tenant_settings']['default']['cdm_api']
111
- end
112
- @cdm_url = tenant['url']
113
- @cdm_port = tenant['port']
114
- @cdm_dirs = tenant['dirs'] || false
115
- @cdm_api = tenant['type']
116
- @default_fields = CdmMigrator::Engine.config['default_fields']
117
- end
118
-
119
- def api_check rec, child=nil
120
- cisoptr = child ? child['pageptr'] : rec.first
121
- filename = child ? child['pagefile'] : "#{rec.first}.#{rec.last}"
122
-
123
- if params[:file_system]=='true'
124
- "file://#{file_path(cisoptr)}"
125
- elsif @cdm_api == 'server'
126
- "#{@cdm_url}:#{@cdm_port}/cgi-bin/showfile.exe?CISOROOT=#{params[:collection]}&CISOPTR=#{cisoptr}"
127
- else
128
- "#{@cdm_url}/utils/getfile/collection#{params[:collection]}/id/#{cisoptr}/filename/#{filename}"
129
- end
130
- end
131
-
132
- def standalone
133
- Hyrax rescue nil
134
- end
135
-
136
- def load_concerns
137
- @available_concerns = []
138
- unless @default_fields.nil?
139
- @available_concerns += [['DefaultWork', 'DefaultWork']]
140
- end
141
- unless standalone.nil?
142
- @available_concerns += Hyrax.config.curation_concerns.map { |c| [c.to_s, c.to_s]}
143
- end
144
- end
145
-
146
- def work_form
147
- Module.const_get("Hyrax::#{params[:work]}Form") rescue nil || Module.const_get('Hyrax::Forms::WorkForm')
148
- end
149
-
150
- def file_form
151
- Module.const_get('Hyrax::FileSetForm') rescue nil || Module.const_get('Hyrax::Forms::FileSetEditForm')
152
- end
153
-
154
- def secondary_terms form_name
155
- form_name.terms - form_name.required_fields -
156
- [:visibility_during_embargo, :embargo_release_date,
157
- :visibility_after_embargo, :visibility_during_lease,
158
- :lease_expiration_date, :visibility_after_lease, :visibility,
159
- :thumbnail_id, :representative_id, :ordered_member_ids,
160
- :collection_ids, :in_works_ids, :admin_set_id, :files, :source, :member_of_collection_ids]
161
- end
162
-
163
- def set_exclusive_fields
164
- if params[:work] != 'DefaultWork'
165
- @terms = file_form.required_fields + secondary_terms(file_form)
166
- @work_only = (secondary_terms work_form) - @terms
167
- else
168
- @terms = @default_fields
169
- @work_only = []
170
- end
171
- end
172
-
173
- def create_line type, url, json
174
- line = [] << type
175
- line << url
176
- (@terms+@work_only).each do |term|
177
- content = []
178
- unless @h_to_c[term.to_s].nil?
179
- @h_to_c[term.to_s].each do |cdm_term|
180
- content << json[cdm_term] unless json[cdm_term].nil?
181
- end
182
- content.delete_if(&:empty?)
183
- end
184
- if content.nil? || content.empty? || content == [{}]
185
- line << ''
186
- else
187
- line << content.join('|')
188
- end
189
- end
190
- ::CSV.generate_line line
191
- end
192
-
193
- def file_path pointer
194
- file_types = ['tif','jpg','mp4','mp3']
195
- files = []
196
- file_types.each do |type|
197
- files << Dir.glob("#{params['mappings_url']}/**/#{pointer}_*#{type}")
198
- end
199
- files.each do |file|
200
- return file.first if file.count > 0
201
- end
202
- end
203
-
204
- def get_dirs
205
- @dirs = []
206
- @cdm_dirs.each do |name, dir|
207
- ent = Dir.entries(dir).select {|entry| File.directory? File.join(dir,entry) and !(entry =='.' || entry == '..') }
208
- ent = ent.map { |url| ["#{name}/#{url}", "#{dir}/#{url}"] }
209
- @dirs += ent
210
- end
211
- end
212
- end
213
- end
1
+ module CdmMigrator
2
+ class CdmController < ApplicationController
3
+ helper_method :default_page_title, :admin_host?, :available_translations, :available_works
4
+ layout 'hyrax/dashboard' if Hyrax
5
+ require 'csv'
6
+
7
+ before_action :load_yaml
8
+ before_action :set_exclusive_fields, only: [:generate, :mappings]
9
+ skip_before_action :verify_authenticity_token
10
+
11
# Controller action: builds a CSV describing the works and files of one
# CONTENTdm collection and renders it as plain text with a text/csv
# content type.
#
# Reads params[:mappings] (entries carrying 'cdm', 'hydra' and 'hydrac'),
# params[:collection] and params[:work]. Uses @cdm_url, @cdm_port, @terms
# and @work_only, which this method does not set itself.
def generate
  @h_to_c = {}
  @c_to_h = {}
  params[:mappings].each do |_key, mapping|
    # 'hydra' wins; 'hydrac' is only consulted when 'hydra' is blank.
    # to_s keeps a missing (nil) entry from raising.
    target = [mapping['hydra'], mapping['hydrac']].find { |field| !field.to_s.empty? }
    next unless target

    @c_to_h[mapping['cdm']] = target
    (@h_to_c[target] ||= []) << mapping['cdm']
  end

  cdm_collection = params[:collection]
  page_size = 1024

  # Single place that talks to the dmwebservices endpoint and parses the reply.
  fetch = lambda do |query|
    url = "#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=#{query}/json"
    JSON.parse(Net::HTTP.get_response(URI.parse(url)).body)
  end
  # Reduces a dmQuery reply to [pointer, filetype] pairs; tolerates a reply
  # without a 'records' entry.
  pairs = lambda do |result|
    (result['records'] || []).map { |rec| [rec['pointer'], rec['filetype']] }
  end

  first_page = fetch.call("dmQuery#{cdm_collection}/0/0/filetype/#{page_size}/0/0/0/0/0/1/0")
  total_recs = first_page['pager']['total'].to_i
  records =
    if total_recs > page_size
      # Ceiling division: an exact multiple of page_size must not trigger a
      # trailing request for a page that holds no records.
      page_count = (total_recs + page_size - 1) / page_size
      (0...page_count).flat_map do |index|
        start = (index * page_size) + 1
        pairs.call(fetch.call("dmQuery#{cdm_collection}/0/0/filetype/#{page_size}/#{start}/0/0/0/0/1/0"))
      end
    else
      pairs.call(first_page)
    end

  csv_lines = [::CSV.generate_line(['object_type', 'url'] + @terms + @work_only)]
  records.each do |rec|
    # Every record yields one work row built from its own item info.
    item_info = fetch.call("dmGetItemInfo#{cdm_collection}/#{rec.first}")
    csv_lines << create_line(params[:work], '', item_info)

    if rec.last == 'cpd'
      compound = fetch.call("dmGetCompoundObjectInfo#{cdm_collection}/#{rec.first}")
      # A compound object with a single page comes back as a Hash rather than
      # an Array; Array.wrap normalises both (and nil) to an Array.
      pages = Array.wrap(compound['page'] || compound.dig('node', 'page'))
      pages.each do |child|
        child_info = fetch.call("dmGetItemInfo#{cdm_collection}/#{child['pageptr']}")
        csv_lines << create_line('File', api_check(rec, child), child_info)
      end
    else
      csv_lines << create_line('File', api_check(rec), {})
    end
  end

  render plain: csv_lines.join, content_type: 'text/csv'
end
64
+
65
# Controller action: loads the field list of the collection named by
# params['collection'] from the CONTENTdm web service into @cdm_terms as
# [name, nick] pairs, refreshes @dirs when @cdm_dirs is set, and parses an
# uploaded mapping template (params['template']) into @yaml when present.
def mappings
  field_info_url = "#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php" \
                   "?q=dmGetCollectionFieldInfo#{params['collection']}/json"
  json = JSON.parse(Net::HTTP.get_response(URI.parse(field_info_url)).body)
  @cdm_terms = json.map { |field| [field['name'], field['nick']] }
  get_dirs if @cdm_dirs
  return unless params.key?('template')

  # SECURITY: the template is a user upload. YAML.load_file can instantiate
  # arbitrary objects on older Psych versions, so only plain YAML scalars,
  # arrays and hashes are accepted here.
  @yaml = YAML.safe_load(File.read(params['template'].tempfile.path))
end
71
+
72
# Controller action: asks the CONTENTdm web service for its collection list,
# stores it in @collections as [name, alias] pairs, then calls load_concerns.
def collection
  list_url = URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmGetCollectionList/json")
  response = Net::HTTP.get_response(list_url)
  @collections = JSON.parse(response.body).map { |entry| [entry['name'], entry['alias']] }
  load_concerns
end
77
+
78
# Controller action: turns params[:mappings] into a hash keyed by each
# entry's 'cdm' value, holding its 'hydra' and 'hydrac' values, and renders
# that hash as YAML (plain text, text/yaml content type).
def template
  mapping_rows = params[:mappings].permit!.to_h
  by_cdm_field = mapping_rows.each_with_object({}) do |(_row, fields), result|
    result[fields['cdm']] = { 'hydra' => fields['hydra'], 'hydrac' => fields['hydrac'] }
  end
  render plain: by_cdm_field.to_yaml, content_type: 'text/yaml'
end
86
+
87
+ protected
88
+
89
# Memoized result of Hyrax::QuickClassificationQuery#authorized_models for
# current_user.
def available_works
  @available_works ||= begin
    classification = Hyrax::QuickClassificationQuery.new(current_user)
    classification.authorized_models
  end
end
92
+
93
# Fixed page title string for this controller.
def default_page_title
  title = 'CDM Mapping'
  title
end
96
+
97
# Always returns a falsy value:
#   * false when Settings.multitenancy.enabled is falsy,
#   * nil when it is truthy,
#   * nil when reading the setting raises (e.g. Settings is not defined).
#
# Written with an explicit rescue because the former one-liner
# (`false unless ... rescue nil`) hid this precedence.
def admin_host?
  Settings.multitenancy.enabled ? nil : false
rescue StandardError
  nil
end
100
+
101
# Locale code => language name pairs offered by this controller.
def available_translations
  [%w[en English], %w[fr French]].to_h
end
107
+
108
# Copies the CONTENTdm API settings for the current host out of
# CdmMigrator::Engine.config into instance variables:
# @cdm_url, @cdm_port, @cdm_dirs (false when unset), @cdm_api and
# @default_fields.
#
# The host is request.base_url with the scheme and port removed. When the
# host has no usable 'cdm_api' entry under 'tenant_settings', the 'default'
# entry is used instead.
#
# Raises KeyError when neither the host nor 'default' provides 'cdm_api'.
def load_yaml
  host = request.base_url.gsub(%r{https?://}, '').gsub(/:[0-9]*/, '')
  tenants = CdmMigrator::Engine.config['tenant_settings']
  # dig returns nil both for an unknown host and for a host entry that is nil
  # or lacks 'cdm_api', so every such case falls through to the default.
  tenant = tenants.dig(host, 'cdm_api') || tenants.dig('default', 'cdm_api')
  raise KeyError, "no cdm_api settings found for '#{host}' or 'default'" unless tenant

  @cdm_url = tenant['url']
  @cdm_port = tenant['port']
  @cdm_dirs = tenant['dirs'] || false
  @cdm_api = tenant['type']
  @default_fields = CdmMigrator::Engine.config['default_fields']
end
121
+
122
# Returns the URL string from which a record's file can be fetched.
#
# rec   - object responding to first (pointer) and last (file type).
# child - optional hash with 'pageptr' and 'pagefile'; when given, its
#         pointer and file name are used instead of the record's.
#
# Yields a file:// URL when params[:file_system] is 'true', a showfile.exe
# URL when @cdm_api is 'server', and a /utils/getfile URL otherwise.
def api_check(rec, child = nil)
  pointer = child ? child['pageptr'] : rec.first

  return "file://#{file_path(pointer)}" if params[:file_system] == 'true'

  if @cdm_api == 'server'
    return "#{@cdm_url}:#{@cdm_port}/cgi-bin/showfile.exe?CISOROOT=#{params[:collection]}&CISOPTR=#{pointer}"
  end

  file_name = child ? child['pagefile'] : "#{rec.first}.#{rec.last}"
  "#{@cdm_url}/utils/getfile/collection#{params[:collection]}/id/#{pointer}/filename/#{file_name}"
end
134
+
135
# Returns the Hyrax constant when it can be resolved, nil otherwise.
# Only the failed constant lookup is rescued, so unrelated errors are no
# longer silently swallowed.
def standalone
  Hyrax
rescue NameError
  nil
end
138
+
139
# Fills @available_concerns with [label, value] pairs: a 'DefaultWork' entry
# when @default_fields is set, followed by one entry per
# Hyrax.config.curation_concerns item when standalone returns non-nil.
def load_concerns
  @available_concerns = []
  @available_concerns << %w[DefaultWork DefaultWork] unless @default_fields.nil?
  return if standalone.nil?

  @available_concerns.concat(
    Hyrax.config.curation_concerns.map { |concern| Array.new(2) { concern.to_s } }
  )
end
148
+
149
# Resolves the form class for the work type named by params[:work]:
# Hyrax::<work>Form when that constant can be resolved, otherwise
# Hyrax::Forms::WorkForm.
#
# The former one-liner `X rescue nil || Y` parsed as `X rescue (nil || Y)`;
# the fallback is now spelled out and limited to failed constant lookups.
def work_form
  Module.const_get("Hyrax::#{params[:work]}Form")
rescue NameError, TypeError
  Module.const_get('Hyrax::Forms::WorkForm')
end
152
+
153
# Resolves the file set form class: Hyrax::FileSetForm when that constant
# can be resolved, otherwise Hyrax::Forms::FileSetEditForm.
#
# The former one-liner `X rescue nil || Y` parsed as `X rescue (nil || Y)`;
# the fallback is now spelled out and limited to a failed constant lookup.
def file_form
  Module.const_get('Hyrax::FileSetForm')
rescue NameError
  Module.const_get('Hyrax::Forms::FileSetEditForm')
end
156
+
157
# Returns form_name.terms without form_name.required_fields and without the
# fixed list of visibility, embargo, lease and relationship fields below.
#
# form_name - object responding to terms and required_fields.
def secondary_terms(form_name)
  excluded = %i[
    visibility_during_embargo embargo_release_date
    visibility_after_embargo visibility_during_lease
    lease_expiration_date visibility_after_lease visibility
    thumbnail_id representative_id ordered_member_ids
    collection_ids in_works_ids admin_set_id files source member_of_collection_ids
  ]
  optional = form_name.terms - form_name.required_fields
  optional - excluded
end
165
+
166
# Sets @terms and @work_only from params[:work].
#
# For 'DefaultWork', @terms is @default_fields and @work_only is empty.
# Otherwise @terms is the file form's required fields plus its secondary
# terms, and @work_only is the work form's secondary terms not already in
# @terms.
def set_exclusive_fields
  if params[:work] == 'DefaultWork'
    @terms = @default_fields
    @work_only = []
  else
    file_fields = file_form
    @terms = file_fields.required_fields + secondary_terms(file_fields)
    @work_only = secondary_terms(work_form) - @terms
  end
end
175
+
176
# Builds one CSV line: type, url, then one cell per entry of
# @terms + @work_only.
#
# type - value for the first column.
# url  - value for the second column.
# json - hash of source values keyed by the names stored in @h_to_c.
#
# For each term, the values of all source fields mapped to it in @h_to_c are
# joined with '|'. nil values and values that report empty? (such as '' or
# {}) are dropped; a term with nothing left gets an empty cell. Values that
# do not respond to empty? (e.g. numbers) are kept instead of raising.
#
# Returns the line as produced by CSV.generate_line.
def create_line(type, url, json)
  cells = (@terms + @work_only).map do |term|
    Array(@h_to_c[term.to_s])
      .map { |cdm_term| json[cdm_term] }
      .reject { |value| value.nil? || (value.respond_to?(:empty?) && value.empty?) }
      .join('|')
  end
  ::CSV.generate_line([type, url, *cells])
end
195
+
196
# Looks under params['mappings_url'] (recursively) for a file named
# "<pointer>_*<type>", trying the types tif, jpg, mp4 and mp3 in that order.
#
# Returns the first matching path, or nil when nothing matches. Previously a
# miss returned the array of empty glob results, which callers then
# interpolated into a URL.
def file_path(pointer)
  %w[tif jpg mp4 mp3].each do |type|
    match = Dir.glob("#{params['mappings_url']}/**/#{pointer}_*#{type}").first
    return match if match
  end
  nil
end
206
+
207
# Fills @dirs with ["<name>/<subdir>", "<dir>/<subdir>"] pairs, one for each
# immediate subdirectory of every name => dir entry in @cdm_dirs.
# Configured directories that do not exist are skipped instead of raising.
def get_dirs
  @dirs = []
  @cdm_dirs.each do |name, dir|
    next unless File.directory?(dir)

    subdirs = Dir.entries(dir).select do |entry|
      !%w[. ..].include?(entry) && File.directory?(File.join(dir, entry))
    end
    @dirs.concat(subdirs.map { |subdir| ["#{name}/#{subdir}", "#{dir}/#{subdir}"] })
  end
end
215
+ end
216
+ end