cdm_migrator 2.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e8c4fdc95bd2d038a441f126555e80767c40b05a7f9ed20eb1aae116f2a66921
4
- data.tar.gz: 1a9a1695951cbb45f6ad78855223f62900a9033ea6b749db77827b67745b5b4f
3
+ metadata.gz: ce7a2c1babecf6e2c4da24d3afe33d9f16229418c8b18484cfa0ac546a45b9d6
4
+ data.tar.gz: fa52d0f174a46612fa1b42226b5be5c521b2b8c6da64ce03f59bb890fec3a981
5
5
  SHA512:
6
- metadata.gz: f123901208397e46b758690c7de039712b0f3da5d4375e9ca8102668914994c698114f29ac964120fb69e0d88b0964b0e9a25a72dde6b1f6987826c024239e20
7
- data.tar.gz: ca95b987182c67fdca1d21d3317658f8895b66ca74eb2eb8e57e94d501930799a700c532afc079cfc1509c00ce14ac3d3c9f9e757280d34bfa33e05b1f8bb36c
6
+ metadata.gz: 6ef368921d6f641c8493c4b7a9e61f414dcb802db7a80dcaf4d51ba335cc2bff62dc56f047442b7372e98fcce825d4bc9fe62ab180f8d38fd489ff1ae7841a2b
7
+ data.tar.gz: c48c112970bb36749528faf4e8a84382c50c5564b8a73c27dfbc2a7dccccb5fc5c29f570906e0ef7f108bf05ce322ecf508f20caeaa48ceb7a590958be4ffd6d
data/README.md CHANGED
@@ -21,6 +21,7 @@ $ gem install cdm_migrator
21
21
  Finally, run:
22
22
  ```bash
23
23
  $ rails g cdm_migrator:install
24
+ $ rails db:migrate
24
25
  ```
25
26
  to insert the yml and add a link to your Hyrax dashboard
26
27
 
@@ -1,7 +1,7 @@
1
1
  module CdmMigrator
2
2
  class CdmController < ApplicationController
3
3
  helper_method :default_page_title, :admin_host?, :available_translations, :available_works
4
- layout 'dashboard' if Hyrax
4
+ layout 'hyrax/dashboard' if Hyrax
5
5
  require 'csv'
6
6
 
7
7
  before_action :load_yaml
@@ -29,7 +29,7 @@ module CdmMigrator
29
29
  records = []
30
30
  [0..(total_recs/1024)].each do |index|
31
31
  start = (index*1024) + 1
32
- json = JSON.parse(Net::HTTP.get_response(URI.parse("http://#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmQuery#{params[:collection]}/0/0/filetype/1024/#{start}/0/0/0/0/1/0/json")).body)
32
+ json = JSON.parse(Net::HTTP.get_response(URI.parse("#{@cdm_url}:#{@cdm_port}/dmwebservices/index.php?q=dmQuery#{params[:collection]}/0/0/filetype/1024/#{start}/0/0/0/0/1/0/json")).body)
33
33
  records << json['records'].map { |rec| [rec['pointer'], rec['filetype']] }
34
34
  end
35
35
  else
@@ -1,175 +1,184 @@
1
1
  module CdmMigrator
2
2
  class CsvController < ApplicationController
3
3
  helper_method :default_page_title, :admin_host?, :available_translations, :available_works
4
- layout 'dashboard'
5
-
6
- def generate
7
- headers = ['type','url']
8
- skip = ["id", "head", "tail", "depositor", "date_uploaded", "date_modified", "import_url", "thumbnail_id", "embargo_id", "lease_id", "access_control_id", "representative_id"]
9
- GenericWork.new.attributes.each do |key, val|
10
- headers << "work_#{key}" unless skip.include? key
11
- end
12
- FileSet.new.attributes.each do |key, val|
13
- headers << "file_#{key}" unless skip.include? key
14
- end
15
- fname = "template_#{DateTime.now.to_i}"
16
- render plain: CSV.generate { |csv| csv << headers }, content_type: 'text/csv'
17
- end
4
+ include ActionView::Helpers::UrlHelper
5
+ layout 'hyrax/dashboard' if Hyrax
6
+ before_action :authenticate, except: :index
7
+
8
# Checks every url in an uploaded CSV and reports the outcome through the
# flash. Does nothing when the request carries no :file parameter.
def file_path_checker
  uploaded = params[:file]
  return unless uploaded

  check_paths(uploaded.path)

  # check_paths fills @path_list with the rows whose file could not be found.
  if @path_list.present?
    flash[:error] = "Cdm Migrator couldn't find files at the following urls. Please correct the paths and try again."
  else
    flash[:notice] = "All file paths are valid."
  end
end
19
+
20
# Lists batches for the batches page: the current user's batches on the
# "my batches" route, every batch on the "all batches" route, and nothing
# for any other path. Newest batches come first.
def index
  @batches =
    if current_page?(main_app.csv_my_batches_path(locale: nil))
      BatchIngest.where(user_id: current_user.id).reverse_order
    elsif current_page?(main_app.csv_all_batches_path(locale: nil))
      BatchIngest.all.reverse_order
    else
      []
    end
end
18
29
 
19
30
# Collects the admin sets and collections as [first title, id] pairs for
# the upload form.
def upload
  @admin_sets = AdminSet.all.map do |admin_set|
    [admin_set.title.first, admin_set.id]
  end
  @collections = Collection.all.map do |collection|
    [collection.title.first, collection.id]
  end
end
25
34
 
26
35
# Stores the uploaded CSV under public/uploads/csvs, validates its file
# paths, parses it into works and records a BatchIngest. The ingest jobs
# are queued only when every file path is valid; otherwise the invalid
# paths are kept in the batch message and the user is sent back to the
# upload form.
def create
  upload = params[:csv_import] && params[:csv_import][:csv_file]
  # A request without a file would otherwise fail with NoMethodError.
  unless upload.respond_to?(:original_filename)
    flash[:error] = "please choose a csv file to upload"
    redirect_to csv_upload_path
    return
  end

  dir = Rails.root.join('public', 'uploads', 'csvs')
  FileUtils.mkdir_p(dir)

  # Keep only the base name of the client-supplied filename and always
  # insert the epoch timestamp before the extension, so names without a
  # lowercase ".csv" still get a unique stored name.
  original = File.basename(upload.original_filename)
  filename = "#{File.basename(original, '.*')}#{Time.now.to_i}#{File.extname(original)}"
  csv = dir.join(filename).to_s
  File.open(csv, 'wb') { |file| file.write(upload.read) }

  check_paths csv
  flash[:error] = "some file paths are invalid" if @path_list.present?
  parse_csv(csv, params[:csv_import][:mvs])

  ingest = BatchIngest.new(
    data: @works,
    size: @works.length,
    csv: csv,
    admin_set_id: params[:admin_set],
    collection_id: params[:collection],
    user_id: current_user.id,
    message: @path_list.blank? ? nil : @path_list.to_s.gsub("\"","&quot;")
  )
  if ingest.save! && @path_list.blank?
    BatchCreateWorksJob.perform_later(ingest, current_user)
    flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
    redirect_to csv_my_batches_path
  else
    flash[:error] ||= "csv could not be parsed, please check and re-upload"
    redirect_to csv_upload_path
  end
rescue CSV::MalformedCSVError
  # Raised by check_paths/parse_csv before any redirect has been issued.
  flash[:error] = "csv could not be parsed, please check and re-upload"
  redirect_to csv_upload_path
end
68
+
69
# Queues a fresh run of an existing batch by saving a copy of it and
# handing the copy to BatchCreateWorksJob.
def rerun
  ingest = BatchIngest.find(params[:id]).deep_dup
  # The copy carries over the source batch's attributes, including
  # complete: true for a finished batch. Reset it so the new run reports
  # progress instead of showing "Complete" straight away.
  ingest.complete = false

  if ingest.save
    BatchCreateWorksJob.perform_later(ingest, current_user)
    flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
  else
    # An unsaved batch cannot be handed to a background job.
    flash[:error] = "batch could not be queued again"
  end
  redirect_to csv_my_batches_path
end
76
+
77
# Renders a CSV template consisting of a single header row: "type", "url",
# then one "work_<attribute>" column per GenericWork attribute and one
# "file_<attribute>" column per FileSet attribute, minus the attributes
# listed in skip.
def generate
  skip = %w[id head tail depositor date_uploaded date_modified import_url thumbnail_id
            embargo_id lease_id access_control_id representative_id]

  work_headers = GenericWork.new.attributes.keys.reject { |key| skip.include?(key) }
                            .map { |key| "work_#{key}" }
  file_headers = FileSet.new.attributes.keys.reject { |key| skip.include?(key) }
                        .map { |key| "file_#{key}" }
  headers = %w[type url] + work_headers + file_headers

  render plain: CSV.generate { |csv| csv << headers }, content_type: 'text/csv'
end
40
90
 
41
91
  private
42
92
 
43
- def perform(csv, mvs, current_user)
44
- @csv = CSV.parse(File.read(csv), headers: true, encoding: 'utf-8').map(&:to_hash)
45
- @mvs = mvs
46
- @collection = Collection.find(params[:csv_import][:collection]) rescue nil
47
- @admin_set = AdminSet.find(params[:csv_import][:admin_set]) rescue nil
48
- @works = []
49
- @files = {}
50
- @csv.each do |row|
51
- type = row.first.last
52
- if type.nil?
53
- next
54
- elsif(type.include? "Work")
55
- @works << row
56
- @files[@works.length] = []
57
- elsif(type.include? "File")
58
- row.delete("object_type")
59
- @files[@works.length] << row
60
- end
61
- end
62
- create_works
63
- end
64
-
65
- def available_works
66
- @available_works ||= Hyrax::QuickClassificationQuery.new(current_user).authorized_models
67
- end
68
-
69
- def default_page_title
70
- 'CSV Batch Uploader'
71
- end
72
-
73
- def admin_host?
74
- false unless Settings.multitenancy.enabled
75
- end
76
-
77
- def available_translations
78
- {
79
- 'en' => 'English',
80
- 'fr' => 'French'
81
- }
82
- end
83
-
84
- def work_form
85
- Module.const_get("Hyrax::#{params[:work]}Form") rescue nil || Module.const_get("Hyrax::Forms::WorkForm")
86
- end
87
-
88
- def file_form
89
- Module.const_get("Hyrax::FileSetForm") rescue nil || Module.const_get("Hyrax::Forms::FileSetEditForm")
90
- end
91
-
92
- def secondary_terms form_name
93
- form_name.terms - form_name.required_fields -
94
- [:visibility_during_embargo, :embargo_release_date,
95
- :visibility_after_embargo, :visibility_during_lease,
96
- :lease_expiration_date, :visibility_after_lease, :visibility,
97
- :thumbnail_id, :representative_id, :ordered_member_ids,
98
- :collection_ids, :in_works_ids, :admin_set_id, :files, :source, :member_of_collection_ids]
99
- end
100
-
101
- def create_file_from_url(url, file_name, work, file_data)
102
- ::FileSet.new(import_url: url, label: file_name) do |fs|
103
- fs.save
104
- actor = Hyrax::Actors::FileSetActor.new(fs, current_user)
105
- actor.create_metadata#(work, visibility: work.visibility)
106
- actor.attach_file_to_work(work)
107
- #byebug
108
- fs.attributes = file_data
109
- fs.save!
110
- uri = URI.parse(url.gsub(' ','%20'))
111
- if uri.scheme == 'file'
112
- IngestLocalFileJob.perform_later(fs, uri.path.gsub('%20',' '), current_user)
113
- else
114
- ImportUrlJob.perform_later(fs, log(actor.user))
115
- end
116
- end
117
- end
118
- #
119
- def load_metadata(fs, file_array)
120
- file_array.each do |line|
121
- fileset = fs
122
- index = -1
123
- line.each do |data|
124
- index = index + 1
125
- next if index==0
126
- if @csv.headers[index] == "visibility"
127
- fileset.visibility = data
128
- elsif @csv.headers[index] == "depositor"
129
- fileset.depositor = data
130
- else
131
- data_arr = data.split @mvs
132
- fileset[@csv.headers[index]] = data_arr
133
- end
134
- end
135
- fileset.save
136
- end
137
- end
138
-
139
- def create_works
140
- index = 1
141
- @works.each do |work_data|
142
- work = Object.const_get(work_data.first.last).new#delete("object_type")).new
143
- status_after, embargo_date, lease_date = nil, nil, nil
144
- final_work_data = create_data work_data, work_form, work
145
- work.apply_depositor_metadata(current_user)
146
- work.attributes = final_work_data
147
- work.member_of_collections = [@collection] if @collection
148
- work.admin_set = @admin_set if @admin_set
149
- work.save
150
- create_files(work, index)
151
- index+=1
152
- end
153
- end
154
-
155
- def create_data data, type, object
156
- final_data = {}
157
- accepted_terms = type.required_fields + secondary_terms(type)
158
- data.each do |key, att|
159
- if(att.nil? || att.empty? || key.to_s.include?("object_type") || !accepted_terms.include?(key.to_sym) )
160
- next
161
- elsif(object.send(key).nil?)
162
- final_data[key] = att
163
- else
164
- final_data[key] = att.split @mvs
165
- end
166
- end
167
- final_data
168
- end
93
# before_action guard: the current user must be allowed to create the first
# work type returned by available_works.
def authenticate
  work_type = available_works.first
  authorize!(:create, work_type)
end
96
+
97
# Work models the current user is authorized for, as reported by
# Hyrax::QuickClassificationQuery. The result is memoized per instance.
def available_works
  return @available_works if @available_works

  query = Hyrax::QuickClassificationQuery.new(current_user)
  @available_works = query.authorized_models
end
100
+
101
# Reads the CSV at path +csv+ and builds @works, an array of hashes of the
# form {type:, metadata:, files: []}. The first column of each row decides
# how the row is treated: values containing "Work" start a new work, values
# containing "File" add a file to the most recent work, blank values are
# skipped.
#
# csv - path of the CSV file on disk
# mvs - separator passed to create_data for splitting multi-valued fields
def parse_csv csv, mvs
  rows = CSV.parse(File.read(csv), headers: true, encoding: 'utf-8').map(&:to_hash)
  @works = []
  rows.each do |row|
    type = row.first.last
    next if type.nil?

    if type.include? "Work"
      # NOTE(review): type comes from the uploaded CSV and is resolved with
      # const_get; consider checking it against the permitted work classes.
      metadata = create_data(row, work_form(type), Object.const_get(type).new, mvs)
      @works << {type: type, metadata: metadata, files: []}
    elsif type.include? "File"
      parent = @works.last
      # A file row that appears before any work row has no work to attach
      # to; skip it instead of failing on nil.
      if parent.nil?
        Rails.logger.warn("cdm_migrator: skipping file row with no preceding work row")
        next
      end
      metadata = create_data(row, file_form, FileSet.new, mvs)
      parent[:files] << {url: row.delete('url'), title: row.delete('title'), metadata: metadata}
    end
  end
end
117
+
118
# Fills @path_list with {csv line number => url} for every row whose url
# does not point at an existing local file. Rows without a url are ignored.
# A "file://" prefix is removed before the file is looked up.
# NOTE(review): any url that is not a local path is reported as missing as
# well — confirm this is intended for remote urls.
def check_paths csv_file
  @path_list = {}
  line = 1 # the header occupies line 1, so the first data row is line 2

  CSV.foreach(csv_file, headers: true, header_converters: :symbol) do |row|
    line += 1
    url = row[:url]
    next if url.nil?
    next if File.file?(url.gsub("file://", ""))

    @path_list[line] = url
  end
end
131
+
132
# Page title exposed to the views through helper_method.
def default_page_title
  "CSV Batch Uploader"
end
135
+
136
# Returns false when multitenancy is disabled and nil when it is enabled,
# so the result is falsy either way.
def admin_host?
  return nil if Settings.multitenancy.enabled

  false
end
139
+
140
# Locale code => language name pairs exposed to the views.
def available_translations
  { 'en' => 'English', 'fr' => 'French' }
end
146
+
147
# Resolves the form class for a work type: Hyrax::<worktype>Form when that
# constant can be looked up, otherwise Hyrax::Forms::WorkForm.
def work_form(worktype = "GenericWork")
  Module.const_get("Hyrax::#{worktype}Form")
rescue StandardError
  Module.const_get("Hyrax::Forms::WorkForm")
end
150
+
151
# Resolves the file set form class: Hyrax::FileSetForm when that constant
# can be looked up, otherwise Hyrax::Forms::FileSetEditForm.
def file_form
  Module.const_get("Hyrax::FileSetForm")
rescue StandardError
  Module.const_get("Hyrax::Forms::FileSetEditForm")
end
154
+
155
# Terms of the given form that are neither required fields nor one of the
# visibility, embargo, lease, relationship or file terms listed below.
#
# form_name - object responding to #terms and #required_fields
def secondary_terms form_name
  excluded = [:visibility_during_embargo, :embargo_release_date,
              :visibility_after_embargo, :visibility_during_lease,
              :lease_expiration_date, :visibility_after_lease, :visibility,
              :thumbnail_id, :representative_id, :ordered_member_ids,
              :collection_ids, :in_works_ids, :admin_set_id, :files, :source, :member_of_collection_ids]

  optional_terms = form_name.terms - form_name.required_fields
  optional_terms - excluded
end
163
+
164
# Filters one CSV row down to the attributes the given form accepts.
#
# data   - hash of column name => cell value for one row
# type   - form class supplying required_fields (and, via secondary_terms,
#          the optional terms)
# object - instance used to decide whether a value is kept as a single
#          string (its current value is nil) or split into a list
# mvs    - separator used to split multi-valued cells
#
# Returns a hash of accepted column names to a string or an array of strings.
def create_data data, type, object, mvs
  accepted_terms = type.required_fields + secondary_terms(type)

  data.each_with_object({}) do |(key, att), final_data|
    next if att.nil? || att.empty?
    next if key.to_s.include?("object_type")
    next unless accepted_terms.include?(key.to_sym)

    final_data[key] = object.send(key).nil? ? att : att.split(mvs)
  end
end
169
178
 
170
179
# Builds and saves a lease.
#
# visibility   - visibility while the lease is active
# status_after - visibility once the lease has expired
# date         - lease expiration date
#
# Returns the result of Lease#save.
def create_lease visibility, status_after, date
  # Use the date argument: @lease_date is not assigned anywhere in the
  # visible controller code, so the lease was being built with a nil expiry.
  lease = Hydra::AccessControls::Lease.new(visibility_during_lease: visibility,
                                           visibility_after_lease: status_after,
                                           lease_expiration_date: date)
  lease.save
end
175
184
 
@@ -181,26 +190,9 @@ module CdmMigrator
181
190
  embargo.save
182
191
  end
183
192
 
184
- def create_files(work, index)
185
- file = FileSet.new
186
- @files[index].each do |file_data|
187
- url = file_data.delete('url')
188
- title = file_data.delete('title')
189
- final_file_data = create_data file_data, file_form, file
190
- create_file_from_url(url, title, work, final_file_data)
191
- end
192
- end
193
-
194
- # def log(user)
195
- # CurationConcerns::Operation.create!(user: user,
196
- # operation_type: "Attach Remote File")
197
- # end
198
-
199
-
200
-
201
- def log(user)
202
- Hyrax::Operation.create!(user: user,
203
- operation_type: "Attach Remote File")
204
- end
193
# Creates the Hyrax::Operation record for attaching a remote file on behalf
# of the given user.
def log(user)
  attributes = { user: user, operation_type: "Attach Remote File" }
  Hyrax::Operation.create!(attributes)
end
205
197
  end
206
198
  end
@@ -0,0 +1,22 @@
1
module CdmMigrator
  # Creates one FileSet per file entry of an IngestWork, attaches it to the
  # given work and queues a CdmIngestFilesJob for each to fetch the content.
  class BatchCreateFilesJob < ActiveJob::Base
    queue_as Hyrax.config.ingest_queue_name

    # work        - the work the file sets are attached to
    # ingest_work - IngestWork whose files are hashes with :url, :title and
    #               :metadata
    # user        - user the file sets are created for
    # last_work   - true when this is the final work of its batch
    def perform(work, ingest_work, user, last_work = false)
      files = Array(ingest_work.files)

      # No file job will run for a work without files, so completion has to
      # be recorded here or the work (and a batch ending with it) would
      # never be marked complete.
      if files.empty?
        ingest_work.update_attribute('complete', true)
        BatchIngest.find(ingest_work.batch_ingest_id).update_attribute('complete', true) if last_work
        return
      end

      final_index = files.length - 1
      files.each_with_index do |file, index|
        url = file[:url]
        # Compare positions rather than values: two identical file rows
        # would otherwise both look like the last one.
        last_file = index == final_index
        ::FileSet.new(import_url: url, label: file[:title]) do |fs|
          fs.save
          actor = Hyrax::Actors::FileSetActor.new(fs, user)
          actor.create_metadata
          actor.attach_file_to_work(work)
          fs.attributes = file[:metadata]
          fs.save!
          CdmIngestFilesJob.perform_later(fs, url, user, ingest_work, last_file, last_work)
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module CdmMigrator
  # Turns each work entry of a BatchIngest into a saved IngestWork and queues
  # a CreateWorkJob for it.
  class BatchCreateWorksJob < ActiveJob::Base
    queue_as Hyrax.config.ingest_queue_name

    # ingest - BatchIngest whose data is the list of parsed work hashes
    # user   - user the works are created for
    def perform(ingest, user)
      works = Array(ingest.data)

      # Nothing downstream would ever mark an empty batch as complete.
      if works.empty?
        ingest.update_attribute('complete', true)
        return
      end

      final_index = works.length - 1
      works.each_with_index do |work_data, index|
        # Compare positions rather than values: two identical work rows
        # would otherwise both be flagged as the last work.
        last_work = index == final_index
        ingest_work = IngestWork.new(work_data, ingest.id)
        ingest_work.save!
        CreateWorkJob.perform_later ingest_work, user, ingest.admin_set_id, ingest.collection_id, last_work
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
require 'open-uri'

module CdmMigrator
  # Fetches the content for one file set and records ingest progress.
  # Remote PDFs are first downloaded to public/uploads/csv_pdfs and then
  # ingested as local files; "file" urls go through IngestLocalFileJob and
  # everything else through ImportUrlJob.
  class CdmIngestFilesJob < ActiveJob::Base
    queue_as Hyrax.config.ingest_queue_name

    # fs          - the file set to fill
    # url         - source location taken from the CSV
    # user        - user the ingest runs for
    # ingest_work - IngestWork being processed, or nil when not part of a batch
    # last_file   - true for the final file of the work
    # last_work   - true when the work is the final work of its batch
    def perform(fs, url, user, ingest_work = nil, last_file = false, last_work = false)
      url = download_pdf(fs, url) if remote_pdf?(url)

      uri = URI.parse(url.gsub(' ','%20'))
      if uri.scheme == 'file'
        IngestLocalFileJob.perform_now(fs, uri.path.gsub('%20',' '), user)
      else
        ImportUrlJob.perform_now(fs, log(user))
      end

      record_progress(ingest_work, last_file, last_work)
    end

    def log(user)
      Hyrax::Operation.create!(user: user,
                               operation_type: "Attach Remote File")
    end

    private

    # True for http(s) urls whose extension contains "pdf".
    def remote_pdf?(url)
      !(url =~ %r{\Ahttps?://}).nil? && File.extname(url).include?("pdf")
    end

    # Downloads the PDF into public/uploads/csv_pdfs, fills in a missing
    # file set title from the file name and returns a file:// url for the
    # local copy.
    def download_pdf(fs, url)
      # Open through a parsed URI rather than Kernel#open: the url comes
      # from the CSV, and Kernel#open runs a command for strings that start
      # with "|".
      download = URI.parse(url).open
      dir = Rails.root.join('public', 'uploads', 'csv_pdfs')
      FileUtils.mkdir_p(dir)
      filename = download.base_uri.to_s.split('/').last
      target = dir.join(filename)
      if fs.title.empty?
        fs.title << filename.split('.').first
        fs.save
      end
      IO.copy_stream(download, target.to_s)
      "file://" + target.to_s
    ensure
      download.close if download
    end

    # Marks the work complete after its last file, and the batch complete
    # after the last file of its last work.
    def record_progress(ingest_work, last_file, last_work)
      return unless ingest_work && last_file

      ingest_work.update_attribute('complete', true)
      # Look the batch up by the work's batch_ingest_id; the work's own id
      # identifies a row in a different table.
      BatchIngest.find(ingest_work.batch_ingest_id).update_attribute('complete', true) if last_work
    end
  end
end
@@ -0,0 +1,20 @@
1
module CdmMigrator
  # Creates the work described by an IngestWork and queues
  # BatchCreateFilesJob to attach its files.
  class CreateWorkJob < ActiveJob::Base
    queue_as Hyrax.config.ingest_queue_name

    # ingest_work   - IngestWork holding the work type and metadata
    # user          - depositor of the new work
    # admin_set_id  - id of the admin set to place the work in (optional)
    # collection_id - id of the collection to add the work to (optional)
    # last_work     - true when this is the final work of its batch
    def perform(ingest_work, user, admin_set_id, collection_id, last_work=false)
      admin_set = lookup(::AdminSet, admin_set_id)
      collection = lookup(::Collection, collection_id)

      # NOTE(review): work_type originates from the uploaded CSV; consider
      # checking it against the permitted work classes before const_get.
      work = Object.const_get(ingest_work.work_type).new
      work.apply_depositor_metadata(user)
      work.attributes = ingest_work.data
      work.member_of_collections = [collection] if collection
      work.admin_set = admin_set if admin_set
      work.date_uploaded = DateTime.now
      # Raise on a failed save so the real cause is reported instead of
      # passing an unsaved work on to the files job.
      work.save!
      BatchCreateFilesJob.perform_later work, ingest_work, user, last_work
    end

    private

    # Returns the record with the given id, or nil when the lookup fails
    # for any reason (for example a blank or unknown id).
    def lookup(model, id)
      model.find(id)
    rescue StandardError
      nil
    end
  end
end
@@ -0,0 +1,30 @@
1
module CdmMigrator
  # One uploaded CSV batch: the parsed works (serialized in data), the stored
  # CSV path, the ingest targets, an optional message and a completion flag.
  class BatchIngest < ActiveRecord::Base
    serialize :data

    # File name of the stored CSV with any run of ten digits removed (the
    # controller inserts an epoch timestamp into the stored name).
    def name
      csv.split('/').last.gsub(/[0-9]{10}/,"")
    end

    # "Complete" for a finished batch, otherwise "<completed works>/<size>".
    def progress
      return "Complete" if complete?

      # count runs a COUNT query instead of loading every IngestWork row
      # just to measure the array.
      completed = IngestWork.where(batch_ingest_id: id, complete: true).count
      "#{completed}/#{size}"
    end

    # Name of the user who uploaded the batch (memoized).
    def username
      @username ||= User.find(user_id).name
    end

    def complete?
      self.complete
    end

    # True when a non-empty message is stored.
    def message?
      !(message.nil? || message.empty?)
    end
  end
end
@@ -0,0 +1,16 @@
1
module CdmMigrator
  # A single work of a batch, built from one parsed work hash. The metadata
  # and the file list are stored serialized.
  class IngestWork < ActiveRecord::Base
    serialize :data
    serialize :files

    # work      - hash with :type, :metadata and :files
    # ingest_id - id of the owning BatchIngest
    def initialize(work, ingest_id)
      attributes = {
        work_type: work[:type],
        data: work[:metadata],
        files: work[:files],
        batch_ingest_id: ingest_id
      }
      super(attributes)
    end
  end
end
@@ -0,0 +1,4 @@
1
+ <% # container for all batches in index view -%>
2
+ <div class="table-responsive" id="batches">
3
+ <%= render 'default_group', batches: @batches %>
4
+ </div>
@@ -0,0 +1,17 @@
1
+ <table class="table table-striped works-list">
2
+ <thead>
3
+ <tr>
4
+ <th><%= "Name" %></th>
5
+ <th><%= "Uploaded" %></th>
6
+ <th><%= "User" %></th>
7
+ <th><%= "Progress" %></th>
8
+ <th><%= "Message" %></th>
9
+ <th><%= "Actions" %></th>
10
+ </tr>
11
+ </thead>
12
+ <tbody>
13
+ <% batches.each do |batch| %>
14
+ <%= render 'list_batches', batch: batch %>
15
+ <% end %>
16
+ </tbody>
17
+ </table>
@@ -0,0 +1,21 @@
1
<%# One table row per batch: name, upload time, uploader, progress, stored message and a re-run link. %>
<tr id="batch_<%= batch.id %>">

  <td><%= batch.name %></td>

  <td class="date"><%= batch.created_at %></td>

  <td><%= batch.username %></td>

  <td><%= batch.progress %></td>
  <td>
    <% if batch.message? %>
      <%# The message holds paths taken from the uploaded CSV. Emit it as a JSON string literal and let ERB escape it for the attribute, instead of marking it html_safe inside a quoted JS string. %>
      <button onclick="alert(<%= CGI.unescapeHTML(batch.message).to_json %>);">View Message</button>
    <% else %>
      No Message
    <% end %>
  </td>

  <td>
    <%= link_to(
          "Run Again",
          main_app.csv_rerun_path(id: batch.id),
          data: { 'create-type' => 'batch' },
          class: 'btn btn-primary'
        ) if batch.complete? && (current_ability.admin? || current_page?(main_app.csv_my_batches_path(locale: nil))) %>
  </td>
</tr>
@@ -0,0 +1,19 @@
1
+ <% if @path_list && @path_list.any? %>
2
+ <table class="table table-striped">
3
+ <thead>
4
+ <tr>
5
+ <th scope="col" style="min-width: 100px;">Line No.</th>
6
+ <th scope="col">File Path (url)</th>
7
+ </tr>
8
+ </thead>
9
+ <tbody>
10
+ <% @path_list.each do |line, path| %>
11
+ <tr>
12
+ <td><%= line %></td>
13
+ <td><%= path %></td>
14
+ </tr>
15
+ <% end %>
16
+ </tbody>
17
+ </table>
18
+ <% end %>
19
+
@@ -0,0 +1,9 @@
1
+ <% if @response.total_pages > 1 %>
2
+ <div class="row record-padding">
3
+ <div class="col-md-9">
4
+ <div class="pagination">
5
+ <%= paginate @response, outer_window: 2, theme: 'blacklight', route_set: hyrax %>
6
+ </div>
7
+ </div>
8
+ </div>
9
+ <% end %>
@@ -0,0 +1,8 @@
1
+ <ul class="nav nav-tabs" id="my_nav" role="navigation">
2
+ <li<%= ' class="active"'.html_safe if current_page?(main_app.csv_all_batches_path(locale: nil)) %>>
3
+ <%= link_to "All CSV Batches", main_app.csv_all_batches_path %>
4
+ </li>
5
+ <li<%= ' class="active"'.html_safe if current_page?(main_app.csv_my_batches_path(locale: nil)) %>>
6
+ <%= link_to "My CSV Batches", main_app.csv_my_batches_path %>
7
+ </li>
8
+ </ul>
@@ -0,0 +1,21 @@
1
+ <% provide :page_title, "File Path Checker" %>
2
+
3
+ <h1>File Path Checker</h1>
4
+
5
+ <!-- <div class="row"> -->
6
+ <p>This tool checks if a file exists at each url in a csv.</p>
7
+ <%= form_tag(check_file_paths_path, remote: true, method: :post, multipart: true, id: "csv-form") do %>
8
+ <div class="input-group">
9
+ <%= file_field_tag(:file, class: "form-control-file") %>
10
+ <%= hidden_field_tag :authenticity_token, value: form_authenticity_token %>
11
+ <%= button_tag(type: :submit, class: "btn btn-large btn-primary", style: "margin-top: 1em;") do %>
12
+ Check CSV
13
+ <% end %>
14
+ <% end %>
15
+ </div>
16
+ <!-- </div> -->
17
+
18
+ <div id="path_list" class="col-md-10 offset-md-1">
19
+ <%= render 'path_list' %>
20
+ </div>
21
+
@@ -0,0 +1,19 @@
1
+ <% provide :page_title, "Batches" %>
2
+
3
+
4
+ <% provide :page_header do %>
5
+ <h1><span class="fa fa-database" aria-hidden="true"></span> <%= "Batches" %></h1>
6
+ <% end %>
7
+
8
+ <div class="row">
9
+ <div class="col-md-12">
10
+ <div class="panel panel-default tabs">
11
+ <%= render 'tabs' %>
12
+ <div class="panel-body">
13
+ <%= render 'batches_list' %>
14
+
15
+ <%#= render 'results_pagination' %>
16
+ </div>
17
+ </div>
18
+ </div>
19
+ </div>
@@ -2,6 +2,12 @@ Rails.application.routes.draw do
2
2
  get '/cdm_migrator/upload', to: 'cdm_migrator/csv#upload', as: 'csv_upload'
3
3
  post '/cdm_migrator/upload', to: 'cdm_migrator/csv#create', as: 'csv_create'
4
4
  get '/cdm_migrator/generate', to: 'cdm_migrator/csv#generate', as: 'csv_generate'
5
+ get '/cdm_migrator/my/batches', to: 'cdm_migrator/csv#index', as: 'csv_my_batches'
6
+ get '/cdm_migrator/batches', to: 'cdm_migrator/csv#index', as: 'csv_all_batches'
7
+ get '/cdm_migrator/rerun/:id', to: 'cdm_migrator/csv#rerun', as: 'csv_rerun'
8
+
9
+ get '/cdm_migrator/file_path_checker', to: 'cdm_migrator/csv#file_path_checker', as: 'file_path_checker'
10
+ post '/cdm_migrator/file_path_checker', to: 'cdm_migrator/csv#file_path_checker', as: 'check_file_paths'
5
11
 
6
12
  get '/cdm_migrator/collection', to: 'cdm_migrator/cdm#collection', as: 'cdm_start'
7
13
  post '/cdm_migrator/mappings/', to: 'cdm_migrator/cdm#mappings', as: 'cdm_mappings'
@@ -0,0 +1,20 @@
1
# Creates the batch_ingests table: one row per uploaded CSV batch.
class CreateBatchIngests < ActiveRecord::Migration[5.0]
  # create_table is reversible, so rolling back drops the table.
  def change
    create_table :batch_ingests do |t|
      t.text :data
      t.string :admin_set_id
      t.string :collection_id
      t.text :message
      t.integer :size
      t.string :csv
      t.references :user, foreign_key: true
      t.boolean :complete, default: false

      t.timestamps
    end
  end
end
@@ -0,0 +1,18 @@
1
# Creates the ingest_works table: one row per work of a batch.
class CreateIngestWorks < ActiveRecord::Migration[5.0]
  # create_table is reversible, so rolling back drops the table.
  def change
    create_table :ingest_works do |t|
      t.string :work_type
      t.text :data
      t.text :files
      t.boolean :complete, default: false

      t.references :batch_ingest, foreign_key: true

      t.timestamps
    end
  end
end
@@ -1,15 +1,22 @@
1
-
2
-
3
1
  module CdmMigrator
4
2
  class Engine < ::Rails::Engine
5
3
 
4
+ initializer :append_migrations do |app|
5
+ unless app.root.to_s.match root.to_s
6
+ config.paths["db/migrate"].expanded.each do |expanded_path|
7
+ app.config.paths["db/migrate"] << expanded_path
8
+ end
9
+ end
10
+ end
11
+
6
12
  #isolate_namespace CdmMigrator
7
13
  class << self
8
-
14
+
9
15
  def config
10
- file = File.open(File.join(::Rails.root, "/config/cdm_migrator.yml"))
16
+ file = File.open(File.join(::Rails.root, "/config/cdm_migrator.yml"))
11
17
  @config ||= YAML.safe_load(file)
12
18
  end
19
+
13
20
  # loads a yml file with the configuration options
14
21
  #
15
22
  # @param file [String] path to the yml file
@@ -1,3 +1,3 @@
1
1
  module CdmMigrator
2
- VERSION = '2.0.0'
2
+ VERSION = '3.0.0'
3
3
  end
@@ -4,21 +4,36 @@ class CdmMigrator::InstallGenerator < Rails::Generators::Base
4
4
  def inject_dashboard_link
5
5
  file_path = "app/views/hyrax/dashboard/sidebar/_tasks.html.erb"
6
6
  if File.file?(file_path)
7
- gsub_file file_path,/[ \t]*(<% if can\? :review, :submissions %>)\n[ \t]*(<li class="h5"><%= t\('hyrax\.admin\.sidebar\.tasks'\) %><\/li>)\n/ do |match|
8
- match.split("\n")[1].to_s+
9
- "\n <li>\n" \
10
- " <%= menu.collapsable_section t('CDM Migrator'),\n" \
11
- " icon_class: \"fa fa-map-signs\",\n" \
12
- " id: 'collapseCdmMigrator',\n" \
13
- " open: menu.cdm_migrator_section? do %>\n" \
14
- " <%= menu.nav_link(main_app.csv_upload_path) do %>\n"\
15
- " <span class=\"fa fa-angle-double-up\"></span> <span class=\"sidebar-action-text\"><%= t('CSV Batch Uploader') %></span>\n" \
16
- " <% end %>\n" \
17
- " <%= menu.nav_link(main_app.cdm_start_path) do %>\n" \
18
- " <span class=\"fa fa-map\"></span> <span class=\"sidebar-action-text\"><%= t('CDM Mapping Tool') %></span>\n" \
19
- " <% end %>\n" \
20
- " <% end %>\n" \
21
- " </li>\n" + match.split("\n")[0].to_s + "\n"
7
+ title = "<li class=\"h5\"><%= t('hyrax.admin.sidebar.tasks') %></li>"
8
+ perm = " <% if can? :review, :submissions %>"
9
+ gsub_file file_path,/[ \t]*(<li class="h5"><%= t\('hyrax\.admin\.sidebar\.tasks'\) %><\/li>)\n[\s\S]*[ \t]*(<% if can\? :review, :submissions %>)\n/ do |match|
10
+ ""
11
+ end
12
+ gsub_file file_path,/[ \t]*(<% if can\? :review, :submissions %>)\n[\s\S]*[ \t]*(<li class="h5"><%= t\('hyrax\.admin\.sidebar\.tasks'\) %><\/li>)\n/ do |match|
13
+ ""
14
+ end
15
+ prepend_to_file file_path do
16
+ title + "\n" \
17
+ "<li>\n" \
18
+ " <%= menu.collapsable_section t('CDM Migrator'),\n" \
19
+ " icon_class: \"fa fa-map-signs\",\n" \
20
+ " id: 'collapseCdmMigrator',\n" \
21
+ " open: menu.cdm_migrator_section? do %>\n" \
22
+ " <%= menu.nav_link(main_app.cdm_start_path) do %>\n" \
23
+ " <span class=\"fa fa-map\"></span> <span class=\"sidebar-action-text\"><%= t('CDM Mapping Tool') %></span>\n" \
24
+ " <% end %>\n" \
25
+ " <%= menu.nav_link(main_app.file_path_checker_path) do %>\n" \
26
+ " <span class=\"fa fa-check-circle\"></span><span>File Path Checker</span>\n" \
27
+ " <% end %>\n" \
28
+ " <%= menu.nav_link(main_app.csv_upload_path) do %>\n"\
29
+ " <span class=\"fa fa-angle-double-up\"></span> <span class=\"sidebar-action-text\"><%= t('CSV Batch Uploader') %></span>\n" \
30
+ " <% end %>\n" \
31
+ " <%= menu.nav_link(main_app.csv_my_batches_path) do %>\n" \
32
+ " <span class=\"fa fa-database\"></span> <span class=\"sidebar-action-text\"><%= t('Batches') %></span>\n" \
33
+ " <% end %>\n" \
34
+ " <% end %>\n" \
35
+ " </li>\n" + perm + "\n"
36
+
22
37
  end
23
38
  else
24
39
  copy_file "sidebar/_tasks.html.erb", "app/views/hyrax/dashboard/sidebar/_tasks.html.erb"
@@ -28,14 +43,14 @@ match.split("\n")[1].to_s+
28
43
  def inject_menu_presenter
29
44
  hyku_file_path = "app/presenters/hyku/menu_presenter.rb"
30
45
  hyrax_file_path = "app/presenters/hyrax/menu_presenter.rb"
31
- if File.file?(hyku_file_path)
46
+ if File.file?(hyku_file_path) && File.readlines(hyku_file_path).join.include?("cdm_migrator_section")
32
47
  insert_into_file hyku_file_path, :after => /def settings_section\?\n.*\(controller_name\)\n[ \t]*end/ do
33
48
  "\n\n" \
34
49
  " def cdm_migrator_section?\n" \
35
50
  " %w[cdm csv].include?(controller_name)\n" \
36
51
  " end\n"
37
52
  end
38
- elsif File.file?(hyrax_file_path)
53
+ elsif File.file?(hyrax_file_path) && File.readlines(hyrax_file_path).join.include?("cdm_migrator_section")
39
54
  insert_into_file hyrax_file_path, :after => /def settings_section\?\n.*\(controller_name\)\n[ \t]*end/ do
40
55
  "\n\n" \
41
56
  " def cdm_migrator_section?\n" \
@@ -50,7 +65,7 @@ match.split("\n")[1].to_s+
50
65
  end
51
66
 
52
67
  def inject_content_dm_yml
53
- copy_file "config/cdm_migrator.yml", "config/cdm_migrator.yml"
68
+ copy_file("config/cdm_migrator.yml", "config/cdm_migrator.yml") unless File.file?("config/cdm_migrator.yml")
54
69
  end
55
70
 
56
- end
71
+ end
@@ -4,12 +4,20 @@
4
4
  icon_class: "fa fa-map-signs",
5
5
  id: 'collapseCdmMigrator',
6
6
  open: menu.cdm_migrator_section? do %>
7
+ <%= menu.nav_link(main_app.cdm_start_path) do %>
8
+ <span class="fa fa-map"></span> <span class="sidebar-action-text"><%= t('CDM Mapping Tool') %></span>
9
+ <% end %>
10
+
11
+ <%= menu.nav_link(main_app.file_path_checker_path) do %>
12
+ <span class="fa fa-check-circle"></span><span class="sidebar-action-text"><%= t('File Path Checker') %></span>
13
+ <% end %>
14
+
7
15
  <%= menu.nav_link(main_app.csv_upload_path) do %>
8
16
  <span class="fa fa-angle-double-up"></span> <span class="sidebar-action-text"><%= t('CSV Batch Uploader') %></span>
9
17
  <% end %>
10
18
 
11
- <%= menu.nav_link(main_app.cdm_start_path) do %>
12
- <span class="fa fa-map"></span> <span class="sidebar-action-text"><%= t('CDM Mapping Tool') %></span>
19
+ <%= menu.nav_link(main_app.csv_my_batches_path) do %>
20
+ <span class="fa fa-database"></span> <span class="sidebar-action-text"><%= t('Batches') %></span>
13
21
  <% end %>
14
22
  <% end %>
15
23
  </li>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdm_migrator
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - sephirothkod
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-01-29 00:00:00.000000000 Z
11
+ date: 2019-12-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -86,14 +86,29 @@ files:
86
86
  - app/controllers/cdm_migrator/csv_controller.rb
87
87
  - app/helpers/cdm_migrator/application_helper.rb
88
88
  - app/jobs/cdm_migrator/application_job.rb
89
- - app/jobs/csv_upload_job.rb
89
+ - app/jobs/cdm_migrator/batch_create_files_job.rb
90
+ - app/jobs/cdm_migrator/batch_create_works_job.rb
91
+ - app/jobs/cdm_migrator/cdm_ingest_files_job.rb
92
+ - app/jobs/cdm_migrator/create_work_job.rb
90
93
  - app/mailers/cdm_migrator/application_mailer.rb
91
94
  - app/models/cdm_migrator/application_record.rb
95
+ - app/models/cdm_migrator/batch_ingest.rb
96
+ - app/models/cdm_migrator/ingest_work.rb
92
97
  - app/views/cdm_migrator/cdm/collection.html.erb
93
98
  - app/views/cdm_migrator/cdm/mappings.html.erb
99
+ - app/views/cdm_migrator/csv/_batches_list.html.erb
100
+ - app/views/cdm_migrator/csv/_default_group.html.erb
101
+ - app/views/cdm_migrator/csv/_list_batches.html.erb
102
+ - app/views/cdm_migrator/csv/_path_list.html.erb
103
+ - app/views/cdm_migrator/csv/_results_pagination.html.erb
104
+ - app/views/cdm_migrator/csv/_tabs.html.erb
105
+ - app/views/cdm_migrator/csv/file_path_checker.html.erb
106
+ - app/views/cdm_migrator/csv/index.html.erb
94
107
  - app/views/cdm_migrator/csv/upload.html.erb
95
108
  - app/views/layouts/cdm_migrator/application.html.erb
96
109
  - config/routes.rb
110
+ - db/migrate/20191211193859_create_batch_ingests.rb
111
+ - db/migrate/20191212192315_create_ingest_works.rb
97
112
  - lib/cdm_migrator.rb
98
113
  - lib/cdm_migrator/engine.rb
99
114
  - lib/cdm_migrator/version.rb
@@ -1,145 +0,0 @@
1
- class CsvUploadJob < ActiveJob::Base
2
- queue_as Hyrax.config.ingest_queue_name
3
-
4
-
5
- def perform(csv, mvs, collection, admin_set, current_user)
6
- @current_user = current_user
7
- @csv = CSV.parse(File.read(csv), headers: true, encoding: 'utf-8').map(&:to_hash)
8
- @mvs = mvs
9
- @collection = Collection.find(collection) rescue nil
10
- @admin_set = AdminSet.find(admin_set) rescue nil
11
- @works = []
12
- @files = {}
13
- @csv.each do |row|
14
- type = row.first.last
15
- if type.nil?
16
- next
17
- elsif(type.include? "Work")
18
- @works << row
19
- @files[@works.length] = []
20
- @worktype = type.dup
21
- elsif(type.include? "File")
22
- row.delete("object_type")
23
- @files[@works.length] << row
24
- end
25
- end
26
- create_works
27
- end
28
-
29
- private
30
-
31
- def work_form
32
- Module.const_get("Hyrax::#{@worktype}Form") rescue nil || Module.const_get("Hyrax::Forms::WorkForm")
33
- end
34
-
35
- def file_form
36
- Module.const_get("Hyrax::FileSetForm") rescue nil || Module.const_get("Hyrax::Forms::FileSetEditForm")
37
- end
38
-
39
- def secondary_terms form_name
40
- form_name.terms - form_name.required_fields -
41
- [:visibility_during_embargo, :embargo_release_date,
42
- :visibility_after_embargo, :visibility_during_lease,
43
- :lease_expiration_date, :visibility_after_lease, :visibility,
44
- :thumbnail_id, :representative_id, :ordered_member_ids,
45
- :collection_ids, :in_works_ids, :admin_set_id, :files, :source, :member_of_collection_ids]
46
- end
47
-
48
- def create_file_from_url(url, file_name, work, file_data)
49
- ::FileSet.new(import_url: url, label: file_name) do |fs|
50
- fs.save
51
- actor = Hyrax::Actors::FileSetActor.new(fs, @current_user)
52
- actor.create_metadata#(work, visibility: work.visibility)
53
- actor.attach_file_to_work(work)
54
- #byebug
55
- fs.attributes = file_data
56
- fs.save!
57
- uri = URI.parse(url.gsub(' ','%20'))
58
- if uri.scheme == 'file'
59
- IngestLocalFileJob.perform_later(fs, uri.path.gsub('%20',' '), @current_user)
60
- else
61
- ImportUrlJob.perform_later(fs, log(actor.user))
62
- end
63
- end
64
- end
65
- #
66
- def load_metadata(fs, file_array)
67
- file_array.each do |line|
68
- fileset = fs
69
- index = -1
70
- line.each do |data|
71
- index = index + 1
72
- next if index==0
73
- if @csv.headers[index] == "visibility"
74
- fileset.visibility = data
75
- elsif @csv.headers[index] == "depositor"
76
- fileset.depositor = data
77
- else
78
- data_arr = data.split @mvs
79
- fileset[@csv.headers[index]] = data_arr
80
- end
81
- end
82
- fileset.save
83
- end
84
- end
85
-
86
- def create_works
87
- index = 1
88
- @works.each do |work_data|
89
- work = Object.const_get(work_data.first.last).new#delete("object_type")).new
90
- status_after, embargo_date, lease_date = nil, nil, nil
91
- final_work_data = create_data work_data, work_form, work
92
- work.apply_depositor_metadata(@current_user)
93
- work.attributes = final_work_data
94
- work.member_of_collections = [@collection] if @collection
95
- work.admin_set = @admin_set if @admin_set
96
- work.save
97
- create_files(work, index)
98
- index+=1
99
- end
100
- end
101
-
102
- def create_data data, type, object
103
- final_data = {}
104
- accepted_terms = type.required_fields + secondary_terms(type)
105
- data.each do |key, att|
106
- if(att.nil? || att.empty? || key.to_s.include?("object_type") || !accepted_terms.include?(key.to_sym) )
107
- next
108
- elsif(object.send(key).nil?)
109
- final_data[key] = att
110
- else
111
- final_data[key] = att.split @mvs
112
- end
113
- end
114
- final_data
115
- end
116
-
117
- def create_lease visibility, status_after, date
118
- lease = Hydra::AccessControls::Lease.new(visibility_during_lease: visibility,
119
- visibility_after_lease: @status_after, lease_expiration_date: @lease_date)
120
- lease.save
121
- end
122
-
123
- def create_embargo visibility
124
- embargo = Hydra::AccessControls::Embargo.new
125
- embargo.visibility_during_embargo = visibility
126
- embargo.visibility_after_embargo = @status_after
127
- embargo.embargo_release_date = @embargo_date
128
- embargo.save
129
- end
130
-
131
- def create_files(work, index)
132
- file = FileSet.new
133
- @files[index].each do |file_data|
134
- url = file_data.delete('url')
135
- title = file_data.delete('title')
136
- final_file_data = create_data file_data, file_form, file
137
- create_file_from_url(url, title, work, final_file_data)
138
- end
139
- end
140
-
141
- def log(user)
142
- Hyrax::Operation.create!(user: user,
143
- operation_type: "Attach Remote File")
144
- end
145
- end