zizia 2.0.0.alpha.01 → 2.1.0.alpha.01

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 024af582dbc7b0f9e6a9cef853e26b3822c19de334087eb0d209306e5ae7360b
4
- data.tar.gz: 967080be47ce3a04039352847e7eb0913dfa0bfd7adf9cae5f7636cd4f7c1ba7
3
+ metadata.gz: 35bae6a98cd9938f07fa8de029d79c9628e912f9e8ca31dddd1ac127ac50217c
4
+ data.tar.gz: fe5e84efeda35a7bd04c76b9261c42059e9e19241359ee02ac8385349288bb0a
5
5
  SHA512:
6
- metadata.gz: 70104aeca87a2ed4b657af0ed0a2f2990dbea38f76d4ebca567395b64c0392c7e02952955fe7509c612312dfc7da05f90ccd113ec121c68bb6c467bae66ca498
7
- data.tar.gz: 6b588a28977d885d025673accd8f33e82792ffb03eb2e465a86f458883ac2e455a89d66f8d59fa4d3ad66ff8e7a065b4b109c9cab0a430d2ac461fd4335d1293
6
+ metadata.gz: 03b32b69417893e6f3439566f7a0c44e15bd2e4f8f018e1fe930f799db3f9564f23a79ff57727554b1c2b68e6047d7ec2704007612c29f38ab4c9621852b2b6c
7
+ data.tar.gz: 6e48071e3cb0e90dbeb598fd3e270acc37593376cd0077964a5165ec9ea6da0b6e75040db52fd0f648e8dd117f9eab8e6ca9dab759bd23a9701e990587cb3137
@@ -0,0 +1,44 @@
1
+ version: 2.1
2
+ orbs:
3
+ samvera: samvera/circleci-orb@dev:fix-bundle-cache
4
+ jobs:
5
+ build:
6
+ parameters:
7
+ ruby_version:
8
+ type: string
9
+ default: 2.5.3
10
+ bundler_version:
11
+ type: string
12
+ default: 2.0.1
13
+ executor:
14
+ name: samvera/ruby_fcrepo_solr_redis
15
+ ruby_version: << parameters.ruby_version >>
16
+ working_directory: ~/project
17
+ parallelism: 4
18
+ steps:
19
+ - checkout
20
+
21
+ - run:
22
+ name: install sqlite3
23
+ command: sudo apt-get update && sudo apt-get install -y libsqlite3-dev
24
+
25
+ - samvera/bundle:
26
+ ruby_version: << parameters.ruby_version >>
27
+ bundler_version: << parameters.bundler_version >>
28
+
29
+ - samvera/rubocop
30
+
31
+ - samvera/install_solr_core
32
+
33
+ - run:
34
+ name: run database migrations
35
+ command: bundle exec rake db:migrate
36
+
37
+ - samvera/parallel_rspec
38
+
39
+ workflows:
40
+ version: 2
41
+ ci:
42
+ jobs:
43
+ - build:
44
+ name: ruby2-5-3
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+ require 'zizia'
3
+
4
# Runs a Zizia CSV import for a single CSV import record.
#
# The record supplies the manifest file path, the target collection id and
# the depositing user; the CSV itself is handed to Zizia::Importer.
class ModularImporter
  # Passed to the record importer as its :deduplication_field attribute.
  DEDUPLICATION_FIELD = 'identifier'

  # @param csv_import [#manifest, #fedora_collection_id, #user_id, #id]
  #   the import record; `manifest.file.file` must stringify to the CSV path
  def initialize(csv_import)
    @csv_import = csv_import
    @csv_file = csv_import.manifest.file.file.to_s
    @collection_id = csv_import.fedora_collection_id
    @user_id = csv_import.user_id
    @user_email = User.find(csv_import.user_id).email
  end

  # Import every record described by the manifest CSV.
  #
  # @raise [RuntimeError] if the manifest file does not exist on disk
  # @return [nil]
  def import
    raise "Cannot find expected input file #{@csv_file}" unless File.exist?(@csv_file)

    attrs = {
      collection_id: @collection_id,
      depositor_id: @user_id,
      batch_id: @csv_import.id,
      deduplication_field: DEDUPLICATION_FIELD
    }

    Zizia.config.default_info_stream << "event: start_import, batch_id: #{@csv_import.id}, collection_id: #{@collection_id}, user: #{@user_email}"

    # Block form guarantees the handle is closed even when the import raises.
    File.open(@csv_file) do |file|
      Zizia::Importer.new(parser: Zizia::CsvParser.new(file: file),
                          record_importer: Zizia::HyraxRecordImporter.new(attributes: attrs)).import
    end

    nil # callers previously received the (nil) result of File#close
  end
end
@@ -10,7 +10,6 @@ module Zizia
10
10
  log_stream << "Starting import with batch ID: #{csv_import_id}"
11
11
  importer = ModularImporter.new(csv_import)
12
12
  importer.import
13
- endcs
14
13
  end
15
14
  end
16
15
  end
@@ -0,0 +1,163 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Validate a CSV file.
4
+ #
5
+ # Don't put expensive validations in this class.
6
+ # This is meant to be used for running a few quick
7
+ # validations before starting a CSV-based import.
8
+ # It will be called during the HTTP request/response,
9
+ # so long-running validations will make the page load
10
+ # slowly for the user. Any validations that are slow
11
+ # should be run in background jobs during the import
12
+ # instead of here.
13
module Zizia
  # Runs quick, request-time checks on an uploaded CSV manifest and collects
  # the problems it finds in {#errors} and {#warnings}.
  class CsvManifestValidator
    # @param manifest_uploader [CsvManifestUploader] The manifest that's mounted to a CsvImport record. See carrierwave gem documentation. This is basically a wrapper for the CSV file.
    def initialize(manifest_uploader)
      @csv_file = manifest_uploader.file
      @errors = []
      @warnings = []
    end

    # Errors and warnings for the CSV file.
    attr_reader :errors, :warnings
    attr_reader :csv_file

    # Parse the file and run every check, appending messages to
    # {#errors} / {#warnings}. Stops after parsing if the file is unreadable.
    #
    # @return [void]
    def validate
      parse_csv
      return unless @rows

      missing_headers
      duplicate_headers
      unrecognized_headers
      missing_values
      invalid_license
      invalid_resource_type
      invalid_rights_statement
    end

    # One record per row, not counting the header row.
    #
    # @return [Integer, nil] nil until the file has been parsed successfully;
    #   0 for a file with no rows at all
    def record_count
      return nil unless @rows
      [@rows.size - 1, 0].max # an empty file has no header row to subtract
    end

    # @return [String] separator used to split multi-valued cells
    def delimiter
      @delimiter ||= default_delimiter
    end
    attr_writer :delimiter

    private

    def default_delimiter
      Zizia::HyraxBasicMetadataMapper.new.delimiter
    end

    # @return [Array<String>] every (normalized) column name the importer accepts
    def valid_headers
      ['title', 'files', 'representative media',
       'thumbnail', 'rendering', 'depositor',
       'date_uploaded', 'date_modified', 'label',
       'relative_path', 'import url', 'resource type',
       'creator', 'contributor', 'abstract or summary',
       'keyword', 'license', 'rights statement',
       'publisher', 'date created', 'subject',
       'language', 'identifier', 'location',
       'related url', 'bibliographic_citation',
       'source', 'visibility']
    end

    # Read the CSV and normalize its header row (downcased, stripped).
    # Instance state is only assigned once every step has succeeded, so a
    # failure can never leave @rows set alongside missing headers.
    def parse_csv
      rows = CSV.read(csv_file.path)
      headers = rows.first || []
      # An empty header cell is parsed as nil; treat it as an empty name.
      transformed = headers.map { |header| header.to_s.downcase.strip }

      @rows = rows
      @headers = headers
      @transformed_headers = transformed
    rescue StandardError
      @rows = nil
      @errors << 'We are unable to read this CSV file.'
    end

    def missing_headers
      required_headers.each do |header|
        next if @transformed_headers.include?(header)
        @errors << "Missing required column: \"#{header.titleize}\". Your spreadsheet must have this column."
      end
    end

    # @return [Array<String>] (normalized) column names that must be present
    def required_headers
      ['title', 'creator', 'keyword', 'rights statement', 'visibility', 'files']
    end

    # Report each column name that appears more than once (alphabetical order).
    def duplicate_headers
      sorted_headers = @transformed_headers.sort
      duplicates = sorted_headers.each_cons(2).select { |a, b| a == b }.map(&:first).uniq
      duplicates.each do |header|
        @errors << "Duplicate column names: You can have only one \"#{header.titleize}\" column."
      end
    end

    # Warn the user if we find any unexpected headers.
    def unrecognized_headers
      extra_headers = @transformed_headers - valid_headers
      extra_headers.each do |header|
        @warnings << "The field name \"#{header}\" is not supported. This field will be ignored, and the metadata for this field will not be imported."
      end
    end

    # Report blank cells in required columns. Each header is kept paired with
    # its own column index so the message always names the right column,
    # even when other required headers are absent from the file.
    def missing_values
      required_columns = required_headers.each_with_object({}) do |header, columns|
        column_number = @transformed_headers.find_index(header)
        columns[header] = column_number if column_number
      end

      @rows.each_with_index do |row, i|
        next if i.zero? # the header row cannot be blank in a column it names

        required_columns.each do |header, column_number|
          next unless row[column_number].blank?
          @errors << "Missing required metadata in row #{i + 1}: \"#{header.titleize}\" field cannot be blank"
        end
      end
    end

    # Only allow valid license values expected by Hyrax.
    # Otherwise the app throws an error when it displays the work.
    def invalid_license
      validate_values('license', :valid_licenses)
    end

    def invalid_resource_type
      validate_values('resource type', :valid_resource_types)
    end

    def invalid_rights_statement
      validate_values('rights statement', :valid_rights_statements)
    end

    # Ids of the active terms in the license authority (memoized).
    def valid_licenses
      @valid_license_ids ||= Hyrax::LicenseService.new.authority.all.select { |license| license[:active] }.map { |license| license[:id] }
    end

    # Ids of the active terms in the 'resource_types' authority (memoized).
    def valid_resource_types
      @valid_resource_type_ids ||= Qa::Authorities::Local.subauthority_for('resource_types').all.select { |term| term[:active] }.map { |term| term[:id] }
    end

    # Ids of the active terms in the 'rights_statements' authority (memoized).
    def valid_rights_statements
      @valid_rights_statement_ids ||= Qa::Authorities::Local.subauthority_for('rights_statements').all.select { |term| term[:active] }.map { |term| term[:id] }
    end

    # Make sure this column contains only valid values
    #
    # @param header_name [String] normalized column name to check
    # @param valid_values_method [Symbol] private method returning the allowed values
    def validate_values(header_name, valid_values_method)
      column_number = @transformed_headers.find_index(header_name)
      return unless column_number

      @rows.each_with_index do |row, i|
        next if i.zero? # Skip the header row
        next unless row[column_number]

        # Looked up lazily so the authority is only queried when a cell has data.
        valid_values = send(valid_values_method)
        invalid_values = row[column_number].split(delimiter).reject { |value| valid_values.include?(value) }

        invalid_values.each do |value|
          @errors << "Invalid #{header_name.titleize} in row #{i + 1}: #{value}"
        end
      end
    end
  end
end
@@ -22,7 +22,7 @@
22
22
  <div class="col-md-6">
23
23
  <div class='well'>
24
24
  <p> Your records will be imported in the background. To check the current status, please check the background job status page. </p><br />
25
- <div class="text-center"> <%= link_to 'Background Job Status', sidekiq_web_path, class: "btn btn-primary btn-lg" %> </div>
25
+ <div class="text-center"> <%= link_to 'Background Job Status', '/sidekiq', class: "btn btn-primary btn-lg" %> </div>
26
26
  </div>
27
27
  </div>
28
28
  </div>
data/docs/index.md CHANGED
@@ -14,7 +14,7 @@ The simplest use case is importing content that matches [Hyrax's core and basic
14
14
  1. Make a directory for your fixture files: `mkdir spec/fixtures/images`
15
15
  1. Put three small images in `spec/fixtures/images`. In this guide, we're using copyright-free images from https://www.pexels.com/: `birds.jpg`, `cat.jpg`, and `dog.jpg`.
16
16
  1. Make a directory for your CSV fixture files: `mkdir spec/fixtures/csv_import`
17
- 1. Put a file like this in `spec/fixtures/csv_import`:
17
+ 1. Put a file like this in `spec/fixtures/csv_import/zizia_basic.csv`:
18
18
  ```
19
19
  title,source,visibility
20
20
  "A Cute Dog",https://www.pexels.com/photo/animal-blur-canine-close-up-551628/,open
@@ -26,17 +26,11 @@ The simplest use case is importing content that matches [Hyrax's core and basic
26
26
  # frozen_string_literal: true
27
27
 
28
28
  require 'rails_helper'
29
- require 'active_fedora/cleaner'
30
29
 
31
30
  RSpec.describe ModularImporter do
32
- let(:modular_csv) { 'spec/fixtures/csv_import/modular_input.csv' }
31
+ let(:modular_csv) { 'spec/fixtures/csv_import/zizia_basic.csv' }
33
32
  let(:user) { ::User.batch_user }
34
33
 
35
- before do
36
- DatabaseCleaner.clean
37
- ActiveFedora::Cleaner.clean!
38
- end
39
-
40
34
  it "imports a csv" do
41
35
  expect { ModularImporter.new(modular_csv).import }.to change { Work.count }.by 3
42
36
  end
@@ -96,3 +90,24 @@ So, at this point, your test is running, but the importer isn't yet creating any
96
90
  Finished in 7.56 seconds (files took 9.06 seconds to load)
97
91
  1 example, 0 failures
98
92
  ```
93
+
94
+ ### 3. Import in development and production
95
+
96
+ A passing test tells us that records are being created, but it's only happening in our test environment right now. Let's write a rake task so we can make it happen in development or production and really see the records we made.
97
+
98
+ Make a file like this at `lib/tasks/import.rake`:
99
+
100
+ ```ruby
101
+ namespace :basic_import do
102
+ desc 'Ingest sample data'
103
+ task sample: [:environment] do
104
+ Rake::Task["hyrax:default_admin_set:create"].invoke
105
+ Rake::Task["hyrax:default_collection_types:create"].invoke
106
+ Rake::Task["hyrax:workflow:load"].invoke
107
+ csv_file = Rails.root.join('spec', 'fixtures', 'csv_import', 'zizia_basic.csv')
108
+ ModularImporter.new(csv_file).import
109
+ end
110
+ end
111
+ ```
112
+
113
+ Now you should be able to run `rake basic_import:sample` and see your records appear in Hyrax.
data/lib/zizia/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Zizia
4
- VERSION = '2.0.0.alpha.01'
4
+ VERSION = '2.1.0.alpha.01'
5
5
  end
@@ -0,0 +1,3 @@
1
+ {
2
+ "initArgs":{},
3
+ "managedList":[]}
@@ -0,0 +1,31 @@
1
+ <!--
2
+ Licensed to the Apache Software Foundation (ASF) under one or more
3
+ contributor license agreements. See the NOTICE file distributed with
4
+ this work for additional information regarding copyright ownership.
5
+ The ASF licenses this file to You under the Apache License, Version 2.0
6
+ (the "License"); you may not use this file except in compliance with
7
+ the License. You may obtain a copy of the License at
8
+
9
+ http://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ Unless required by applicable law or agreed to in writing, software
12
+ distributed under the License is distributed on an "AS IS" BASIS,
13
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ See the License for the specific language governing permissions and
15
+ limitations under the License.
16
+ -->
17
+
18
+ <!-- The content of this page will be statically included into the top
19
+ of the admin page. Uncomment this as an example to see where the content
20
+ will show up.
21
+
22
+ <hr>
23
+ <i>This line will appear before the first table</i>
24
+ <tr>
25
+ <td colspan="2">
26
+ This row will be appended to the end of the first table
27
+ </td>
28
+ </tr>
29
+ <hr>
30
+
31
+ -->
@@ -0,0 +1,36 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <!-- If this file is found in the config directory, it will only be
20
+ loaded once at startup. If it is found in Solr's data
21
+ directory, it will be re-loaded every commit.
22
+ -->
23
+
24
+ <elevate>
25
+ <query text="foo bar">
26
+ <doc id="1" />
27
+ <doc id="2" />
28
+ <doc id="3" />
29
+ </query>
30
+
31
+ <query text="ipod">
32
+ <doc id="MA147LL/A" /> <!-- put the actual ipod at the top -->
33
+ <doc id="IW-02" exclude="true" /> <!-- exclude this cable -->
34
+ </query>
35
+
36
+ </elevate>
@@ -0,0 +1,246 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ # Syntax:
14
+ # "source" => "target"
15
+ # "source".length() > 0 (source cannot be empty.)
16
+ # "target".length() >= 0 (target can be empty.)
17
+
18
+ # example:
19
+ # "À" => "A"
20
+ # "\u00C0" => "A"
21
+ # "\u00C0" => "\u0041"
22
+ # "ß" => "ss"
23
+ # "\t" => " "
24
+ # "\n" => ""
25
+
26
+ # À => A
27
+ "\u00C0" => "A"
28
+
29
+ # Á => A
30
+ "\u00C1" => "A"
31
+
32
+ # Â => A
33
+ "\u00C2" => "A"
34
+
35
+ # Ã => A
36
+ "\u00C3" => "A"
37
+
38
+ # Ä => A
39
+ "\u00C4" => "A"
40
+
41
+ # Å => A
42
+ "\u00C5" => "A"
43
+
44
+ # Æ => AE
45
+ "\u00C6" => "AE"
46
+
47
+ # Ç => C
48
+ "\u00C7" => "C"
49
+
50
+ # È => E
51
+ "\u00C8" => "E"
52
+
53
+ # É => E
54
+ "\u00C9" => "E"
55
+
56
+ # Ê => E
57
+ "\u00CA" => "E"
58
+
59
+ # Ë => E
60
+ "\u00CB" => "E"
61
+
62
+ # Ì => I
63
+ "\u00CC" => "I"
64
+
65
+ # Í => I
66
+ "\u00CD" => "I"
67
+
68
+ # Î => I
69
+ "\u00CE" => "I"
70
+
71
+ # Ï => I
72
+ "\u00CF" => "I"
73
+
74
+ # Ĳ => IJ
75
+ "\u0132" => "IJ"
76
+
77
+ # Ð => D
78
+ "\u00D0" => "D"
79
+
80
+ # Ñ => N
81
+ "\u00D1" => "N"
82
+
83
+ # Ò => O
84
+ "\u00D2" => "O"
85
+
86
+ # Ó => O
87
+ "\u00D3" => "O"
88
+
89
+ # Ô => O
90
+ "\u00D4" => "O"
91
+
92
+ # Õ => O
93
+ "\u00D5" => "O"
94
+
95
+ # Ö => O
96
+ "\u00D6" => "O"
97
+
98
+ # Ø => O
99
+ "\u00D8" => "O"
100
+
101
+ # Œ => OE
102
+ "\u0152" => "OE"
103
+
104
+ # Þ => TH
105
+ "\u00DE" => "TH"
106
+
107
+ # Ù => U
108
+ "\u00D9" => "U"
109
+
110
+ # Ú => U
111
+ "\u00DA" => "U"
112
+
113
+ # Û => U
114
+ "\u00DB" => "U"
115
+
116
+ # Ü => U
117
+ "\u00DC" => "U"
118
+
119
+ # Ý => Y
120
+ "\u00DD" => "Y"
121
+
122
+ # Ÿ => Y
123
+ "\u0178" => "Y"
124
+
125
+ # à => a
126
+ "\u00E0" => "a"
127
+
128
+ # á => a
129
+ "\u00E1" => "a"
130
+
131
+ # â => a
132
+ "\u00E2" => "a"
133
+
134
+ # ã => a
135
+ "\u00E3" => "a"
136
+
137
+ # ä => a
138
+ "\u00E4" => "a"
139
+
140
+ # å => a
141
+ "\u00E5" => "a"
142
+
143
+ # æ => ae
144
+ "\u00E6" => "ae"
145
+
146
+ # ç => c
147
+ "\u00E7" => "c"
148
+
149
+ # è => e
150
+ "\u00E8" => "e"
151
+
152
+ # é => e
153
+ "\u00E9" => "e"
154
+
155
+ # ê => e
156
+ "\u00EA" => "e"
157
+
158
+ # ë => e
159
+ "\u00EB" => "e"
160
+
161
+ # ì => i
162
+ "\u00EC" => "i"
163
+
164
+ # í => i
165
+ "\u00ED" => "i"
166
+
167
+ # î => i
168
+ "\u00EE" => "i"
169
+
170
+ # ï => i
171
+ "\u00EF" => "i"
172
+
173
+ # ĳ => ij
174
+ "\u0133" => "ij"
175
+
176
+ # ð => d
177
+ "\u00F0" => "d"
178
+
179
+ # ñ => n
180
+ "\u00F1" => "n"
181
+
182
+ # ò => o
183
+ "\u00F2" => "o"
184
+
185
+ # ó => o
186
+ "\u00F3" => "o"
187
+
188
+ # ô => o
189
+ "\u00F4" => "o"
190
+
191
+ # õ => o
192
+ "\u00F5" => "o"
193
+
194
+ # ö => o
195
+ "\u00F6" => "o"
196
+
197
+ # ø => o
198
+ "\u00F8" => "o"
199
+
200
+ # œ => oe
201
+ "\u0153" => "oe"
202
+
203
+ # ß => ss
204
+ "\u00DF" => "ss"
205
+
206
+ # þ => th
207
+ "\u00FE" => "th"
208
+
209
+ # ù => u
210
+ "\u00F9" => "u"
211
+
212
+ # ú => u
213
+ "\u00FA" => "u"
214
+
215
+ # û => u
216
+ "\u00FB" => "u"
217
+
218
+ # ü => u
219
+ "\u00FC" => "u"
220
+
221
+ # ý => y
222
+ "\u00FD" => "y"
223
+
224
+ # ÿ => y
225
+ "\u00FF" => "y"
226
+
227
+ # ﬀ => ff
228
+ "\uFB00" => "ff"
229
+
230
+ # ﬁ => fi
231
+ "\uFB01" => "fi"
232
+
233
+ # ﬂ => fl
234
+ "\uFB02" => "fl"
235
+
236
+ # ﬃ => ffi
237
+ "\uFB03" => "ffi"
238
+
239
+ # ﬄ => ffl
240
+ "\uFB04" => "ffl"
241
+
242
+ # ﬅ => ft
243
+ "\uFB05" => "ft"
244
+
245
+ # ﬆ => st
246
+ "\uFB06" => "st"