hyrax-ingest 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +52 -0
- data/config/routes.rb +2 -0
- data/lib/hyrax/ingest.rb +12 -0
- data/lib/hyrax/ingest/batch_runner.rb +130 -0
- data/lib/hyrax/ingest/configuration.rb +54 -0
- data/lib/hyrax/ingest/engine.rb +6 -0
- data/lib/hyrax/ingest/errors.rb +186 -0
- data/lib/hyrax/ingest/fetcher.rb +55 -0
- data/lib/hyrax/ingest/fetcher/base.rb +78 -0
- data/lib/hyrax/ingest/fetcher/csv_file.rb +89 -0
- data/lib/hyrax/ingest/fetcher/date_time.rb +15 -0
- data/lib/hyrax/ingest/fetcher/literal.rb +24 -0
- data/lib/hyrax/ingest/fetcher/premis_event_type.rb +28 -0
- data/lib/hyrax/ingest/fetcher/rdf_uri.rb +21 -0
- data/lib/hyrax/ingest/fetcher/xml_file.rb +54 -0
- data/lib/hyrax/ingest/fetcher/yaml_file.rb +36 -0
- data/lib/hyrax/ingest/has_depositor.rb +13 -0
- data/lib/hyrax/ingest/has_iteration.rb +18 -0
- data/lib/hyrax/ingest/has_logger.rb +29 -0
- data/lib/hyrax/ingest/has_report.rb +17 -0
- data/lib/hyrax/ingest/has_shared_sip.rb +20 -0
- data/lib/hyrax/ingest/has_sip.rb +20 -0
- data/lib/hyrax/ingest/ingester.rb +75 -0
- data/lib/hyrax/ingest/ingester/active_fedora_base_ingester.rb +136 -0
- data/lib/hyrax/ingest/ingester/active_fedora_file_ingester.rb +17 -0
- data/lib/hyrax/ingest/ingester/active_fedora_property_assigner.rb +67 -0
- data/lib/hyrax/ingest/ingester/base.rb +28 -0
- data/lib/hyrax/ingest/ingester/file_set_ingester.rb +68 -0
- data/lib/hyrax/ingest/ingester/preservation_event_ingester.rb +27 -0
- data/lib/hyrax/ingest/ingester/work_ingester.rb +55 -0
- data/lib/hyrax/ingest/reporting.rb +13 -0
- data/lib/hyrax/ingest/reporting/configuration.rb +22 -0
- data/lib/hyrax/ingest/reporting/report.rb +79 -0
- data/lib/hyrax/ingest/reporting/views/hyrax_ingest_report.html.erb +77 -0
- data/lib/hyrax/ingest/reporting/views/hyrax_ingest_report.txt.erb +21 -0
- data/lib/hyrax/ingest/runner.rb +103 -0
- data/lib/hyrax/ingest/sip.rb +92 -0
- data/lib/hyrax/ingest/transformer.rb +42 -0
- data/lib/hyrax/ingest/transformer/base.rb +12 -0
- data/lib/hyrax/ingest/transformer/to_date.rb +33 -0
- data/lib/hyrax/ingest/version.rb +5 -0
- data/lib/tasks/ingest_tasks.rake +22 -0
- metadata +330 -0
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'hyrax/ingest/ingester/active_fedora_base_ingester'
|
2
|
+
require 'hyrax/ingest/ingester/active_fedora_file_ingester'
|
3
|
+
|
4
|
+
module Hyrax
  module Ingest
    module Ingester
      # Ingests a single preservation event. Unless the configuration names
      # another model class, the model class name defaults to
      # 'Hyrax::Preservation::Event'.
      class PreservationEventIngester < ActiveFedoraBaseIngester
        # The value given under the :premis_event_related_object config key;
        # it is assigned to the model when the ingester runs.
        attr_reader :premis_event_related_object

        # @param config [Hash] ingester options. :premis_event_related_object
        #   is consumed here; everything else (including the defaulted
        #   :af_model_class_name) is handed to the superclass.
        def initialize(config={})
          # Work on a copy: the default and the delete below would otherwise
          # modify the hash owned by the caller (typically the parsed ingest
          # configuration), making a second ingester built from it differ.
          config = config.dup
          config[:af_model_class_name] ||= 'Hyrax::Preservation::Event'
          @premis_event_related_object = config.delete(:premis_event_related_object)
          super(config)
        end

        # Assigns the configured properties, links the related object, and
        # saves the model. The helpers called here are not defined in this
        # class (presumably inherited from ActiveFedoraBaseIngester).
        #
        # @return the new instance of the ActiveFedora model
        def run!
          assign_properties!
          af_model.premis_event_related_object = premis_event_related_object
          save_model!
          af_model
        end
      end
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'hyrax/ingest/ingester/active_fedora_base_ingester'
|
2
|
+
require 'hyrax/ingest/ingester/file_set_ingester'
|
3
|
+
|
4
|
+
module Hyrax
  module Ingest
    module Ingester
      # Ingests a work along with the file sets configured beneath it.
      class WorkIngester < ActiveFedoraBaseIngester
        # @return [Array] the entries given under the :FileSets config key
        #   (empty when the key is absent)
        attr_reader :file_sets_config

        # @param config [Hash] ingester options. :type supplies the model class
        #   name unless :af_model_class_name is already present; :FileSets
        #   holds one configuration per file set. Remaining options are passed
        #   to the superclass.
        def initialize(config={})
          # TODO: Throw a useful custom error when :type option is missing.
          # Work on a copy so the deletes below do not strip :type and
          # :FileSets from the hash owned by the caller.
          config = config.dup
          config[:af_model_class_name] ||= config.delete(:type)
          @file_sets_config = config.delete(:FileSets) || []
          super(config)
        end

        # Assigns properties to the work and to its file sets, applies the
        # depositor, saves the work, and then attaches the file sets to it.
        #
        # @return the new instance of the ActiveFedora model
        def run!
          assign_properties!
          assign_related_file_set_properties!
          apply_depositor_metadata!
          save_model!
          assign_file_sets_to_work!
          af_model
        end

        private

        # Applies depositor metadata to the model; a no-op without a depositor.
        def apply_depositor_metadata!
          af_model.apply_depositor_metadata(depositor) if depositor
        end

        # Asks every file set ingester to assign its configured properties.
        def assign_related_file_set_properties!
          file_set_ingesters.each(&:assign_properties!)
        end

        # Appends each file set's model to the work's members, one at a time.
        def assign_file_sets_to_work!
          file_set_ingesters.each do |file_set_ingester|
            af_model.members += [file_set_ingester.af_model]
          end
        end

        # Builds (once) a FileSetIngester per :FileSets entry, handing down
        # this ingester's sip, shared sip and iteration where the file set
        # ingester accepts them.
        def file_set_ingesters
          @file_set_ingesters ||= @file_sets_config.map do |file_set_config|
            Hyrax::Ingest::Ingester::FileSetIngester.new(file_set_config).tap do |file_set_ingester|
              file_set_ingester.sip = sip if file_set_ingester.respond_to?(:sip=)
              file_set_ingester.shared_sip = shared_sip if file_set_ingester.respond_to?(:shared_sip=)
              file_set_ingester.iteration = iteration if file_set_ingester.respond_to?(:iteration=)
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'hyrax/ingest/errors'
|
2
|
+
|
3
|
+
module Hyrax
  module Ingest
    module Reporting
      # Default settings for report rendering: which ERB template to read and
      # which file to write the rendered report to.
      class Configuration
        attr_accessor :default_output_file
        attr_reader :default_template_path

        # Starts with an output file named hyrax_ingest_report.html, expanded
        # against the current working directory, and the HTML template in the
        # views directory next to this file.
        def initialize
          @default_output_file = File.expand_path('hyrax_ingest_report.html')
          @default_template_path = File.expand_path('../views/hyrax_ingest_report.html.erb', __FILE__)
        end

        # @param path [String] location of the template to use by default
        # @raise [Hyrax::Ingest::Errors::ConfigurationError] when nothing
        #   exists at path
        def default_template_path=(path)
          unless File.exist?(path)
            raise Hyrax::Ingest::Errors::ConfigurationError, "\"#{path}\" does not exist"
          end

          @default_template_path = path
        end
      end
    end
  end
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'erb'
|
2
|
+
require 'functional_hash'
|
3
|
+
require 'hyrax/ingest/reporting/configuration'
|
4
|
+
|
5
|
+
module Hyrax
  module Ingest
    module Reporting
      # Collects statistics and errors during an ingest and renders them
      # through an ERB template.
      class Report
        # @return [FunctionalHash] the stat tracker built by #default_stat
        attr_reader :stat

        def initialize
          @stat = default_stat
        end

        # Builds a fresh stat tracker with its default entries. Note that it
        # also stores the new tracker in @stat, so calling it again discards
        # whatever had been collected.
        #
        # Entries holding a Proc are computed on lookup: the proc receives the
        # tracker itself, followed by any extra lookup arguments (the report
        # templates use e.g. stat[:count, :files]).
        #
        # @return [FunctionalHash]
        def default_stat
          @stat = FunctionalHash.new.tap do |stat|
            # Stores an array of all SIP paths.
            stat[:sip_paths] = []

            # Stores a list of all files from SIPs that are part of the ingest.
            stat[:files] = []

            # Stores a hash where keys are Fetcher classes, and each value is
            # a list with one entry per missing required value (the HTML
            # template treats each entry as a hash of details).
            stat[:missing_required_values] = {}

            # Sums the list sizes across all Fetcher classes. Reads through
            # the tracker passed to the proc, the same way :count does,
            # instead of closing over this builder block's variable.
            stat[:total_missing_required_values] = Proc.new do |s|
              s[:missing_required_values].reduce(0) do |total, (_fetcher_class, occurrences)|
                total + occurrences.count
              end
            end

            # Filters the :missing_required_values hash to those for XML files.
            # stat[:xml_files_missing_required_values] = Proc.new do |s|
            #   s[:missing_required_values].select { |fetcher_class, params| fetcher_class.to_s =~ /XMLFile$/ }
            # end

            # Define a functional hash value that returns the count of the
            # given key, or 0 when that entry cannot be counted.
            stat[:count] = Proc.new do |s, key_to_count|
              s[key_to_count].respond_to?(:count) ? s[key_to_count].count : 0
            end

            stat[:models_saved] = []
            stat[:models_failed] = []
          end
        end

        # Renders the report. The template is evaluated with this object's
        # binding, so it can call #stat, #errors and #failed? directly.
        #
        # @param template_path [String, nil] ERB template to use; falls back
        #   to Reporting.config.default_template_path
        # @return [String] the rendered report
        def render(template_path: nil)
          template_path ||= Reporting.config.default_template_path
          template_content = File.read(File.expand_path(template_path))
          ERB.new(template_content).result(binding)
        end

        # Renders the report and writes it to a file.
        #
        # @param filename [String, nil] destination; falls back to
        #   Reporting.config.default_output_file
        # @param template_path [String, nil] see #render
        def write_to_file(filename: nil, template_path: nil)
          filename ||= Reporting.config.default_output_file
          File.write(filename, render(template_path: template_path))
        end

        # Records an error that caused the ingest to fail.
        def failed_with(error)
          errors << error
        end

        # @return [Array] the errors recorded through #failed_with
        def errors
          @errors ||= []
        end

        # @return [Boolean] true once at least one error has been recorded
        def failed?
          !errors.empty?
        end
      end
    end
  end
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
<%# HTML ingest report. Evaluated with the report's binding, so stat, errors and failed? are the report's own methods. %>
<%# Free-text values are HTML-escaped so characters such as < and & show up literally instead of breaking the markup. %>
<style>

  dl.inline dd {
    display: inline;
    margin: 0;
  }

  dl.inline dd:after {
    display: block;
    content: '';
  }

  dl.inline dt {
    display: inline-block;
    /*min-width: 100px;*/
    width: 200px;
  }

  h3 {
    margin-left: 20px;
  }

</style>

<h1>Hyrax Ingest Report Summary</h1>

<% if failed? %>
  <h2><strong>Ingest Failed!</strong></h2>
  <p>The following errors caused the ingest to fail:</p>
  <ol>
    <% errors.each do |error| %>
      <li><%= ERB::Util.html_escape(error) %></li>
    <% end %>
  </ol>
<% end %>


<h2>Ingest Details:</h2>
<dl class="inline">
  <dt>Ingest configuration file:</dt>
  <dd><%= ERB::Util.html_escape(stat[:config_file_path]) %></dd>
  <dt>Started at:</dt>
  <dd><%= stat[:datetime_started] %></dd>
  <dt>Completed at:</dt>
  <dd><%= stat[:datetime_completed] %></dd>
  <dt>Batch size:</dt>
  <dd><%= stat[:batch_size] %></dd>
  <dt>Total # of source files used:</dt>
  <dd><%= stat[:count, :files] %></dd>
  <dt>Records ingested:</dt>
  <dd><%= stat[:count, :models_saved] %></dd>
  <dt>Records failed:</dt>
  <dd><%= stat[:count, :models_failed] %></dd>
</dl>

<h2>Missing Required Values: <%= stat[:total_missing_required_values] %> total</h2>
<% if stat[:total_missing_required_values] > 0 %>
  <% stat[:missing_required_values].each do |fetcher_class, list_of_param_hashes| %>
    <% fetcher_class_short_name = fetcher_class.to_s.gsub(/.*\:\:/, '') %>
    <h3>Missing required values from <%= ERB::Util.html_escape(fetcher_class_short_name) %>: <%= list_of_param_hashes.count %></h3>
    <ol>
      <% list_of_param_hashes.each do |param_hash| %>
        <li>
          <% if param_hash.empty? %>
            <em>No additional information</em>
          <% else %>
            <dl class="inline">
              <% param_hash.each do |key, val| %>
                <dt><%= ERB::Util.html_escape(key) %>:</dt><dd><%= ERB::Util.html_escape(val) %></dd>
              <% end %>
            </dl>
          <% end %>
        </li>
      <% end %>
    </ol>
  <% end %>
<% end %>
|
@@ -0,0 +1,21 @@
|
|
1
|
+
=============================================
======== Hyrax Ingest Report Summary ========
=============================================

Ingest configuration file: <%= stat[:config_file_path] %>
Started at: <%= stat[:datetime_started] %>
Completed at: <%= stat[:datetime_completed] %>
Batch size: <%= stat[:batch_size] %>
Total # of source files used: <%= stat[:count, :files] %>
Records ingested: <%= stat[:count, :models_saved] %>
Records failed: <%= stat[:count, :models_failed] %>

<%# Reads :missing_required_values (the entry the report actually fills in) and compares the total against 0 explicitly, since 0 is truthy in Ruby. %>
Missing required values: <%= stat[:total_missing_required_values] %> total
<% if stat[:total_missing_required_values] > 0 %>
<% stat[:missing_required_values].each do |fetcher_class, list_of_param_hashes| %>
Values missing from <%= fetcher_class.to_s.gsub(/.*\:\:/, '') %>: <%= list_of_param_hashes.count %>
<% list_of_param_hashes.each do |param_hash| %>
<% if param_hash.empty? %>
  - No additional information
<% else %>
  - <%= param_hash.map { |key, val| "#{key}: #{val}" }.join(', ') %>
<% end %>
<% end %>
<% end %>
<% end %>
|
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'hyrax/ingest/configuration'
|
2
|
+
require 'hyrax/ingest/ingester'
|
3
|
+
require 'hyrax/ingest/reporting'
|
4
|
+
require 'hyrax/ingest/has_sip'
|
5
|
+
require 'hyrax/ingest/has_shared_sip'
|
6
|
+
require 'hyrax/ingest/has_iteration'
|
7
|
+
require 'hyrax/ingest/has_logger'
|
8
|
+
require 'hyrax/ingest/has_report'
|
9
|
+
require 'hyrax/ingest/has_depositor'
|
10
|
+
|
11
|
+
|
12
|
+
module Hyrax
  module Ingest
    # Runs one iteration of an ingest: builds an ingester for every entry in
    # the ingest configuration, hands each one the shared context (SIPs,
    # iteration, logger, report, depositor) and runs them in order.
    class Runner
      include Reporting
      include Interloper
      include HasSIP
      include HasSharedSIP
      include HasIteration
      include HasReport
      include HasLogger
      include HasDepositor

      # @return [Hyrax::Ingest::Configuration] the configuration built from
      #   the config file path given to #initialize
      attr_reader :config

      # Hook run ahead of #run! (`before` is not defined in this file;
      # presumably it comes from Interloper). Logs the start and seeds the
      # report; the start time and batch size are only set when still unset.
      before(:run!) do
        logger.info "Ingest iteration #{iteration+1} started."
        report.stat[:datetime_started] ||= DateTime.now
        report.stat[:batch_size] ||= 1
        report.stat[:files] += sip.file_paths if sip
        report.stat[:files] += shared_sip.file_paths if shared_sip
        report.stat[:config_file_path] = config.config_file_path
      end

      # Hook run after #run!: logs completion and records the completion time
      # unless one is already present.
      after(:run!) do
        logger.info "Ingest iteration #{iteration+1} complete."
        report.stat[:datetime_completed] ||= DateTime.now
      end

      # @param config_file_path [String] path to the ingest configuration
      # @param sip_path [String, nil] path of the SIP for this iteration
      # @param shared_sip_path [String, nil] path of a SIP shared between
      #   iterations
      # @param iteration [#to_i] zero-based iteration number
      # @param depositor [Object, nil] passed on to ingesters that accept one
      def initialize(config_file_path:, sip_path: nil, shared_sip_path: nil, iteration: 0, depositor: nil)
        self.sip = SIP.new(path: sip_path) if sip_path
        # Unlike sip, shared_sip is always assigned, with nil when no path
        # was given.
        self.shared_sip = shared_sip_path.nil? ? nil : SIP.new(path: shared_sip_path)
        self.iteration = iteration.to_i
        self.depositor = depositor
        @config = Hyrax::Ingest::Configuration.new(config_file_path: config_file_path)
      end

      # Runs every configured ingester.
      #
      # @return [Array] the return value of each ingester's #run!
      def run!
        ingesters.map(&:run!)
      end

      # TODO: Does not yet return IDs of associated objects that were ingested
      # as associated objects (i.e. objects that are nested under other
      # objects in the ingest configuration). It only returns IDs for objects that
      # are ingested per the top-level of ingest configuration.
      #
      # @return [Hash] model class => list of model ids, covering the
      #   ingesters that expose #af_model
      def ingested_ids_by_type
        ingesters.each_with_object({}) do |ingester, ids_by_type|
          next unless ingester.respond_to? :af_model
          (ids_by_type[ingester.af_model.class] ||= []) << ingester.af_model.id
        end
      end

      private

      # Builds (once) one ingester per entry of config.ingester_configs and
      # passes along each piece of shared context the ingester accepts.
      def ingesters
        @ingesters ||= config.ingester_configs.map do |ingester_config|
          # TODO: Better way to handle invalid config than throwing big
          # error msgs from here.
          unless ingester_config.respond_to? :keys
            raise Hyrax::Ingest::Errors::InvalidConfig, 'Ingester config must be a single key value pair, where the key is the name of the ingester, and the value is the ingester configuration.'
          end

          ingester_name = ingester_config.keys.first
          ingester_options = ingester_config.values.first
          Hyrax::Ingest::Ingester.factory(ingester_name, ingester_options).tap do |ingester|
            ingester.sip = sip if ingester.respond_to? :sip=
            ingester.shared_sip = shared_sip if ingester.respond_to? :shared_sip=
            ingester.iteration = iteration if ingester.respond_to? :iteration=
            ingester.logger = logger if ingester.respond_to? :logger=
            ingester.report = report if ingester.respond_to? :report=
            ingester.depositor = depositor if ingester.respond_to? :depositor=
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'hyrax/ingest/errors'
|
2
|
+
require 'minitar'
|
3
|
+
|
4
|
+
module Hyrax
  module Ingest
    # A model for reading Submission Information Packages (SIPs) from a
    # filesystem.
    #
    # @attr_reader [String] path The location of the SIP, as given to the
    #   constructor.
    class SIP
      attr_reader :path

      # @param [String] path The path to the SIP on the filesystem.
      # @raise [Hyrax::Ingest::Errors::InvalidSIPPath] if nothing exists at
      #   the path
      def initialize(path:)
        # File.exists? was removed in Ruby 3.2; File.exist? is the supported
        # spelling.
        raise Hyrax::Ingest::Errors::InvalidSIPPath.new(path.to_s) unless File.exist? path.to_s
        @path = path
      end

      # @return [Array<String>] Paths of the files that are part of the SIP
      def file_paths
        # NOTE(review): a tarball is itself a regular file, so
        # single_file_path claims it first and the tarball branch looks
        # unreachable for ordinary archives. Confirm the intended order
        # before relying on tarball unpacking.
        @file_paths ||= single_file_path || file_paths_from_dir || file_paths_from_tarball || []
      end

      # @param [String, Regexp] basename_or_regex A file name, a Regexp, or a
      #   string representation of a regex (wrapped in forward slashes)
      # @return [String] The path of the first file in the SIP that matches.
      # @raise [Hyrax::Ingest::Errors::FileNotFoundInSIP] if no file matches
      def find_file_path(basename_or_regex)
        file_path = file_path_from_regex(basename_or_regex) || file_path_from_basename(basename_or_regex)
        raise Hyrax::Ingest::Errors::FileNotFoundInSIP.new(path, basename_or_regex) unless file_path
        file_path
      end

      # Reads the content of a file from the SIP.
      # @param [String, Regexp] basename_or_regex See #find_file_path
      # @return [String] The contents of the matched file
      # @raise [Hyrax::Ingest::Errors::FileNotFoundInSIP] if no file matches
      def read_file(basename_or_regex)
        File.read(find_file_path(basename_or_regex))
      end

      private

      # @param [String, Regexp] regex Either a Regexp object or a string
      #   beginning and ending in forward slashes, that can be converted to
      #   a regex.
      # @return [String] The path of the first file whose basename matches;
      #   nil if no file matches 'regex', or if 'regex' cannot be used as a
      #   regular expression.
      def file_path_from_regex(regex)
        # If 'regex' is a string beginning and ending in slash, convert it to
        # a Regexp.
        regex = Regexp.new(regex.to_s[1..-2]) if regex.to_s =~ /^\/.*\/$/
        file_paths.find { |file| File.basename(file) =~ regex } if regex.is_a? Regexp
      end

      # @param [String] filename The name of the file within the SIP you want
      #   to return.
      # @return [String] The path of the first file whose basename equals
      #   'filename'; nil if no file matches.
      def file_path_from_basename(filename)
        file_paths.find { |file| File.basename(file) == filename }
      end

      # @return [Array, nil] An Array containing the one and only file pointed
      #   to by #path; nil if #path is not a regular file.
      def single_file_path
        Array(path) if File.file? path
      end

      # @return [Array<String>, nil] Every regular file beneath #path
      #   (recursively); nil if #path is not a directory.
      def file_paths_from_dir
        return unless File.directory? path
        Dir.glob("#{path}/**/*").select { |entry| File.file? entry }
      end

      # @return [Array<String>, nil] The files unpacked from the tarball at
      #   #path; nil if the `file` utility does not report a tar archive.
      def file_paths_from_tarball
        # TODO: this is the best test I could find for reliably determining
        # whether a file was a TAR archive or not, but it seems finicky, as
        # it probably depends on your operating system, or what kind of tarball
        # it is. Find something better?
        #
        # The path is passed as its own argument rather than interpolated
        # into a shell command, so quotes and other shell metacharacters in
        # a file name cannot alter the command.
        return unless IO.popen(['file', path.to_s], &:read) =~ /tar archive/
        Minitar.unpack(path, tmp_dir_for_unpacked_tarball)
        # Keep regular files only, like file_paths_from_dir does.
        Dir.glob("#{tmp_dir_for_unpacked_tarball}/**/*").select { |entry| File.file? entry }
      end

      # NOTE(review): Dir.tmpdir comes from the 'tmpdir' standard library,
      # which is not among the requires visible for this file; presumably it
      # is loaded elsewhere. Confirm.
      def tmp_dir_for_unpacked_tarball
        "#{Dir.tmpdir}/#{File.basename(path)}.unpacked"
      end
    end
  end
end
|