hyrax-ingest 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +52 -0
- data/config/routes.rb +2 -0
- data/lib/hyrax/ingest.rb +12 -0
- data/lib/hyrax/ingest/batch_runner.rb +130 -0
- data/lib/hyrax/ingest/configuration.rb +54 -0
- data/lib/hyrax/ingest/engine.rb +6 -0
- data/lib/hyrax/ingest/errors.rb +186 -0
- data/lib/hyrax/ingest/fetcher.rb +55 -0
- data/lib/hyrax/ingest/fetcher/base.rb +78 -0
- data/lib/hyrax/ingest/fetcher/csv_file.rb +89 -0
- data/lib/hyrax/ingest/fetcher/date_time.rb +15 -0
- data/lib/hyrax/ingest/fetcher/literal.rb +24 -0
- data/lib/hyrax/ingest/fetcher/premis_event_type.rb +28 -0
- data/lib/hyrax/ingest/fetcher/rdf_uri.rb +21 -0
- data/lib/hyrax/ingest/fetcher/xml_file.rb +54 -0
- data/lib/hyrax/ingest/fetcher/yaml_file.rb +36 -0
- data/lib/hyrax/ingest/has_depositor.rb +13 -0
- data/lib/hyrax/ingest/has_iteration.rb +18 -0
- data/lib/hyrax/ingest/has_logger.rb +29 -0
- data/lib/hyrax/ingest/has_report.rb +17 -0
- data/lib/hyrax/ingest/has_shared_sip.rb +20 -0
- data/lib/hyrax/ingest/has_sip.rb +20 -0
- data/lib/hyrax/ingest/ingester.rb +75 -0
- data/lib/hyrax/ingest/ingester/active_fedora_base_ingester.rb +136 -0
- data/lib/hyrax/ingest/ingester/active_fedora_file_ingester.rb +17 -0
- data/lib/hyrax/ingest/ingester/active_fedora_property_assigner.rb +67 -0
- data/lib/hyrax/ingest/ingester/base.rb +28 -0
- data/lib/hyrax/ingest/ingester/file_set_ingester.rb +68 -0
- data/lib/hyrax/ingest/ingester/preservation_event_ingester.rb +27 -0
- data/lib/hyrax/ingest/ingester/work_ingester.rb +55 -0
- data/lib/hyrax/ingest/reporting.rb +13 -0
- data/lib/hyrax/ingest/reporting/configuration.rb +22 -0
- data/lib/hyrax/ingest/reporting/report.rb +79 -0
- data/lib/hyrax/ingest/reporting/views/hyrax_ingest_report.html.erb +77 -0
- data/lib/hyrax/ingest/reporting/views/hyrax_ingest_report.txt.erb +21 -0
- data/lib/hyrax/ingest/runner.rb +103 -0
- data/lib/hyrax/ingest/sip.rb +92 -0
- data/lib/hyrax/ingest/transformer.rb +42 -0
- data/lib/hyrax/ingest/transformer/base.rb +12 -0
- data/lib/hyrax/ingest/transformer/to_date.rb +33 -0
- data/lib/hyrax/ingest/version.rb +5 -0
- data/lib/tasks/ingest_tasks.rake +22 -0
- metadata +330 -0
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'hyrax/ingest/ingester/active_fedora_base_ingester'
|
2
|
+
require 'hyrax/ingest/ingester/active_fedora_file_ingester'
|
3
|
+
|
4
|
+
module Hyrax
|
5
|
+
module Ingest
|
6
|
+
module Ingester
|
7
|
+
# Ingester that assigns configured properties to a model, attaches the
# related object the event refers to, and saves it.
#
# Unless the configuration says otherwise, the model class name defaults to
# 'Hyrax::Preservation::Event'.
class PreservationEventIngester < ActiveFedoraBaseIngester
  # The object assigned to the model's `premis_event_related_object`
  # attribute during #run!.
  attr_reader :premis_event_related_object

  # @param [Hash] config Ingester options. :premis_event_related_object is
  #   consumed here; everything else is handed to the superclass.
  def initialize(config = {})
    # Work on a copy: the default assignment and the delete below would
    # otherwise silently modify the hash owned by the caller.
    config = config.dup
    config[:af_model_class_name] ||= 'Hyrax::Preservation::Event'
    @premis_event_related_object = config.delete(:premis_event_related_object)
    super(config)
  end

  # Assigns properties, links the related object, and saves the model.
  #
  # @return The model instance exposed by `af_model`.
  def run!
    assign_properties!
    af_model.premis_event_related_object = premis_event_related_object
    save_model!
    # return the new instance of the ActiveFedora model
    af_model
  end
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'hyrax/ingest/ingester/active_fedora_base_ingester'
|
2
|
+
require 'hyrax/ingest/ingester/file_set_ingester'
|
3
|
+
|
4
|
+
module Hyrax
|
5
|
+
module Ingest
|
6
|
+
module Ingester
|
7
|
+
# Ingester for a work and the file sets listed under its :FileSets option.
#
# The model class name is taken from the :type option unless
# :af_model_class_name is already present in the configuration.
class WorkIngester < ActiveFedoraBaseIngester
  # The list of per-file-set configurations (empty when none were given).
  attr_reader :file_sets_config

  # @param [Hash] config Ingester options. :type and :FileSets are consumed
  #   here; the remaining options are handed to the superclass.
  def initialize(config = {})
    # Work on a copy: deleting :type and :FileSets below would otherwise
    # strip those keys from the hash owned by the caller.
    config = config.dup
    # TODO: Throw a useful custom error when :type option is missing.
    config[:af_model_class_name] ||= config.delete(:type)
    @file_sets_config = config.delete(:FileSets) || []
    super(config)
  end

  # Assigns properties to the work and its file sets, applies depositor
  # metadata, saves the work, then adds the file sets as members.
  #
  # @return The model instance exposed by `af_model`.
  def run!
    assign_properties!
    assign_related_file_set_properties!
    apply_depositor_metadata!
    save_model!
    assign_file_sets_to_work!
    # return the new instance of the ActiveFedora model
    af_model
  end

  private

  # Applies depositor metadata to the model; a no-op without a depositor.
  def apply_depositor_metadata!
    af_model.apply_depositor_metadata(depositor) if depositor
  end

  # Runs property assignment on every nested file set ingester.
  def assign_related_file_set_properties!
    file_set_ingesters.each { |file_set_ingester| file_set_ingester.assign_properties! }
  end

  # Appends each file set ingester's model to the work's members.
  def assign_file_sets_to_work!
    file_set_ingesters.each do |file_set_ingester|
      af_model.members += [file_set_ingester.af_model]
    end
  end

  # Builds (once) one FileSetIngester per entry in the :FileSets option and
  # hands each the SIP, shared SIP, and iteration of this ingester where the
  # file set ingester accepts them.
  def file_set_ingesters
    @file_set_ingesters ||= file_sets_config.map do |file_set_config|
      Hyrax::Ingest::Ingester::FileSetIngester.new(file_set_config).tap do |file_set_ingester|
        file_set_ingester.sip = sip if file_set_ingester.respond_to?(:sip=)
        file_set_ingester.shared_sip = shared_sip if file_set_ingester.respond_to?(:shared_sip=)
        file_set_ingester.iteration = iteration if file_set_ingester.respond_to?(:iteration=)
      end
    end
  end
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'hyrax/ingest/errors'
|
2
|
+
|
3
|
+
module Hyrax
|
4
|
+
module Ingest
|
5
|
+
module Reporting
|
6
|
+
# Holds the defaults used when rendering and writing ingest reports.
class Configuration
  # Path of the ERB template used when no template is given explicitly.
  attr_reader :default_template_path
  # Path the report is written to when no filename is given explicitly.
  attr_accessor :default_output_file

  # Points the template default at the HTML report view located in the
  # `views` directory next to this file, and the output default at
  # `hyrax_ingest_report.html` resolved against the current directory.
  def initialize
    views_dir = File.expand_path('views', File.dirname(__FILE__))
    @default_template_path = File.join(views_dir, 'hyrax_ingest_report.html.erb')
    @default_output_file = File.expand_path('hyrax_ingest_report.html')
  end

  # @param [String] path Location of an existing template file.
  # @raise [Hyrax::Ingest::Errors::ConfigurationError] when nothing exists
  #   at the given path.
  def default_template_path=(path)
    unless File.exist?(path)
      raise Hyrax::Ingest::Errors::ConfigurationError, "\"#{path}\" does not exist"
    end

    @default_template_path = path
  end
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'erb'
|
2
|
+
require 'functional_hash'
|
3
|
+
require 'hyrax/ingest/reporting/configuration'
|
4
|
+
|
5
|
+
module Hyrax
|
6
|
+
module Ingest
|
7
|
+
module Reporting
|
8
|
+
# Collects statistics and errors for an ingest and renders them through an
# ERB template.
class Report
  # The stat tracker built by #default_stat.
  attr_reader :stat

  def initialize
    @stat = default_stat
  end

  # Builds a fresh FunctionalHash stat tracker, stores it in @stat, and
  # returns it. Default values are added the same way as with a Hash.
  def default_stat
    tracker = FunctionalHash.new

    # Stores an array of all SIP paths.
    tracker[:sip_paths] = []

    # Stores a list of all files from SIPs that are part of the ingest.
    tracker[:files] = []

    # Stores a hash where keys are Fetcher classes, and values are
    # the occurrences of missing required values.
    tracker[:missing_required_values] = {}

    # Adds up the number of occurrences across all fetcher classes.
    tracker[:total_missing_required_values] = Proc.new do |_s|
      # Iterating a Hash yields [key, value] pairs; only the value (the
      # list of occurrences) matters for the total.
      tracker[:missing_required_values].inject(0) do |total, (_fetcher_class, occurrences)|
        total + occurrences.count
      end
    end

    # Filters the :missing_required_values hash to those for XML files.
    # stat[:xml_files_missing_required_values] = Proc.new do |s|
    #   s[:missing_required_values].select { |fetcher_class, params| fetcher_class.to_s =~ /XMLFile$/ }
    # end

    # Define a functional hash value that returns the count of the given
    # key, or 0 when the stored value cannot be counted.
    tracker[:count] = Proc.new do |s, key_to_count|
      countable = s[key_to_count].respond_to?(:count)
      countable ? s[key_to_count].count : 0
    end

    tracker[:models_saved] = []
    tracker[:models_failed] = []

    @stat = tracker
  end

  # Renders the report with this object as the template's binding.
  #
  # @param [String, nil] template_path ERB template to use; falls back to
  #   Reporting.config.default_template_path when nil.
  # @return [String] The rendered report.
  def render(template_path: nil)
    template_path ||= Reporting.config.default_template_path
    absolute_path = File.expand_path(template_path)
    ERB.new(File.read(absolute_path)).result(binding)
  end

  # Renders the report and writes it to a file.
  #
  # @param [String, nil] filename Destination; falls back to
  #   Reporting.config.default_output_file when nil.
  # @param [String, nil] template_path Passed through to #render.
  def write_to_file(filename: nil, template_path: nil)
    filename ||= Reporting.config.default_output_file
    rendered = render(template_path: template_path)
    File.write(filename, rendered)
  end

  # Records an error against the report.
  #
  # @return [Array] The list of errors recorded so far.
  def failed_with(error)
    errors.push(error)
  end

  # @return [Array] The errors recorded so far (empty until one is added).
  def errors
    @errors ||= []
  end

  # @return [Boolean] true once at least one error has been recorded.
  def failed?
    errors.size.positive?
  end
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
<%# HTML summary of an ingest. Rendered with a binding that provides `stat`, `errors` and `failed?`. Free-text values are HTML-escaped so characters such as "<" in error messages or parameter values cannot break the markup. %>
<style>

  dl.inline dd {
    display: inline;
    margin: 0;
  }

  dl.inline dd:after {
    display: block;
    content: '';
  }

  dl.inline dt {
    display: inline-block;
    /*min-width: 100px;*/
    width: 200px;
  }

  h3 {
    margin-left: 20px;
  }

</style>

<h1>Hyrax Ingest Report Summary</h1>

<% if failed? %>
  <h2><strong>Ingest Failed!</strong></h2>
  <p>The following errors caused the ingest to fail:</p>
  <ol>
    <% errors.each do |error| %>
      <li><%= ERB::Util.html_escape(error) %></li>
    <% end %>
  </ol>
<% end %>


<h2>Ingest Details:</h2>
<dl class="inline">
  <dt>Ingest configuration file:</dt>
  <dd><%= ERB::Util.html_escape(stat[:config_file_path]) %></dd>
  <dt>Started at:</dt>
  <dd><%= stat[:datetime_started] %></dd>
  <dt>Completed at:</dt>
  <dd><%= stat[:datetime_completed] %></dd>
  <dt>Batch size:</dt>
  <dd><%= stat[:batch_size] %></dd>
  <dt>Total # of source files used:</dt>
  <dd><%= stat[:count, :files] %></dd>
  <dt>Records ingested:</dt>
  <dd><%= stat[:count, :models_saved] %></dd>
  <dt>Records failed:</dt>
  <dd><%= stat[:count, :models_failed] %></dd>
</dl>

<h2>Missing Required Values: <%= stat[:total_missing_required_values] %> total</h2>
<% if stat[:total_missing_required_values] > 0 %>
  <% stat[:missing_required_values].each do |fetcher_class, list_of_param_hashes| %>
    <%# Strip the namespace so only the bare class name is shown. %>
    <% fetcher_class_short_name = fetcher_class.to_s.gsub(/.*\:\:/, '') %>
    <h3>Missing required values from <%= ERB::Util.html_escape(fetcher_class_short_name) %>: <%= list_of_param_hashes.count %></h3>
    <ol>
      <% list_of_param_hashes.each do |param_hash| %>
        <li>
          <% if param_hash.empty? %>
            <em>No additional information</em>
          <% else %>
            <dl class="inline">
              <% param_hash.each do |key, val| %>
                <dt><%= ERB::Util.html_escape(key) %>:</dt><dd><%= ERB::Util.html_escape(val) %></dd>
              <% end %>
            </dl>
          <% end %>
        </li>
      <% end %>
    </ol>
  <% end %>
<% end %>
|
@@ -0,0 +1,21 @@
|
|
1
|
+
=============================================
======== Hyrax Ingest Report Summary ========
=============================================

Ingest configuration file: <%= stat[:config_file_path] %>
Started at: <%= stat[:datetime_started] %>
Completed at: <%= stat[:datetime_completed] %>
Batch size: <%= stat[:batch_size] %>
Total # of source files used: <%= stat[:count, :files] %>
Records ingested: <%= stat[:count, :models_saved] %>
Records failed: <%= stat[:count, :models_failed] %>

<%# The count is an Integer and 0 is truthy in Ruby, so it must be compared explicitly; otherwise the section below is entered even when there is nothing to iterate. %><% if stat[:count, :xml_files_missing_required_values] > 0 %>
XML Files missing required values...
<% stat[:xml_files_missing_required_values].each do |filename, xpaths| %>
Values missing from: <%= filename %>
<% xpaths.each do |xpath| %>
- <%= xpath %>
<% end %>
<% end %>
<% end %>
|
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'hyrax/ingest/configuration'
|
2
|
+
require 'hyrax/ingest/ingester'
|
3
|
+
require 'hyrax/ingest/reporting'
|
4
|
+
require 'hyrax/ingest/has_sip'
|
5
|
+
require 'hyrax/ingest/has_shared_sip'
|
6
|
+
require 'hyrax/ingest/has_iteration'
|
7
|
+
require 'hyrax/ingest/has_logger'
|
8
|
+
require 'hyrax/ingest/has_report'
|
9
|
+
require 'hyrax/ingest/has_depositor'
|
10
|
+
|
11
|
+
|
12
|
+
module Hyrax
|
13
|
+
module Ingest
|
14
|
+
# Runs one iteration of an ingest: builds the ingesters described by the
# ingest configuration file, hands each of them the SIP, shared SIP,
# iteration, logger, report, and depositor of this runner (where the
# ingester accepts them), and runs them in order.
class Runner
  include Reporting
  include Interloper
  include HasSIP
  include HasSharedSIP
  include HasIteration
  include HasReport
  include HasLogger
  include HasDepositor

  # The Hyrax::Ingest::Configuration built from the configuration file.
  attr_reader :config

  # Hook registered for #run!: logs the start of the iteration and records
  # the start time, batch size, source files, and configuration file path
  # in the report. Start time and batch size are only set when not already
  # present.
  before(:run!) do
    logger.info "Ingest iteration #{iteration + 1} started."
    report.stat[:datetime_started] ||= DateTime.now
    report.stat[:batch_size] ||= 1
    report.stat[:files] += sip.file_paths if sip
    report.stat[:files] += shared_sip.file_paths if shared_sip
    report.stat[:config_file_path] = config.config_file_path
  end

  # Hook registered for #run!: logs completion and records the completion
  # time unless one is already present.
  after(:run!) do
    logger.info "Ingest iteration #{iteration + 1} complete."
    report.stat[:datetime_completed] ||= DateTime.now
  end

  # @param [String] config_file_path Path to the ingest configuration file.
  # @param [String, nil] sip_path Path to the SIP for this iteration.
  # @param [String, nil] shared_sip_path Path to a SIP shared between iterations.
  # @param [#to_i] iteration Zero-based iteration number.
  # @param depositor Passed on to ingesters that accept a depositor.
  def initialize(config_file_path:, sip_path: nil, shared_sip_path: nil, iteration: 0, depositor: nil)
    self.sip = SIP.new(path: sip_path) if sip_path
    # Unlike the SIP above, the shared SIP is always assigned, explicitly
    # to nil when no path was given.
    self.shared_sip = shared_sip_path.nil? ? nil : SIP.new(path: shared_sip_path)
    self.iteration = iteration.to_i
    self.depositor = depositor
    @config = Hyrax::Ingest::Configuration.new(config_file_path: config_file_path)
  end

  # Runs every configured ingester.
  #
  # @return [Array] The return value of each ingester's #run!, in order.
  def run!
    ingesters.map(&:run!)
  end

  # TODO: Does not yet return IDs of associated objects that were ingested
  # as associated objects (i.e. objects that are nested under other
  # objects in the ingest configuration). It only returns IDs for objects that
  # are ingested per the top-level of ingest configuration.
  #
  # @return [Hash] Model classes mapped to the list of model ids, built
  #   from every top-level ingester that exposes `af_model`.
  def ingested_ids_by_type
    ingesters.each_with_object({}) do |ingester, ids_by_type|
      next unless ingester.respond_to?(:af_model)

      model = ingester.af_model
      (ids_by_type[model.class] ||= []) << model.id
    end
  end

  private

  # Builds (once) the ingesters described by the configuration. Each entry
  # is expected to be a single key-value pair: ingester name => options.
  #
  # @raise [Hyrax::Ingest::Errors::InvalidConfig] when an entry does not
  #   respond to `keys`.
  def ingesters
    @ingesters ||= config.ingester_configs.map do |ingester_config|
      # TODO: Better way to handle invalid config than throwing big
      # error msgs from here.
      unless ingester_config.respond_to?(:keys)
        raise Hyrax::Ingest::Errors::InvalidConfig.new('Ingester config must be a single key value pair, where the key is the name of the ingester, and the value is the ingester configuration.')
      end

      ingester_name = ingester_config.keys.first
      ingester_options = ingester_config.values.first
      Hyrax::Ingest::Ingester.factory(ingester_name, ingester_options).tap do |ingester|
        ingester.sip = sip if ingester.respond_to?(:sip=)
        ingester.shared_sip = shared_sip if ingester.respond_to?(:shared_sip=)
        ingester.iteration = iteration if ingester.respond_to?(:iteration=)
        ingester.logger = logger if ingester.respond_to?(:logger=)
        ingester.report = report if ingester.respond_to?(:report=)
        ingester.depositor = depositor if ingester.respond_to?(:depositor=)
      end
    end
  end
end
|
102
|
+
end
|
103
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'tmpdir'
require 'hyrax/ingest/errors'
require 'minitar'
|
3
|
+
|
4
|
+
module Hyrax
|
5
|
+
module Ingest
|
6
|
+
# A model for reading Submission Information Packages (SIPs) from a filesystem.
#
# @attr_reader path The filesystem location this SIP was created with.
class SIP
  attr_reader :path

  # @param [String] path The path to the SIP on the filesystem.
  # @raise [Hyrax::Ingest::Errors::InvalidSIPPath] when nothing exists at path.
  def initialize(path:)
    # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
    raise Hyrax::Ingest::Errors::InvalidSIPPath.new(path.to_s) unless File.exist?(path.to_s)
    @path = path
  end

  # The result is computed once and memoized.
  #
  # NOTE(review): a tarball is itself a regular file, so it is reported as a
  # single file and the tarball branch is only reached for paths that are
  # neither regular files nor directories. Confirm whether tarballs were
  # meant to be unpacked before changing the order.
  #
  # @return [Array] The paths of the files that are part of the SIP.
  def file_paths
    @file_paths ||= single_file_path || file_paths_from_dir || file_paths_from_tarball || []
  end

  # @param [String, Regexp] basename_or_regex A file basename, a Regexp, or
  #   a string representation of a regex (wrapped in forward slashes).
  # @return [String] The path of the first file in the SIP matching the param.
  # @raise [Hyrax::Ingest::Errors::FileNotFoundInSIP] when nothing matches.
  def find_file_path(basename_or_regex)
    file_path = file_path_from_regex(basename_or_regex) || file_path_from_basename(basename_or_regex)
    raise Hyrax::Ingest::Errors::FileNotFoundInSIP.new(path, basename_or_regex) unless file_path
    file_path
  end

  # Reads the whole content of a file from the SIP.
  #
  # @param [String, Regexp] basename_or_regex See #find_file_path.
  # @return [String] The contents of the matched file.
  # @raise [Hyrax::Ingest::Errors::FileNotFoundInSIP] when nothing matches.
  def read_file(basename_or_regex)
    File.read(find_file_path(basename_or_regex))
  end

  private

  # @param [String, Regexp] regex Either a Regexp object or a string
  #   beginning and ending in forward slashes, that can be converted to
  #   a regex.
  # @return [String, nil] The path of the first file whose basename matches;
  #   nil if no file matches, or if 'regex' cannot be used as a regular
  #   expression.
  def file_path_from_regex(regex)
    # \A and \z anchor the whole string; ^ and $ would also accept a
    # slash-wrapped first line of a multi-line string.
    regex = Regexp.new(regex.to_s[1..-2]) if regex.to_s.match?(%r{\A/.*/\z})
    return unless regex.is_a?(Regexp)

    file_paths.find { |file| File.basename(file) =~ regex }
  end

  # @param [String] filename The basename of the file within the SIP.
  # @return [String, nil] The path of the first file with that basename;
  #   nil if there is none.
  def file_path_from_basename(filename)
    file_paths.find { |file| File.basename(file) == filename }
  end

  # @return [Array, nil] An Array containing the one and only file pointed
  #   to by #path; nil when #path is not a regular file.
  def single_file_path
    Array(path) if File.file?(path)
  end

  # @return [Array, nil] Every regular file below #path (recursively); nil
  #   when #path is not a directory.
  def file_paths_from_dir
    return unless File.directory?(path)

    Dir.glob("#{path}/**/*").select { |entry| File.file?(entry) }
  end

  # Unpacks #path into a temporary directory when the `file` utility
  # reports it as a tar archive.
  #
  # @return [Array, nil] The regular files that were unpacked; nil when
  #   #path is not recognised as a tar archive.
  def file_paths_from_tarball
    # TODO: this is the best test I could find for reliably determining
    # whether a file was a TAR archive or not, but it seems finicky, as
    # it probably depends on your operating system, or what kind of tarball
    # it is. Find something better?
    #
    # The command is given as an argv array so no shell is involved and a
    # path containing quotes cannot alter the command.
    file_description = IO.popen(['file', path.to_s], &:read)
    return unless file_description =~ /tar archive/

    Minitar.unpack(path, tmp_dir_for_unpacked_tarball)
    # Only files are returned, matching what is reported for directories.
    Dir.glob("#{tmp_dir_for_unpacked_tarball}/**/*").select { |entry| File.file?(entry) }
  end

  # @return [String] Directory under the system temp dir, named after the
  #   basename of #path, that the tarball is unpacked into.
  def tmp_dir_for_unpacked_tarball
    "#{Dir.tmpdir}/#{File.basename(path)}.unpacked"
  end
end
|
91
|
+
end
|
92
|
+
end
|