fedora-migrate 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.rspec +2 -0
- data/.travis.yml +19 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +22 -0
- data/README.md +31 -0
- data/Rakefile +5 -0
- data/config/fedora.yml +14 -0
- data/config/fedora3.yml +12 -0
- data/config/jetty.yml +6 -0
- data/config/solr.yml +15 -0
- data/fedora-migrate.gemspec +30 -0
- data/lib/fedora-migrate.rb +82 -0
- data/lib/fedora_migrate/datastream_mover.rb +78 -0
- data/lib/fedora_migrate/errors.rb +7 -0
- data/lib/fedora_migrate/file_configurator.rb +34 -0
- data/lib/fedora_migrate/hooks.rb +11 -0
- data/lib/fedora_migrate/logger.rb +36 -0
- data/lib/fedora_migrate/migration_options.rb +11 -0
- data/lib/fedora_migrate/mover.rb +44 -0
- data/lib/fedora_migrate/object_mover.rb +62 -0
- data/lib/fedora_migrate/permissions.rb +32 -0
- data/lib/fedora_migrate/permissions_mover.rb +31 -0
- data/lib/fedora_migrate/rdf_datastream_mover.rb +28 -0
- data/lib/fedora_migrate/rdf_datastream_parser.rb +29 -0
- data/lib/fedora_migrate/rels_ext_datastream_mover.rb +90 -0
- data/lib/fedora_migrate/repository_migrator.rb +60 -0
- data/lib/fedora_migrate/rights_metadata.rb +281 -0
- data/lib/fedora_migrate/rubydora_connection.rb +21 -0
- data/lib/fedora_migrate/triple_converter.rb +39 -0
- data/lib/fedora_migrate/version.rb +3 -0
- data/lib/tasks/fedora-migrate.rake +45 -0
- data/spec/fixtures/datastreams/rdf_ntriples_datastream.txt +2 -0
- data/spec/fixtures/datastreams/sufia-rb68xc089-characterization.xml +27 -0
- data/spec/fixtures/objects/f3-migration-a.xml +110 -0
- data/spec/fixtures/objects/gf-versioned-content.xml +2776 -0
- data/spec/fixtures/objects/sufia-batch-gf-1.xml +94 -0
- data/spec/fixtures/objects/sufia-batch-gf-2.xml +93 -0
- data/spec/fixtures/objects/sufia-batch.xml +51 -0
- data/spec/integration/content_versions_spec.rb +42 -0
- data/spec/integration/fedora3_interface_spec.rb +23 -0
- data/spec/integration/object_migration_spec.rb +112 -0
- data/spec/integration/permission_migration_spec.rb +13 -0
- data/spec/integration/rdf_migration_spec.rb +22 -0
- data/spec/integration/relationship_migration_spec.rb +51 -0
- data/spec/integration/repository_migration_spec.rb +59 -0
- data/spec/spec_helper.rb +39 -0
- data/spec/support/example_model.rb +36 -0
- data/spec/unit/datastream_mover_spec.rb +39 -0
- data/spec/unit/fedora_migrate_spec.rb +19 -0
- data/spec/unit/file_configurator_spec.rb +17 -0
- data/spec/unit/mover_spec.rb +39 -0
- data/spec/unit/object_mover_spec.rb +38 -0
- data/spec/unit/permissions_mover_spec.rb +53 -0
- data/spec/unit/rdf_datastream_mover_spec.rb +8 -0
- data/spec/unit/rdf_datastream_parser_spec.rb +38 -0
- data/spec/unit/rels_ext_datastream_mover_spec.rb +36 -0
- data/spec/unit/repository_migrator_spec.rb +43 -0
- data/spec/unit/rubydora_connection_spec.rb +25 -0
- data/spec/unit/triple_converter_spec.rb +35 -0
- data/tasks/dev.rake +37 -0
- metadata +246 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 1cd495240ef6ef07dafbfa15720b1c5eca3a4146
|
4
|
+
data.tar.gz: 4d59a16932745f6082133cf3a5492a024e499f84
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3a906945035f3966d5c5ecb24668081da701e52a5cec8f881b0b75d3e938ee72a53354008315911e1dd3914793311f9efb79991aeb78199ef210f659e6b9c111
|
7
|
+
data.tar.gz: 18a47d1a58619bd9a57f4489537f79a35655f184eac5f3a47e2424f5feef646946e407d1bdc6c52504cd529d42f7f61bfa98935dab8f8e98c83e881178660ff6
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- 2.1
|
4
|
+
- 2.0
|
5
|
+
env:
|
6
|
+
global:
|
7
|
+
- NOKOGIRI_USE_SYSTEM_LIBRARIES=true
|
8
|
+
notifications:
|
9
|
+
email:
|
10
|
+
recipients:
|
11
|
+
- "ul-dlt-hydra@lists.psu.edu"
|
12
|
+
on_success: "change"
|
13
|
+
on_failure: "always"
|
14
|
+
irc:
|
15
|
+
channels:
|
16
|
+
- "irc.freenode.org#scholarsphere"
|
17
|
+
- "irc.freenode.org#projecthydra"
|
18
|
+
template:
|
19
|
+
- "%{repository}//%{branch}@%{commit} by %{author}: %{message} - %{build_url}"
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Adam Wead
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# FedoraMigrate
|
2
|
+
|
3
|
+
Migrates content from a Fedora3 repository to a Fedora4 one.
|
4
|
+
|
5
|
+
## Status
|
6
|
+
|
7
|
+
Very alpha. This has been tested against Penn State's existing ScholarSphere applications, as well
|
8
|
+
as generic Sufia applications.
|
9
|
+
|
10
|
+
## TODOs
|
11
|
+
|
12
|
+
See the list of issues.
|
13
|
+
|
14
|
+
## Contributing
|
15
|
+
|
16
|
+
### Hydra Developers
|
17
|
+
|
18
|
+
For Hydra developers, or anyone with a signed CLA, please clone the repo and submit PRs via
|
19
|
+
topic branches. If you don't have rights to projecthydra-labs and do have a signed
|
20
|
+
CLA, please send a note to hydra-tech@googlegroups.com.
|
21
|
+
|
22
|
+
1. Clone it
|
23
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
24
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
25
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
26
|
+
5. Create a new Pull Request
|
27
|
+
|
28
|
+
### Non-Hydra Developers
|
29
|
+
|
30
|
+
Anyone is welcome to use this software and report issues.
|
31
|
+
In order to merge any work contributed, you'll need to sign a contributor license agreement.
|
data/Rakefile
ADDED
data/config/fedora.yml
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
development:
|
2
|
+
user: fedoraAdmin
|
3
|
+
password: fedoraAdmin
|
4
|
+
url: http://127.0.0.1:8983/fedora/rest
|
5
|
+
base_path: /dev
|
6
|
+
test:
|
7
|
+
user: fedoraAdmin
|
8
|
+
password: fedoraAdmin
|
9
|
+
url: http://localhost:8983/fedora/rest
|
10
|
+
base_path: /test
|
11
|
+
production:
|
12
|
+
user: fedoraAdmin
|
13
|
+
password: fedoraAdmin
|
14
|
+
url: http://127.0.0.1:8983/fedora/rest
|
data/config/fedora3.yml
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
development:
|
2
|
+
user: fedoraAdmin
|
3
|
+
password: fedoraAdmin
|
4
|
+
url: http://localhost:8983/fedora3
|
5
|
+
test:
|
6
|
+
user: fedoraAdmin
|
7
|
+
password: fedoraAdmin
|
8
|
+
url: http://localhost:8983/fedora3
|
9
|
+
production:
|
10
|
+
user: fedoraAdmin
|
11
|
+
password: fedoraAdmin
|
12
|
+
url: http://localhost:8983/fedora3
|
data/config/jetty.yml
ADDED
data/config/solr.yml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
development:
|
2
|
+
default:
|
3
|
+
url: http://localhost:8983/solr/development
|
4
|
+
full_text:
|
5
|
+
url: http://localhost:8983/solr/development
|
6
|
+
test:
|
7
|
+
default:
|
8
|
+
url: http://localhost:8983/solr/test
|
9
|
+
full_text:
|
10
|
+
url: http://localhost:8983/solr/test
|
11
|
+
production:
|
12
|
+
default:
|
13
|
+
url: http://localhost:8080/solr/production
|
14
|
+
full_text:
|
15
|
+
url: http://localhost:8080/solr/production
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for fedora-migrate.
# The gem's own lib directory is put on the load path first so that the
# version constant can be required before the specification is built.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'fedora_migrate/version'

Gem::Specification.new do |gem|
  # Identity
  gem.name        = "fedora-migrate"
  gem.version     = FedoraMigrate::VERSION
  gem.authors     = ["Adam Wead"]
  gem.email       = ["amsterdamos@gmail.com"]
  gem.summary     = "Migrate Hydra-based repository data from Fedora3 to Fedora4"
  gem.description = "Migrates data (models, datastreams, content) from a Fedora3 repository to Fedora4"
  gem.homepage    = ""
  # NOTE(review): the LICENSE.txt shipped with this package contains the MIT
  # License text, while "APACHE2" is declared here -- confirm which is intended.
  gem.license     = "APACHE2"

  # Packaged files: everything tracked by git, with executables and test
  # files derived from that list by path prefix.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependencies
  gem.add_dependency "rubydora", "~> 1.8"
  gem.add_dependency "hydra-head", "~> 9.0.0.beta1"

  # Development-only dependencies
  gem.add_development_dependency "bundler", "~> 1.7"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "rspec"
  gem.add_development_dependency "jettywrapper"
  gem.add_development_dependency "equivalent-xml"
  gem.add_development_dependency "sufia", "~> 6.0.0.beta1"
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
require "fedora_migrate/version"
|
2
|
+
require "active_support"
|
3
|
+
require "active_fedora"
|
4
|
+
require "hydra-core"
|
5
|
+
|
6
|
+
# Loads rake tasks
Dir[File.expand_path(File.join(File.dirname(__FILE__), "tasks/*.rake"))].each { |ext| load ext } if defined?(Rake)

# Shenanigans because we're not in a Rails environment and we need
# Hydra::AccessControls
Hydra::Engine.config.autoload_paths.each { |path| $LOAD_PATH.unshift path }

# in gem version 2.4, .find_by_name isn't pulling up gems given in the Gemfile
# as opposed to those in the gemspec file.
# This is a workaround: prefer the specification that is actually activated
# for this process, and only fall back to scanning every installed
# specification. The name is compared exactly so that a gem which merely
# contains "hydra-access-controls" in its name cannot be picked up, and
# HAC_DIR is assigned exactly once instead of being re-assigned in a loop.
hac_spec = Gem.loaded_specs["hydra-access-controls"] ||
           Gem::Specification.all.find { |g| g.name == "hydra-access-controls" }

# Fail with a message that names the real problem rather than an
# "uninitialized constant HAC_DIR" error on the first require below.
raise LoadError, "fedora-migrate requires the hydra-access-controls gem, but it could not be located" if hac_spec.nil?

HAC_DIR = hac_spec.gem_dir

# Files from hydra-access-controls that are loaded by hand, in the order in
# which they must be required.
%w[
  app/vocabularies/acl
  app/vocabularies/hydra/acl
  app/models/role_mapper
  app/models/ability
  app/models/hydra/access_controls/access_control_list
  app/models/hydra/access_controls/permission
  app/models/hydra/access_controls/embargo
  app/models/hydra/access_controls/lease
  app/services/hydra/lease_service
  app/services/hydra/embargo_service
  app/validators/hydra/future_date_validator
].each { |relative_path| require File.join(HAC_DIR, relative_path) }
|
29
|
+
|
30
|
+
# Namespace and entry point of the gem. Its components are registered for
# autoloading, and a handful of module-level helpers expose the Fedora 3
# source configuration/connection and run a migration.
module FedoraMigrate
  extend ActiveSupport::Autoload

  autoload :DatastreamMover
  autoload :Errors
  autoload :FileConfigurator
  autoload :Hooks
  autoload :Logger
  autoload :MigrationOptions
  autoload :Mover
  autoload :ObjectMover
  autoload :Permissions
  autoload :PermissionsMover
  autoload :RDFDatastreamMover
  autoload :RDFDatastreamParser
  autoload :RelsExtDatastreamMover
  autoload :RepositoryMigrator
  autoload :RightsMetadata
  autoload :RubydoraConnection
  autoload :TripleConverter

  class << self
    # Object asked for the source settings through #fedora3_config.
    # Only the accessor is generated here: the readers below are written out
    # by hand because they memoize, so generating them with attr_reader as
    # well would only define methods that are immediately redefined.
    attr_accessor :configurator

    # Configuration built from the configurator's #fedora3_config.
    # Built on first use and then reused.
    def fedora_config
      @fedora_config ||= ActiveFedora::Config.new(configurator.fedora3_config)
    end

    # Placeholder value; no real options are provided yet.
    def config_options
      @config_options ||= "comming soon!"
    end

    # Connection wrapper created from the credentials of #fedora_config.
    # Built on first use and then reused.
    def source
      @source ||= FedoraMigrate::RubydoraConnection.new(fedora_config.credentials)
    end

    # Looks +id+ up through the source connection and returns whatever the
    # connection's #find returns.
    def find(id)
      source.connection.find(id)
    end

    # Runs a migration: objects first, then relationships.
    #
    # +args+ is read with [:namespace] and [:options]; both values are passed
    # unchanged to FedoraMigrate::RepositoryMigrator.new.
    # Returns the migrator's #results.
    def migrate_repository(args)
      migrator = FedoraMigrate::RepositoryMigrator.new(args[:namespace], args[:options])
      migrator.migrate_objects
      migrator.migrate_relationships
      migrator.results
    end
  end

  # Default configurator, installed only when none has been assigned.
  self.configurator ||= FedoraMigrate::FileConfigurator.new
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module FedoraMigrate
  # Copies the content and basic properties of a source datastream onto a
  # target, either version by version or as a single current copy, and
  # compares checksums afterwards.
  #
  # +source+, +target+ and +save+ are not defined in this class; presumably
  # they are provided by Mover -- verify against that class.
  class DatastreamMover < Mover
    # Explicit override for #versionable?. When left nil, the target decides.
    attr_accessor :versionable

    # Raises FedoraMigrate::Errors::MigrationError when no target was given.
    def post_initialize
      raise FedoraMigrate::Errors::MigrationError, "You must supply a target" if target.nil?
    end

    # The explicit +versionable+ setting when one was assigned, otherwise
    # whatever the target reports.
    def versionable?
      versionable.nil? ? target_versionable? : versionable
    end

    # The target's own #versionable? answer, or false when the target does
    # not respond to that method.
    def target_versionable?
      target.respond_to?(:versionable?) && target.versionable?
    end

    # Migrates every version when versioning applies, otherwise only the
    # current content.
    def migrate
      versionable? ? migrate_versions : migrate_current
    end

    private

    # Reloading the target, otherwise #get_checksum is nil
    def migrate_current
      migrate_content
      target.reload
      verify
    end

    # Copies each entry of source.versions in the order given, creating a
    # version on the target after each copy and verifying it.
    def migrate_versions
      source.versions.each do |version|
        migrate_content(version)
        target.create_version
        verify(version)
      end
    end

    # Copies content, label, mime type and creation date from +datastream+
    # (the source when omitted) onto the target and saves it. When the
    # datastream has no content, this is logged and true is returned without
    # touching the target.
    #
    # TODO: lastModified isn't the right place for the original creation date (issue #1)
    def migrate_content(datastream = nil)
      datastream ||= source
      if datastream.content.nil?
        Logger.info "datastream '#{datastream.dsid}' is nil. It's probably defined in the target but not present in the source"
        return true
      end
      target.content = datastream.content
      target.original_name = datastream.label
      target.mime_type = datastream.mimeType
      target.last_modified = datastream.createDate
      Logger.info target.inspect
      save
    end

    # Compares the checksum of +datastream+ (the source when omitted) with
    # the part of the target's digest after the last ":".
    #
    # Returns true when the checksums agree or when the datastream's checksum
    # is "none", and false after logging a fatal message when they differ.
    # The target digest is only read when there is something to compare.
    #
    # TODO: Reporting mechanism? If there isn't a checksum it defaults to "none" (issue #4)
    def verify(datastream = nil)
      datastream ||= source
      return true if datastream.checksum == "none"

      target_checksum = get_checksum
      return true if datastream.checksum == target_checksum.split(/:/).last

      Logger.fatal "expected #{datastream.dsid} #{datastream.checksumType} #{datastream.checksum} to match #{target_checksum}"
      false
    end

    # First entry of the target's digest as a string ("" when there is none).
    def get_checksum
      target.digest.first.to_s
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module FedoraMigrate
  # Configurator that reads the settings of the Fedora 3 source repository
  # from the file located with get_config_path(:fedora3).
  class FileConfigurator < ActiveFedora::FileConfigurator
    # Loads the Fedora 3 configuration if necessary and returns it.
    def fedora3_config
      load_fedora3_config
      @fedora_config
    end

    # Reads the configuration file, runs it through ERB, parses it as YAML
    # and stores the section for ActiveFedora.environment, with symbolized
    # keys, in @fedora_config. A non-empty @fedora_config is returned as is
    # without reading the file again.
    #
    # NOTE(review): @fedora_config is expected to be initialised (non-nil)
    # before this is called, presumably by the superclass -- confirm.
    #
    # Raises RuntimeError when the file cannot be read, evaluated as ERB or
    # parsed as YAML.
    def load_fedora3_config
      return @fedora_config unless @fedora_config.empty?
      @fedora_config_path = get_config_path(:fedora3)
      Logger.info("loading fedora config from #{::File.expand_path(@fedora_config_path)}")

      begin
        config_erb = ERB.new(IO.read(@fedora_config_path)).result(binding)
      rescue StandardError, SyntaxError => e
        # SyntaxError is listed explicitly because invalid Ruby inside the
        # template is not a StandardError; signals and exit requests are
        # deliberately no longer swallowed here.
        raise("fedora3.yml was found, but could not be parsed with ERB. \n#{e.inspect}")
      end

      begin
        fedora_yml = YAML.load(config_erb)
      rescue Psych::SyntaxError => e
        raise "fedora3.yml was found, but could not be parsed. " \
              "Error #{e.message}"
      end

      # An empty document does not parse to a Hash; treat it as "no settings".
      config = (fedora_yml || {}).symbolize_keys

      cfg = config[ActiveFedora.environment.to_sym] || {}
      @fedora_config = cfg.kind_of?(Array) ? cfg.map(&:symbolize_keys) : cfg.symbolize_keys
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require "logger"

module FedoraMigrate
  # Class-level logging facade. Messages go to ActiveFedora::Base.logger when
  # one is set and to a logger writing to STDOUT otherwise. #info, #warn and
  # #fatal prefix the message with "FedoraMigrate <LEVEL>: #<calling method>";
  # every other method the underlying logger responds to is forwarded as is.
  class Logger
    class << self
      # Forwards any method the underlying logger responds to. Errors raised
      # by the logger itself propagate unchanged; only methods the logger
      # does not know end up as NoMethodError.
      def method_missing(method_name, *arguments, &block)
        return super unless logger.respond_to?(method_name)

        logger.public_send(method_name, *arguments, &block)
      end

      # Keeps respond_to? in step with #method_missing.
      def respond_to_missing?(method_name, include_private = false)
        logger.respond_to?(method_name) || super
      end

      # Logs +msg+ at info level, tagged with the calling method's label.
      def info(msg)
        logger.info(tagged("INFO", caller_locations(1, 1).first.label, msg))
      end

      # Logs +msg+ at warn level, tagged with the calling method's label.
      # The logger is called directly: going through +super+ here would reach
      # Kernel#warn and write to standard error instead of the logger.
      def warn(msg)
        logger.warn(tagged("WARN", caller_locations(1, 1).first.label, msg))
      end

      # Logs +msg+ at fatal level, tagged with the calling method's label.
      def fatal(msg)
        logger.fatal(tagged("FATAL", caller_locations(1, 1).first.label, msg))
      end

      private

      # Builds the prefixed line. Interpolation is used so that messages which
      # are not Strings are accepted as well.
      def tagged(level, origin, msg)
        "FedoraMigrate #{level}: ##{origin} #{msg}"
      end

      # The logger in use. The STDOUT fallback is created once and reused.
      def logger
        ActiveFedora::Base.logger || (@fallback_logger ||= ::Logger.new(STDOUT))
      end
    end
  end
end
|