fedora-migrate 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.rspec +2 -0
- data/.travis.yml +19 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +22 -0
- data/README.md +31 -0
- data/Rakefile +5 -0
- data/config/fedora.yml +14 -0
- data/config/fedora3.yml +12 -0
- data/config/jetty.yml +6 -0
- data/config/solr.yml +15 -0
- data/fedora-migrate.gemspec +30 -0
- data/lib/fedora-migrate.rb +82 -0
- data/lib/fedora_migrate/datastream_mover.rb +78 -0
- data/lib/fedora_migrate/errors.rb +7 -0
- data/lib/fedora_migrate/file_configurator.rb +34 -0
- data/lib/fedora_migrate/hooks.rb +11 -0
- data/lib/fedora_migrate/logger.rb +36 -0
- data/lib/fedora_migrate/migration_options.rb +11 -0
- data/lib/fedora_migrate/mover.rb +44 -0
- data/lib/fedora_migrate/object_mover.rb +62 -0
- data/lib/fedora_migrate/permissions.rb +32 -0
- data/lib/fedora_migrate/permissions_mover.rb +31 -0
- data/lib/fedora_migrate/rdf_datastream_mover.rb +28 -0
- data/lib/fedora_migrate/rdf_datastream_parser.rb +29 -0
- data/lib/fedora_migrate/rels_ext_datastream_mover.rb +90 -0
- data/lib/fedora_migrate/repository_migrator.rb +60 -0
- data/lib/fedora_migrate/rights_metadata.rb +281 -0
- data/lib/fedora_migrate/rubydora_connection.rb +21 -0
- data/lib/fedora_migrate/triple_converter.rb +39 -0
- data/lib/fedora_migrate/version.rb +3 -0
- data/lib/tasks/fedora-migrate.rake +45 -0
- data/spec/fixtures/datastreams/rdf_ntriples_datastream.txt +2 -0
- data/spec/fixtures/datastreams/sufia-rb68xc089-characterization.xml +27 -0
- data/spec/fixtures/objects/f3-migration-a.xml +110 -0
- data/spec/fixtures/objects/gf-versioned-content.xml +2776 -0
- data/spec/fixtures/objects/sufia-batch-gf-1.xml +94 -0
- data/spec/fixtures/objects/sufia-batch-gf-2.xml +93 -0
- data/spec/fixtures/objects/sufia-batch.xml +51 -0
- data/spec/integration/content_versions_spec.rb +42 -0
- data/spec/integration/fedora3_interface_spec.rb +23 -0
- data/spec/integration/object_migration_spec.rb +112 -0
- data/spec/integration/permission_migration_spec.rb +13 -0
- data/spec/integration/rdf_migration_spec.rb +22 -0
- data/spec/integration/relationship_migration_spec.rb +51 -0
- data/spec/integration/repository_migration_spec.rb +59 -0
- data/spec/spec_helper.rb +39 -0
- data/spec/support/example_model.rb +36 -0
- data/spec/unit/datastream_mover_spec.rb +39 -0
- data/spec/unit/fedora_migrate_spec.rb +19 -0
- data/spec/unit/file_configurator_spec.rb +17 -0
- data/spec/unit/mover_spec.rb +39 -0
- data/spec/unit/object_mover_spec.rb +38 -0
- data/spec/unit/permissions_mover_spec.rb +53 -0
- data/spec/unit/rdf_datastream_mover_spec.rb +8 -0
- data/spec/unit/rdf_datastream_parser_spec.rb +38 -0
- data/spec/unit/rels_ext_datastream_mover_spec.rb +36 -0
- data/spec/unit/repository_migrator_spec.rb +43 -0
- data/spec/unit/rubydora_connection_spec.rb +25 -0
- data/spec/unit/triple_converter_spec.rb +35 -0
- data/tasks/dev.rake +37 -0
- metadata +246 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 1cd495240ef6ef07dafbfa15720b1c5eca3a4146
|
4
|
+
data.tar.gz: 4d59a16932745f6082133cf3a5492a024e499f84
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3a906945035f3966d5c5ecb24668081da701e52a5cec8f881b0b75d3e938ee72a53354008315911e1dd3914793311f9efb79991aeb78199ef210f659e6b9c111
|
7
|
+
data.tar.gz: 18a47d1a58619bd9a57f4489537f79a35655f184eac5f3a47e2424f5feef646946e407d1bdc6c52504cd529d42f7f61bfa98935dab8f8e98c83e881178660ff6
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- 2.1
|
4
|
+
- 2.0
|
5
|
+
env:
|
6
|
+
global:
|
7
|
+
- NOKOGIRI_USE_SYSTEM_LIBRARIES=true
|
8
|
+
notifications:
|
9
|
+
email:
|
10
|
+
recipients:
|
11
|
+
- "ul-dlt-hydra@lists.psu.edu"
|
12
|
+
on_success: "change"
|
13
|
+
on_failure: "always"
|
14
|
+
irc:
|
15
|
+
channels:
|
16
|
+
- "irc.freenode.org#scholarsphere"
|
17
|
+
- "irc.freenode.org#projecthydra"
|
18
|
+
template:
|
19
|
+
- "%{repository}//%{branch}@%{commit} by %{author}: %{message} - %{build_url}"
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Adam Wead
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# FedoraMigrate
|
2
|
+
|
3
|
+
Migrates content from a Fedora3 repository to a Fedora4 one.
|
4
|
+
|
5
|
+
## Status
|
6
|
+
|
7
|
+
Very alpha. This has been tested against Penn State's existing ScholarSphere applications, as well
|
8
|
+
as generic Sufia applications.
|
9
|
+
|
10
|
+
## TODOs
|
11
|
+
|
12
|
+
See the list of issues.
|
13
|
+
|
14
|
+
## Contributing
|
15
|
+
|
16
|
+
### Hydra Developers
|
17
|
+
|
18
|
+
For Hydra developers, or anyone with a signed CLA, please clone the repo and submit PRs via
|
19
|
+
topic branches. If you don't have rights to projecthydra-labs and do have a signed
|
20
|
+
CLA, please send a note to hydra-tech@googlegroups.com.
|
21
|
+
|
22
|
+
1. Clone it
|
23
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
24
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
25
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
26
|
+
5. Create a new Pull Request
|
27
|
+
|
28
|
+
### Non-Hydra Developers
|
29
|
+
|
30
|
+
Anyone is welcome to use this software and report issues.
|
31
|
+
In order to merge any work contributed, you'll need to sign a contributor license agreement.
|
data/Rakefile
ADDED
data/config/fedora.yml
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
development:
|
2
|
+
user: fedoraAdmin
|
3
|
+
password: fedoraAdmin
|
4
|
+
url: http://127.0.0.1:8983/fedora/rest
|
5
|
+
base_path: /dev
|
6
|
+
test:
|
7
|
+
user: fedoraAdmin
|
8
|
+
password: fedoraAdmin
|
9
|
+
url: http://localhost:8983/fedora/rest
|
10
|
+
base_path: /test
|
11
|
+
production:
|
12
|
+
user: fedoraAdmin
|
13
|
+
password: fedoraAdmin
|
14
|
+
url: http://127.0.0.1:8983/fedora/rest
|
data/config/fedora3.yml
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
development:
|
2
|
+
user: fedoraAdmin
|
3
|
+
password: fedoraAdmin
|
4
|
+
url: http://localhost:8983/fedora3
|
5
|
+
test:
|
6
|
+
user: fedoraAdmin
|
7
|
+
password: fedoraAdmin
|
8
|
+
url: http://localhost:8983/fedora3
|
9
|
+
production:
|
10
|
+
user: fedoraAdmin
|
11
|
+
password: fedoraAdmin
|
12
|
+
url: http://localhost:8983/fedora3
|
data/config/jetty.yml
ADDED
data/config/solr.yml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
development:
|
2
|
+
default:
|
3
|
+
url: http://localhost:8983/solr/development
|
4
|
+
full_text:
|
5
|
+
url: http://localhost:8983/solr/development
|
6
|
+
test:
|
7
|
+
default:
|
8
|
+
url: http://localhost:8983/solr/test
|
9
|
+
full_text:
|
10
|
+
url: http://localhost:8983/solr/test
|
11
|
+
production:
|
12
|
+
default:
|
13
|
+
url: http://localhost:8080/solr/production
|
14
|
+
full_text:
|
15
|
+
url: http://localhost:8080/solr/production
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for fedora-migrate.
# Puts the gem's own lib/ directory on the load path so the version constant
# can be read before the gem is installed.
lib_path = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_path) unless $LOAD_PATH.include?(lib_path)
require 'fedora_migrate/version'

Gem::Specification.new do |gem|
  gem.name          = "fedora-migrate"
  gem.version       = FedoraMigrate::VERSION
  gem.authors       = ["Adam Wead"]
  gem.email         = ["amsterdamos@gmail.com"]
  gem.summary       = "Migrate Hydra-based repository data from Fedora3 to Fedora4"
  gem.description   = "Migrates data (models, datastreams, content) from a Fedora3 repository to Fedora4"
  # NOTE(review): homepage is empty -- fill in the project URL.
  gem.homepage      = ""
  # NOTE(review): the LICENSE.txt shipped with this gem contains the MIT
  # license text, while this field says "APACHE2" -- confirm which applies.
  gem.license       = "APACHE2"

  # Package every file tracked by git; bin/ entries become executables and
  # test/spec/features entries are registered as test files.
  tracked_files      = `git ls-files -z`.split("\x0")
  gem.files          = tracked_files
  gem.executables    = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files     = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths  = ["lib"]

  # Runtime dependencies
  gem.add_dependency "rubydora", "~> 1.8"
  gem.add_dependency "hydra-head", "~> 9.0.0.beta1"

  # Development-only dependencies
  gem.add_development_dependency "bundler", "~> 1.7"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "rspec"
  gem.add_development_dependency "jettywrapper"
  gem.add_development_dependency "equivalent-xml"
  gem.add_development_dependency "sufia", "~> 6.0.0.beta1"
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
require "fedora_migrate/version"
|
2
|
+
require "active_support"
|
3
|
+
require "active_fedora"
|
4
|
+
require "hydra-core"
|
5
|
+
|
6
|
+
# Loads this gem's rake tasks, but only when Rake itself is already loaded.
Dir[File.expand_path(File.join(File.dirname(__FILE__),"tasks/*.rake"))].each { |ext| load ext } if defined?(Rake)

# Shenanigans because we're not in a Rails environment and we need
# Hydra::AccessControls
Hydra::Engine.config.autoload_paths.each { |path| $LOAD_PATH.unshift path }

# In gem version 2.4 (presumably RubyGems 2.4 -- TODO confirm), .find_by_name
# isn't pulling up gems given in the Gemfile as opposed to those in the
# gemspec file. As a workaround, scan every known specification by name.
#
# The constant is assigned exactly once: when several matching specifications
# are installed the last one listed wins (the same one the previous
# assign-inside-the-loop code ended up with, minus the "already initialized
# constant" warnings), and a missing gem fails with an explicit error instead
# of an "uninitialized constant HAC_DIR" NameError further down.
hac_spec = Gem::Specification.all.select { |g| g.name.match("hydra-access-controls") }.last
raise LoadError, "fedora-migrate could not locate the hydra-access-controls gem" if hac_spec.nil?
HAC_DIR = hac_spec.gem_dir

# Files required by hand from hydra-access-controls' app/ directory.
# The order is kept exactly as originally written.
[
  'app/vocabularies/acl',
  'app/vocabularies/hydra/acl',
  'app/models/role_mapper',
  'app/models/ability',
  'app/models/hydra/access_controls/access_control_list',
  'app/models/hydra/access_controls/permission',
  'app/models/hydra/access_controls/embargo',
  'app/models/hydra/access_controls/lease',
  'app/services/hydra/lease_service',
  'app/services/hydra/embargo_service',
  'app/validators/hydra/future_date_validator'
].each { |relative_path| require "#{HAC_DIR}/#{relative_path}" }
|
29
|
+
|
30
|
+
# Top-level namespace and entry points for migrating Fedora 3 content.
#
# Component classes are registered with ActiveSupport::Autoload; the module
# itself exposes a lazily-built Fedora 3 connection and a one-call repository
# migration.
module FedoraMigrate
  extend ActiveSupport::Autoload

  autoload :DatastreamMover
  autoload :Errors
  autoload :FileConfigurator
  autoload :Hooks
  autoload :Logger
  autoload :MigrationOptions
  autoload :Mover
  autoload :ObjectMover
  autoload :Permissions
  autoload :PermissionsMover
  autoload :RDFDatastreamMover
  autoload :RDFDatastreamParser
  autoload :RelsExtDatastreamMover
  autoload :RepositoryMigrator
  autoload :RightsMetadata
  autoload :RubydoraConnection
  autoload :TripleConverter

  class << self
    # The object that supplies configuration; replaceable by callers.
    # The readers below are written out by hand because they memoize, so no
    # attr_reader is declared for them (it would only be redefined).
    attr_accessor :configurator

    # Memoized ActiveFedora::Config built from the configurator's
    # Fedora 3 settings.
    def fedora_config
      @fedora_config ||= ActiveFedora::Config.new(configurator.fedora3_config)
    end

    # Placeholder; currently returns a fixed string.
    # NOTE(review): the literal is misspelled ("comming"); left unchanged
    # because it is a value returned to callers.
    def config_options
      @config_options ||= "comming soon!"
    end

    # Memoized connection wrapper for the source repository, created from the
    # credentials of #fedora_config.
    def source
      @source ||= FedoraMigrate::RubydoraConnection.new(fedora_config.credentials)
    end

    # Looks up +id+ through the source connection.
    def find(id)
      source.connection.find(id)
    end

    # Runs a full migration: objects first, then relationships.
    #
    # args - Hash read with the keys :namespace and :options, both passed to
    #        RepositoryMigrator.new.
    #
    # Returns whatever the migrator's #results returns.
    def migrate_repository(args)
      migrator = FedoraMigrate::RepositoryMigrator.new(args[:namespace], args[:options])
      migrator.migrate_objects
      migrator.migrate_relationships
      migrator.results
    end

  end

  # Default configurator, unless one has already been assigned.
  self.configurator ||= FedoraMigrate::FileConfigurator.new

end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module FedoraMigrate
  # Moves the content of one source datastream onto a target, either as a
  # single current copy or version by version, and compares checksums after
  # each copy.
  #
  # +source+, +target+ and +save+ are not defined in this class; presumably
  # they come from Mover -- verify against that class.
  class DatastreamMover < Mover

    # Explicit override for versioning; when nil the target decides.
    attr_accessor :versionable

    # Rejects construction without a target.
    # Raises FedoraMigrate::Errors::MigrationError when target is nil.
    def post_initialize
      return unless target.nil?
      raise FedoraMigrate::Errors::MigrationError, "You must supply a target"
    end

    # The explicit +versionable+ setting when one was given, otherwise the
    # target's own answer.
    def versionable?
      return target_versionable? if versionable.nil?
      versionable
    end

    # Asks the target whether it is versionable; false when the target does
    # not answer that question at all.
    def target_versionable?
      target.respond_to?(:versionable?) ? target.versionable? : false
    end

    # Entry point: migrates every version for versionable datastreams,
    # otherwise only the current content.
    def migrate
      versionable? ? migrate_versions : migrate_current
    end

    private

      # Copies the current content and verifies it.
      # The target is reloaded first, otherwise #get_checksum is nil.
      def migrate_current
        migrate_content
        target.reload
        verify
      end

      # Copies each source version in turn, creating a version on the target
      # and verifying it before moving to the next one.
      def migrate_versions
        source.versions.each do |snapshot|
          migrate_content(snapshot)
          target.create_version
          verify(snapshot)
        end
      end

      # Copies content, label, mime type and creation date from +datastream+
      # (defaults to the source) to the target and saves it. Returns true
      # without saving when the datastream has no content.
      #
      # TODO: lastModified isn't the right place for the original creation date (issue #1)
      def migrate_content(datastream = nil)
        datastream = datastream || source
        if datastream.content.nil?
          Logger.info "datastream '#{datastream.dsid}' is nil. It's probably defined in the target but not present in the source"
          return true
        end
        target.content       = datastream.content
        target.original_name = datastream.label
        target.mime_type     = datastream.mimeType
        target.last_modified = datastream.createDate
        Logger.info target.inspect.to_s
        save
      end

      # Compares the checksum of +datastream+ (defaults to the source) with
      # the target's and logs a fatal message when they differ. A source
      # checksum of "none" is treated as verified.
      #
      # TODO: Reporting mechanism? If there isn't a checksum it defaults to "none" (issue #4)
      def verify(datastream = nil)
        datastream = datastream || source
        # Read the target checksum first, exactly as before, so any failure
        # in #get_checksum still surfaces even for unchecksummed sources.
        target_checksum = get_checksum
        return true if datastream.checksum == "none"
        return if datastream.checksum == target_checksum.split(/:/).last
        Logger.fatal "expected #{datastream.dsid} #{datastream.checksumType} #{datastream.checksum} to match #{target_checksum}"
      end

      # String form of the first digest reported by the target.
      def get_checksum
        target.digest.first.to_s
      end

  end

end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module FedoraMigrate
  # Extends ActiveFedora's file configurator with a loader for the Fedora 3
  # (source repository) configuration file.
  class FileConfigurator < ActiveFedora::FileConfigurator

    # Returns the Fedora 3 settings for the current environment, loading
    # them on first use.
    def fedora3_config
      load_fedora3_config
      @fedora_config
    end

    # Reads the file located by get_config_path(:fedora3), renders it through
    # ERB, parses the result as YAML and stores the section for
    # ActiveFedora.environment (keys symbolized) in @fedora_config.
    #
    # Returns the cached value when a configuration is already loaded.
    # Raises RuntimeError when the file cannot be rendered with ERB or is not
    # valid YAML; the message names the file that was actually read.
    def load_fedora3_config
      # @fedora_config is presumably initialised by the parent class; the nil
      # check only keeps this guard from crashing if it was not.
      return @fedora_config if @fedora_config && !@fedora_config.empty?
      @fedora_config_path = get_config_path(:fedora3)
      Logger.info("loading fedora config from #{::File.expand_path(@fedora_config_path)}")

      begin
        config_erb = ERB.new(IO.read(@fedora_config_path)).result(binding)
      rescue StandardError, ScriptError => e
        # ScriptError is listed because a template containing invalid Ruby
        # raises SyntaxError, which is not a StandardError. Interrupt and
        # SystemExit are deliberately no longer swallowed.
        raise("#{@fedora_config_path} was found, but could not be parsed with ERB. \n#{e.inspect}")
      end

      begin
        # NOTE(review): YAML.load is kept because this is an operator-supplied
        # configuration file; do not point it at untrusted input.
        fedora_yml = YAML.load(config_erb)
      rescue Psych::SyntaxError => e
        raise "#{@fedora_config_path} was found, but could not be parsed. " \
              "Error #{e.message}"
      end

      # An empty file parses to a non-hash value; treat it as "no settings".
      config = (fedora_yml || {}).symbolize_keys

      cfg = config[ActiveFedora.environment.to_sym] || {}
      @fedora_config = cfg.kind_of?(Array) ? cfg.map(&:symbolize_keys) : cfg.symbolize_keys
    end

  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module FedoraMigrate
  # Class-level logging facade.
  #
  # Messages go to ActiveFedora::Base.logger when one is set, otherwise to a
  # standard-library logger on STDOUT. info/warn/fatal prefix the message
  # with "FedoraMigrate <LEVEL>: #<label of the calling method>"; any other
  # message the underlying logger understands is forwarded unchanged.
  class Logger

    class << self

      # Forwards unknown messages to the underlying logger. Anything the
      # logger does not respond to raises NoMethodError as usual; errors
      # raised inside the logger itself propagate untouched.
      def method_missing(method_name, *arguments, &block)
        if logger.respond_to?(method_name)
          logger.public_send(method_name, *arguments, &block)
        else
          super
        end
      end

      # Keeps respond_to? truthful for forwarded methods without hiding the
      # methods this class defines itself.
      def respond_to_missing?(method_name, include_private = false)
        logger.respond_to?(method_name, include_private) || super
      end

      # Logs +msg+ at info level, tagged with the caller's label.
      def info(msg)
        logger.info("FedoraMigrate INFO: ##{caller_locations(1, 1)[0].label} #{msg}")
      end

      # Logs +msg+ at warn level, tagged with the caller's label.
      # Calls the logger directly: going through +super+ here would reach
      # Kernel#warn and print to $stderr instead of the logger.
      def warn(msg)
        logger.warn("FedoraMigrate WARN: ##{caller_locations(1, 1)[0].label} #{msg}")
      end

      # Logs +msg+ at fatal level, tagged with the caller's label.
      def fatal(msg)
        logger.fatal("FedoraMigrate FATAL: ##{caller_locations(1, 1)[0].label} #{msg}")
      end

      private

        # The logger messages are written to. A new STDOUT logger is built on
        # every call while ActiveFedora has none configured.
        def logger
          ActiveFedora::Base.logger || ::Logger.new(STDOUT)
        end

    end
  end
end
|