fedora-migrate 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +19 -0
  5. data/Gemfile +7 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +31 -0
  8. data/Rakefile +5 -0
  9. data/config/fedora.yml +14 -0
  10. data/config/fedora3.yml +12 -0
  11. data/config/jetty.yml +6 -0
  12. data/config/solr.yml +15 -0
  13. data/fedora-migrate.gemspec +30 -0
  14. data/lib/fedora-migrate.rb +82 -0
  15. data/lib/fedora_migrate/datastream_mover.rb +78 -0
  16. data/lib/fedora_migrate/errors.rb +7 -0
  17. data/lib/fedora_migrate/file_configurator.rb +34 -0
  18. data/lib/fedora_migrate/hooks.rb +11 -0
  19. data/lib/fedora_migrate/logger.rb +36 -0
  20. data/lib/fedora_migrate/migration_options.rb +11 -0
  21. data/lib/fedora_migrate/mover.rb +44 -0
  22. data/lib/fedora_migrate/object_mover.rb +62 -0
  23. data/lib/fedora_migrate/permissions.rb +32 -0
  24. data/lib/fedora_migrate/permissions_mover.rb +31 -0
  25. data/lib/fedora_migrate/rdf_datastream_mover.rb +28 -0
  26. data/lib/fedora_migrate/rdf_datastream_parser.rb +29 -0
  27. data/lib/fedora_migrate/rels_ext_datastream_mover.rb +90 -0
  28. data/lib/fedora_migrate/repository_migrator.rb +60 -0
  29. data/lib/fedora_migrate/rights_metadata.rb +281 -0
  30. data/lib/fedora_migrate/rubydora_connection.rb +21 -0
  31. data/lib/fedora_migrate/triple_converter.rb +39 -0
  32. data/lib/fedora_migrate/version.rb +3 -0
  33. data/lib/tasks/fedora-migrate.rake +45 -0
  34. data/spec/fixtures/datastreams/rdf_ntriples_datastream.txt +2 -0
  35. data/spec/fixtures/datastreams/sufia-rb68xc089-characterization.xml +27 -0
  36. data/spec/fixtures/objects/f3-migration-a.xml +110 -0
  37. data/spec/fixtures/objects/gf-versioned-content.xml +2776 -0
  38. data/spec/fixtures/objects/sufia-batch-gf-1.xml +94 -0
  39. data/spec/fixtures/objects/sufia-batch-gf-2.xml +93 -0
  40. data/spec/fixtures/objects/sufia-batch.xml +51 -0
  41. data/spec/integration/content_versions_spec.rb +42 -0
  42. data/spec/integration/fedora3_interface_spec.rb +23 -0
  43. data/spec/integration/object_migration_spec.rb +112 -0
  44. data/spec/integration/permission_migration_spec.rb +13 -0
  45. data/spec/integration/rdf_migration_spec.rb +22 -0
  46. data/spec/integration/relationship_migration_spec.rb +51 -0
  47. data/spec/integration/repository_migration_spec.rb +59 -0
  48. data/spec/spec_helper.rb +39 -0
  49. data/spec/support/example_model.rb +36 -0
  50. data/spec/unit/datastream_mover_spec.rb +39 -0
  51. data/spec/unit/fedora_migrate_spec.rb +19 -0
  52. data/spec/unit/file_configurator_spec.rb +17 -0
  53. data/spec/unit/mover_spec.rb +39 -0
  54. data/spec/unit/object_mover_spec.rb +38 -0
  55. data/spec/unit/permissions_mover_spec.rb +53 -0
  56. data/spec/unit/rdf_datastream_mover_spec.rb +8 -0
  57. data/spec/unit/rdf_datastream_parser_spec.rb +38 -0
  58. data/spec/unit/rels_ext_datastream_mover_spec.rb +36 -0
  59. data/spec/unit/repository_migrator_spec.rb +43 -0
  60. data/spec/unit/rubydora_connection_spec.rb +25 -0
  61. data/spec/unit/triple_converter_spec.rb +35 -0
  62. data/tasks/dev.rake +37 -0
  63. metadata +246 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1cd495240ef6ef07dafbfa15720b1c5eca3a4146
4
+ data.tar.gz: 4d59a16932745f6082133cf3a5492a024e499f84
5
+ SHA512:
6
+ metadata.gz: 3a906945035f3966d5c5ecb24668081da701e52a5cec8f881b0b75d3e938ee72a53354008315911e1dd3914793311f9efb79991aeb78199ef210f659e6b9c111
7
+ data.tar.gz: 18a47d1a58619bd9a57f4489537f79a35655f184eac5f3a47e2424f5feef646946e407d1bdc6c52504cd529d42f7f61bfa98935dab8f8e98c83e881178660ff6
data/.gitignore ADDED
@@ -0,0 +1,16 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
15
+ jetty
16
+ .ruby-version
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,19 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.1
4
+ - 2.0
5
+ env:
6
+ global:
7
+ - NOKOGIRI_USE_SYSTEM_LIBRARIES=true
8
+ notifications:
9
+ email:
10
+ recipients:
11
+ - "ul-dlt-hydra@lists.psu.edu"
12
+ on_success: "change"
13
+ on_failure: "always"
14
+ irc:
15
+ channels:
16
+ - "irc.freenode.org#scholarsphere"
17
+ - "irc.freenode.org#projecthydra"
18
+ template:
19
+ - "%{repository}//%{branch}@%{commit} by %{author}: %{message} - %{build_url}"
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in fedora-migrate.gemspec
4
+ gemspec
5
+
6
+ gem 'byebug'
7
+ gem 'pry'
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Adam Wead
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # FedoraMigrate
2
+
3
+ Migrates content from a Fedora3 repository to a Fedora4 one.
4
+
5
+ ## Status
6
+
7
+ Very alpha. This has been tested against Penn State's existing ScholarSphere applications, as well
8
+ as generic Sufia applications.
9
+
10
+ ## TODOs
11
+
12
+ See the list of issues.
13
+
14
+ ## Contributing
15
+
16
+ ### Hydra Developers
17
+
18
+ For Hydra developers, or anyone with a signed CLA, please clone the repo and submit PRs via
19
+ topic branches. If you don't have rights to projecthydra-labs and do have a signed
20
+ CLA, please send a note to hydra-tech@googlegroups.com.
21
+
22
+ 1. Clone it
23
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
24
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
25
+ 4. Push to the branch (`git push origin my-new-feature`)
26
+ 5. Create a new Pull Request
27
+
28
+ ### Non-Hydra Developers
29
+
30
+ Anyone is welcome to use this software and report issues.
31
+ In order to merge any work contributed, you'll need to sign a contributor license agreement.
data/Rakefile ADDED
@@ -0,0 +1,5 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ Dir.glob('tasks/*.rake').each { |r| import r }
4
+
5
+ task default: :ci
data/config/fedora.yml ADDED
@@ -0,0 +1,14 @@
1
+ development:
2
+ user: fedoraAdmin
3
+ password: fedoraAdmin
4
+ url: http://127.0.0.1:8983/fedora/rest
5
+ base_path: /dev
6
+ test:
7
+ user: fedoraAdmin
8
+ password: fedoraAdmin
9
+ url: http://localhost:8983/fedora/rest
10
+ base_path: /test
11
+ production:
12
+ user: fedoraAdmin
13
+ password: fedoraAdmin
14
+ url: http://127.0.0.1:8983/fedora/rest
data/config/fedora3.yml ADDED
@@ -0,0 +1,12 @@
1
+ development:
2
+ user: fedoraAdmin
3
+ password: fedoraAdmin
4
+ url: http://localhost:8983/fedora3
5
+ test:
6
+ user: fedoraAdmin
7
+ password: fedoraAdmin
8
+ url: http://localhost:8983/fedora3
9
+ production:
10
+ user: fedoraAdmin
11
+ password: fedoraAdmin
12
+ url: http://localhost:8983/fedora3
data/config/jetty.yml ADDED
@@ -0,0 +1,6 @@
1
+ default:
2
+ startup_wait: 90
3
+ jetty_port: 8983
4
+ java_opts:
5
+ - "-Xmx256m"
6
+ - "-XX:MaxPermSize=128m"
data/config/solr.yml ADDED
@@ -0,0 +1,15 @@
1
+ development:
2
+ default:
3
+ url: http://localhost:8983/solr/development
4
+ full_text:
5
+ url: http://localhost:8983/solr/development
6
+ test:
7
+ default:
8
+ url: http://localhost:8983/solr/test
9
+ full_text:
10
+ url: http://localhost:8983/solr/test
11
+ production:
12
+ default:
13
+ url: http://localhost:8080/solr/production
14
+ full_text:
15
+ url: http://localhost:8080/solr/production
data/fedora-migrate.gemspec ADDED
@@ -0,0 +1,30 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'fedora_migrate/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "fedora-migrate"
8
+ spec.version = FedoraMigrate::VERSION
9
+ spec.authors = ["Adam Wead"]
10
+ spec.email = ["amsterdamos@gmail.com"]
11
+ spec.summary = %q{Migrate Hydra-based repository data from Fedora3 to Fedora4}
12
+ spec.description = %q{Migrates data (models, datastreams, content) from a Fedora3 repository to Fedora4}
13
+ spec.homepage = ""
14
+ spec.license = "APACHE2"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_dependency "rubydora", "~> 1.8"
22
+ spec.add_dependency "hydra-head", "~> 9.0.0.beta1"
23
+
24
+ spec.add_development_dependency "bundler", "~> 1.7"
25
+ spec.add_development_dependency "rake", "~> 10.0"
26
+ spec.add_development_dependency "rspec"
27
+ spec.add_development_dependency "jettywrapper"
28
+ spec.add_development_dependency "equivalent-xml"
29
+ spec.add_development_dependency "sufia", "~> 6.0.0.beta1"
30
+ end
data/lib/fedora-migrate.rb ADDED
@@ -0,0 +1,82 @@
1
+ require "fedora_migrate/version"
2
+ require "active_support"
3
+ require "active_fedora"
4
+ require "hydra-core"
5
+
6
+ # Loads rake tasks
7
+ Dir[File.expand_path(File.join(File.dirname(__FILE__),"tasks/*.rake"))].each { |ext| load ext } if defined?(Rake)
8
+
9
+ # Shenanigans because we're not in a Rails environment and we need
10
+ # Hydra::AccessControls
11
+ Hydra::Engine.config.autoload_paths.each { |path| $LOAD_PATH.unshift path }
12
+ # in gem version 2.4, .find_by_name isn't pulling up gems given in the Gemfile
13
+ # as opposed to those in the gemspec file.
14
+ # This is a workaround:
15
+ Gem::Specification.all.each do |g|
16
+ HAC_DIR = g.gem_dir if g.name.match("hydra-access-controls")
17
+ end
18
+ require HAC_DIR+'/app/vocabularies/acl'
19
+ require HAC_DIR+'/app/vocabularies/hydra/acl'
20
+ require HAC_DIR+'/app/models/role_mapper'
21
+ require HAC_DIR+'/app/models/ability'
22
+ require HAC_DIR+'/app/models/hydra/access_controls/access_control_list'
23
+ require HAC_DIR+'/app/models/hydra/access_controls/permission'
24
+ require HAC_DIR+'/app/models/hydra/access_controls/embargo'
25
+ require HAC_DIR+'/app/models/hydra/access_controls/lease'
26
+ require HAC_DIR+'/app/services/hydra/lease_service'
27
+ require HAC_DIR+'/app/services/hydra/embargo_service'
28
+ require HAC_DIR+'/app/validators/hydra/future_date_validator'
29
+
30
+ module FedoraMigrate
31
+ extend ActiveSupport::Autoload
32
+
33
+ autoload :DatastreamMover
34
+ autoload :Errors
35
+ autoload :FileConfigurator
36
+ autoload :Hooks
37
+ autoload :Logger
38
+ autoload :MigrationOptions
39
+ autoload :Mover
40
+ autoload :ObjectMover
41
+ autoload :Permissions
42
+ autoload :PermissionsMover
43
+ autoload :RDFDatastreamMover
44
+ autoload :RDFDatastreamParser
45
+ autoload :RelsExtDatastreamMover
46
+ autoload :RepositoryMigrator
47
+ autoload :RightsMetadata
48
+ autoload :RubydoraConnection
49
+ autoload :TripleConverter
50
+
51
+ class << self
52
+ attr_reader :fedora_config, :config_options, :source
53
+ attr_accessor :configurator
54
+
55
+ def fedora_config
56
+ @fedora_config ||= ActiveFedora::Config.new(configurator.fedora3_config)
57
+ end
58
+
59
+ def config_options
60
+ @config_options ||= "comming soon!"
61
+ end
62
+
63
+ def source
64
+ @source ||= FedoraMigrate::RubydoraConnection.new(fedora_config.credentials)
65
+ end
66
+
67
+ def find id
68
+ FedoraMigrate.source.connection.find(id)
69
+ end
70
+
71
+ def migrate_repository args
72
+ migrator = FedoraMigrate::RepositoryMigrator.new(args[:namespace], args[:options])
73
+ migrator.migrate_objects
74
+ migrator.migrate_relationships
75
+ migrator.results
76
+ end
77
+
78
+ end
79
+
80
+ self.configurator ||= FedoraMigrate::FileConfigurator.new
81
+
82
+ end
data/lib/fedora_migrate/datastream_mover.rb ADDED
@@ -0,0 +1,78 @@
1
+ module FedoraMigrate
2
+ class DatastreamMover < Mover
3
+
4
+ attr_accessor :versionable
5
+
6
+ def post_initialize
7
+ raise FedoraMigrate::Errors::MigrationError, "You must supply a target" if target.nil?
8
+ end
9
+
10
+ def versionable?
11
+ versionable.nil? ? target_versionable? : versionable
12
+ end
13
+
14
+ def target_versionable?
15
+ if target.respond_to?(:versionable?)
16
+ target.versionable?
17
+ else
18
+ false
19
+ end
20
+ end
21
+
22
+ def migrate
23
+ if versionable?
24
+ migrate_versions
25
+ else
26
+ migrate_current
27
+ end
28
+ end
29
+
30
+ private
31
+
32
+ # Reloading the target, otherwise #get_checksum is nil
33
+ def migrate_current
34
+ migrate_content
35
+ target.reload
36
+ verify
37
+ end
38
+
39
+ def migrate_versions
40
+ source.versions.each do |version|
41
+ migrate_content(version)
42
+ target.create_version
43
+ verify(version)
44
+ end
45
+ end
46
+
47
+ # TODO: lastModified isn't the right place for the original creation date (issue #1)
48
+ def migrate_content datastream=nil
49
+ datastream ||= source
50
+ if datastream.content.nil?
51
+ Logger.info "datastream '#{datastream.dsid}' is nil. It's probably defined in the target but not present in the source"
52
+ return true
53
+ end
54
+ target.content = datastream.content
55
+ target.original_name = datastream.label
56
+ target.mime_type = datastream.mimeType
57
+ target.last_modified = datastream.createDate
58
+ Logger.info "#{target.inspect}"
59
+ save
60
+ end
61
+
62
+ # TODO: Reporting mechanism? If there isn't a checksum it defaults to "none" (issue #4)
63
+ def verify datastream=nil
64
+ datastream ||= source
65
+ target_checksum = get_checksum
66
+ return true if datastream.checksum == "none"
67
+ unless datastream.checksum == target_checksum.split(/:/).last
68
+ Logger.fatal "expected #{datastream.dsid} #{datastream.checksumType} #{datastream.checksum} to match #{target_checksum}"
69
+ end
70
+ end
71
+
72
+ def get_checksum
73
+ target.digest.first.to_s
74
+ end
75
+
76
+ end
77
+
78
+ end
data/lib/fedora_migrate/errors.rb ADDED
@@ -0,0 +1,7 @@
1
+ module FedoraMigrate
2
+ module Errors
3
+ # raise this error when moving or migrating content
4
+ class MigrationError < StandardError
5
+ end
6
+ end
7
+ end
data/lib/fedora_migrate/file_configurator.rb ADDED
@@ -0,0 +1,34 @@
1
+ module FedoraMigrate
2
+ class FileConfigurator < ActiveFedora::FileConfigurator
3
+
4
+ def fedora3_config
5
+ load_fedora3_config
6
+ @fedora_config
7
+ end
8
+
9
+ def load_fedora3_config
10
+ return @fedora_config unless @fedora_config.empty?
11
+ @fedora_config_path = get_config_path(:fedora3)
12
+ Logger.info("loading fedora config from #{::File.expand_path(@fedora_config_path)}")
13
+
14
+ begin
15
+ config_erb = ERB.new(IO.read(@fedora_config_path)).result(binding)
16
+ rescue Exception => e
17
+ raise("fedora.yml was found, but could not be parsed with ERB. \n#{$!.inspect}")
18
+ end
19
+
20
+ begin
21
+ fedora_yml = YAML.load(config_erb)
22
+ rescue Psych::SyntaxError => e
23
+ raise "fedora.yml was found, but could not be parsed. " \
24
+ "Error #{e.message}"
25
+ end
26
+
27
+ config = fedora_yml.symbolize_keys
28
+
29
+ cfg = config[ActiveFedora.environment.to_sym] || {}
30
+ @fedora_config = cfg.kind_of?(Array) ? cfg.map(&:symbolize_keys) : cfg.symbolize_keys
31
+ end
32
+
33
+ end
34
+ end
data/lib/fedora_migrate/hooks.rb ADDED
@@ -0,0 +1,11 @@
1
+ module FedoraMigrate
2
+ module Hooks
3
+
4
+ def before_object_migration
5
+ end
6
+
7
+ def after_object_migration
8
+ end
9
+
10
+ end
11
+ end
data/lib/fedora_migrate/logger.rb ADDED
@@ -0,0 +1,36 @@
1
+ module FedoraMigrate
2
+ class Logger
3
+
4
+ class << self
5
+
6
+ def method_missing method_name, *arguments, &block
7
+ logger.send(method_name, *arguments, &block)
8
+ rescue
9
+ super
10
+ end
11
+
12
+ def respond_to?(method_name, include_private = false)
13
+ logger.respond_to? method_name
14
+ end
15
+
16
+ def info msg
17
+ super("FedoraMigrate INFO: ##{caller_locations(1,1)[0].label} " + msg)
18
+ end
19
+
20
+ def warn msg
21
+ super("FedoraMigrate WARN: ##{caller_locations(1,1)[0].label} " + msg)
22
+ end
23
+
24
+ def fatal msg
25
+ super("FedoraMigrate FATAL: ##{caller_locations(1,1)[0].label} " + msg)
26
+ end
27
+
28
+ private
29
+
30
+ def logger
31
+ ActiveFedora::Base.logger || ::Logger.new(STDOUT)
32
+ end
33
+
34
+ end
35
+ end
36
+ end