fedora-migrate 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +19 -0
  5. data/Gemfile +7 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +31 -0
  8. data/Rakefile +5 -0
  9. data/config/fedora.yml +14 -0
  10. data/config/fedora3.yml +12 -0
  11. data/config/jetty.yml +6 -0
  12. data/config/solr.yml +15 -0
  13. data/fedora-migrate.gemspec +30 -0
  14. data/lib/fedora-migrate.rb +82 -0
  15. data/lib/fedora_migrate/datastream_mover.rb +78 -0
  16. data/lib/fedora_migrate/errors.rb +7 -0
  17. data/lib/fedora_migrate/file_configurator.rb +34 -0
  18. data/lib/fedora_migrate/hooks.rb +11 -0
  19. data/lib/fedora_migrate/logger.rb +36 -0
  20. data/lib/fedora_migrate/migration_options.rb +11 -0
  21. data/lib/fedora_migrate/mover.rb +44 -0
  22. data/lib/fedora_migrate/object_mover.rb +62 -0
  23. data/lib/fedora_migrate/permissions.rb +32 -0
  24. data/lib/fedora_migrate/permissions_mover.rb +31 -0
  25. data/lib/fedora_migrate/rdf_datastream_mover.rb +28 -0
  26. data/lib/fedora_migrate/rdf_datastream_parser.rb +29 -0
  27. data/lib/fedora_migrate/rels_ext_datastream_mover.rb +90 -0
  28. data/lib/fedora_migrate/repository_migrator.rb +60 -0
  29. data/lib/fedora_migrate/rights_metadata.rb +281 -0
  30. data/lib/fedora_migrate/rubydora_connection.rb +21 -0
  31. data/lib/fedora_migrate/triple_converter.rb +39 -0
  32. data/lib/fedora_migrate/version.rb +3 -0
  33. data/lib/tasks/fedora-migrate.rake +45 -0
  34. data/spec/fixtures/datastreams/rdf_ntriples_datastream.txt +2 -0
  35. data/spec/fixtures/datastreams/sufia-rb68xc089-characterization.xml +27 -0
  36. data/spec/fixtures/objects/f3-migration-a.xml +110 -0
  37. data/spec/fixtures/objects/gf-versioned-content.xml +2776 -0
  38. data/spec/fixtures/objects/sufia-batch-gf-1.xml +94 -0
  39. data/spec/fixtures/objects/sufia-batch-gf-2.xml +93 -0
  40. data/spec/fixtures/objects/sufia-batch.xml +51 -0
  41. data/spec/integration/content_versions_spec.rb +42 -0
  42. data/spec/integration/fedora3_interface_spec.rb +23 -0
  43. data/spec/integration/object_migration_spec.rb +112 -0
  44. data/spec/integration/permission_migration_spec.rb +13 -0
  45. data/spec/integration/rdf_migration_spec.rb +22 -0
  46. data/spec/integration/relationship_migration_spec.rb +51 -0
  47. data/spec/integration/repository_migration_spec.rb +59 -0
  48. data/spec/spec_helper.rb +39 -0
  49. data/spec/support/example_model.rb +36 -0
  50. data/spec/unit/datastream_mover_spec.rb +39 -0
  51. data/spec/unit/fedora_migrate_spec.rb +19 -0
  52. data/spec/unit/file_configurator_spec.rb +17 -0
  53. data/spec/unit/mover_spec.rb +39 -0
  54. data/spec/unit/object_mover_spec.rb +38 -0
  55. data/spec/unit/permissions_mover_spec.rb +53 -0
  56. data/spec/unit/rdf_datastream_mover_spec.rb +8 -0
  57. data/spec/unit/rdf_datastream_parser_spec.rb +38 -0
  58. data/spec/unit/rels_ext_datastream_mover_spec.rb +36 -0
  59. data/spec/unit/repository_migrator_spec.rb +43 -0
  60. data/spec/unit/rubydora_connection_spec.rb +25 -0
  61. data/spec/unit/triple_converter_spec.rb +35 -0
  62. data/tasks/dev.rake +37 -0
  63. metadata +246 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1cd495240ef6ef07dafbfa15720b1c5eca3a4146
4
+ data.tar.gz: 4d59a16932745f6082133cf3a5492a024e499f84
5
+ SHA512:
6
+ metadata.gz: 3a906945035f3966d5c5ecb24668081da701e52a5cec8f881b0b75d3e938ee72a53354008315911e1dd3914793311f9efb79991aeb78199ef210f659e6b9c111
7
+ data.tar.gz: 18a47d1a58619bd9a57f4489537f79a35655f184eac5f3a47e2424f5feef646946e407d1bdc6c52504cd529d42f7f61bfa98935dab8f8e98c83e881178660ff6
data/.gitignore ADDED
@@ -0,0 +1,16 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
15
+ jetty
16
+ .ruby-version
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,19 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.1
4
+ - 2.0
5
+ env:
6
+ global:
7
+ - NOKOGIRI_USE_SYSTEM_LIBRARIES=true
8
+ notifications:
9
+ email:
10
+ recipients:
11
+ - "ul-dlt-hydra@lists.psu.edu"
12
+ on_success: "change"
13
+ on_failure: "always"
14
+ irc:
15
+ channels:
16
+ - "irc.freenode.org#scholarsphere"
17
+ - "irc.freenode.org#projecthydra"
18
+ template:
19
+ - "%{repository}//%{branch}@%{commit} by %{author}: %{message} - %{build_url}"
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in fedora-migrate.gemspec
4
+ gemspec
5
+
6
+ gem 'byebug'
7
+ gem 'pry'
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Adam Wead
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # FedoraMigrate
2
+
3
+ Migrates content from a Fedora3 repository to a Fedora4 one.
4
+
5
+ ## Status
6
+
7
+ Very alpha. This has been tested against Penn State's existing ScholarSphere applications, as well
8
+ as generic Sufia applications.
9
+
10
+ ## TODOs
11
+
12
+ See the list of issues.
13
+
14
+ ## Contributing
15
+
16
+ ### Hydra Developers
17
+
18
+ For Hydra developers, or anyone with a signed CLA, please clone the repo and submit PRs via
19
+ topic branches. If you don't have rights to projecthydra-labs and do have a signed
20
+ CLA, please send a note to hydra-tech@googlegroups.com.
21
+
22
+ 1. Clone it
23
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
24
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
25
+ 4. Push to the branch (`git push origin my-new-feature`)
26
+ 5. Create a new Pull Request
27
+
28
+ ### Non-Hydra Developers
29
+
30
+ Anyone is welcome to use this software and report issues.
31
+ In order to merge any work contributed, you'll need to sign a contributor license agreement.
data/Rakefile ADDED
@@ -0,0 +1,5 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ Dir.glob('tasks/*.rake').each { |r| import r }
4
+
5
+ task default: :ci
data/config/fedora.yml ADDED
@@ -0,0 +1,14 @@
1
+ development:
2
+ user: fedoraAdmin
3
+ password: fedoraAdmin
4
+ url: http://127.0.0.1:8983/fedora/rest
5
+ base_path: /dev
6
+ test:
7
+ user: fedoraAdmin
8
+ password: fedoraAdmin
9
+ url: http://localhost:8983/fedora/rest
10
+ base_path: /test
11
+ production:
12
+ user: fedoraAdmin
13
+ password: fedoraAdmin
14
+ url: http://127.0.0.1:8983/fedora/rest
data/config/fedora3.yml ADDED
@@ -0,0 +1,12 @@
1
+ development:
2
+ user: fedoraAdmin
3
+ password: fedoraAdmin
4
+ url: http://localhost:8983/fedora3
5
+ test:
6
+ user: fedoraAdmin
7
+ password: fedoraAdmin
8
+ url: http://localhost:8983/fedora3
9
+ production:
10
+ user: fedoraAdmin
11
+ password: fedoraAdmin
12
+ url: http://localhost:8983/fedora3
data/config/jetty.yml ADDED
@@ -0,0 +1,6 @@
1
+ default:
2
+ startup_wait: 90
3
+ jetty_port: 8983
4
+ java_opts:
5
+ - "-Xmx256m"
6
+ - "-XX:MaxPermSize=128m"
data/config/solr.yml ADDED
@@ -0,0 +1,15 @@
1
+ development:
2
+ default:
3
+ url: http://localhost:8983/solr/development
4
+ full_text:
5
+ url: http://localhost:8983/solr/development
6
+ test:
7
+ default:
8
+ url: http://localhost:8983/solr/test
9
+ full_text:
10
+ url: http://localhost:8983/solr/test
11
+ production:
12
+ default:
13
+ url: http://localhost:8080/solr/production
14
+ full_text:
15
+ url: http://localhost:8080/solr/production
data/fedora-migrate.gemspec ADDED
@@ -0,0 +1,30 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'fedora_migrate/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "fedora-migrate"
8
+ spec.version = FedoraMigrate::VERSION
9
+ spec.authors = ["Adam Wead"]
10
+ spec.email = ["amsterdamos@gmail.com"]
11
+ spec.summary = %q{Migrate Hydra-based repository data from Fedora3 to Fedora4}
12
+ spec.description = %q{Migrates data (models, datastreams, content) from a Fedora3 repository to Fedora4}
13
+ spec.homepage = ""
14
+ spec.license = "APACHE2"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_dependency "rubydora", "~> 1.8"
22
+ spec.add_dependency "hydra-head", "~> 9.0.0.beta1"
23
+
24
+ spec.add_development_dependency "bundler", "~> 1.7"
25
+ spec.add_development_dependency "rake", "~> 10.0"
26
+ spec.add_development_dependency "rspec"
27
+ spec.add_development_dependency "jettywrapper"
28
+ spec.add_development_dependency "equivalent-xml"
29
+ spec.add_development_dependency "sufia", "~> 6.0.0.beta1"
30
+ end
data/lib/fedora-migrate.rb ADDED
@@ -0,0 +1,82 @@
1
+ require "fedora_migrate/version"
2
+ require "active_support"
3
+ require "active_fedora"
4
+ require "hydra-core"
5
+
6
+ # Loads rake tasks
7
+ Dir[File.expand_path(File.join(File.dirname(__FILE__),"tasks/*.rake"))].each { |ext| load ext } if defined?(Rake)
8
+
9
+ # Shenanigans because we're not in a Rails environment and we need
10
+ # Hydra::AccessControls
11
+ Hydra::Engine.config.autoload_paths.each { |path| $LOAD_PATH.unshift path }
12
+ # in gem version 2.4, .find_by_name isn't pulling up gems given in the Gemfile
13
+ # as opposed to those in the gemspec file.
14
+ # This is a workaround:
15
+ Gem::Specification.all.each do |g|
16
+ HAC_DIR = g.gem_dir if g.name.match("hydra-access-controls")
17
+ end
18
+ require HAC_DIR+'/app/vocabularies/acl'
19
+ require HAC_DIR+'/app/vocabularies/hydra/acl'
20
+ require HAC_DIR+'/app/models/role_mapper'
21
+ require HAC_DIR+'/app/models/ability'
22
+ require HAC_DIR+'/app/models/hydra/access_controls/access_control_list'
23
+ require HAC_DIR+'/app/models/hydra/access_controls/permission'
24
+ require HAC_DIR+'/app/models/hydra/access_controls/embargo'
25
+ require HAC_DIR+'/app/models/hydra/access_controls/lease'
26
+ require HAC_DIR+'/app/services/hydra/lease_service'
27
+ require HAC_DIR+'/app/services/hydra/embargo_service'
28
+ require HAC_DIR+'/app/validators/hydra/future_date_validator'
29
+
30
+ module FedoraMigrate
31
+ extend ActiveSupport::Autoload
32
+
33
+ autoload :DatastreamMover
34
+ autoload :Errors
35
+ autoload :FileConfigurator
36
+ autoload :Hooks
37
+ autoload :Logger
38
+ autoload :MigrationOptions
39
+ autoload :Mover
40
+ autoload :ObjectMover
41
+ autoload :Permissions
42
+ autoload :PermissionsMover
43
+ autoload :RDFDatastreamMover
44
+ autoload :RDFDatastreamParser
45
+ autoload :RelsExtDatastreamMover
46
+ autoload :RepositoryMigrator
47
+ autoload :RightsMetadata
48
+ autoload :RubydoraConnection
49
+ autoload :TripleConverter
50
+
51
+ class << self
52
+ attr_reader :fedora_config, :config_options, :source
53
+ attr_accessor :configurator
54
+
55
+ def fedora_config
56
+ @fedora_config ||= ActiveFedora::Config.new(configurator.fedora3_config)
57
+ end
58
+
59
+ def config_options
60
+ @config_options ||= "comming soon!"
61
+ end
62
+
63
+ def source
64
+ @source ||= FedoraMigrate::RubydoraConnection.new(fedora_config.credentials)
65
+ end
66
+
67
+ def find id
68
+ FedoraMigrate.source.connection.find(id)
69
+ end
70
+
71
+ def migrate_repository args
72
+ migrator = FedoraMigrate::RepositoryMigrator.new(args[:namespace], args[:options])
73
+ migrator.migrate_objects
74
+ migrator.migrate_relationships
75
+ migrator.results
76
+ end
77
+
78
+ end
79
+
80
+ self.configurator ||= FedoraMigrate::FileConfigurator.new
81
+
82
+ end
data/lib/fedora_migrate/datastream_mover.rb ADDED
@@ -0,0 +1,78 @@
1
+ module FedoraMigrate
2
+ class DatastreamMover < Mover
3
+
4
+ attr_accessor :versionable
5
+
6
+ def post_initialize
7
+ raise FedoraMigrate::Errors::MigrationError, "You must supply a target" if target.nil?
8
+ end
9
+
10
+ def versionable?
11
+ versionable.nil? ? target_versionable? : versionable
12
+ end
13
+
14
+ def target_versionable?
15
+ if target.respond_to?(:versionable?)
16
+ target.versionable?
17
+ else
18
+ false
19
+ end
20
+ end
21
+
22
+ def migrate
23
+ if versionable?
24
+ migrate_versions
25
+ else
26
+ migrate_current
27
+ end
28
+ end
29
+
30
+ private
31
+
32
+ # Reloading the target, otherwise #get_checksum is nil
33
+ def migrate_current
34
+ migrate_content
35
+ target.reload
36
+ verify
37
+ end
38
+
39
+ def migrate_versions
40
+ source.versions.each do |version|
41
+ migrate_content(version)
42
+ target.create_version
43
+ verify(version)
44
+ end
45
+ end
46
+
47
+ # TODO: lastModified isn't the right place for the original creation date (issue #1)
48
+ def migrate_content datastream=nil
49
+ datastream ||= source
50
+ if datastream.content.nil?
51
+ Logger.info "datastream '#{datastream.dsid}' is nil. It's probably defined in the target but not present in the source"
52
+ return true
53
+ end
54
+ target.content = datastream.content
55
+ target.original_name = datastream.label
56
+ target.mime_type = datastream.mimeType
57
+ target.last_modified = datastream.createDate
58
+ Logger.info "#{target.inspect}"
59
+ save
60
+ end
61
+
62
+ # TODO: Reporting mechanism? If there isn't a checksum it defaults to "none" (issue #4)
63
+ def verify datastream=nil
64
+ datastream ||= source
65
+ target_checksum = get_checksum
66
+ return true if datastream.checksum == "none"
67
+ unless datastream.checksum == target_checksum.split(/:/).last
68
+ Logger.fatal "expected #{datastream.dsid} #{datastream.checksumType} #{datastream.checksum} to match #{target_checksum}"
69
+ end
70
+ end
71
+
72
+ def get_checksum
73
+ target.digest.first.to_s
74
+ end
75
+
76
+ end
77
+
78
+ end
data/lib/fedora_migrate/errors.rb ADDED
@@ -0,0 +1,7 @@
1
+ module FedoraMigrate
2
+ module Errors
3
+ # raise this error when moving or migrating content
4
+ class MigrationError < StandardError
5
+ end
6
+ end
7
+ end
data/lib/fedora_migrate/file_configurator.rb ADDED
@@ -0,0 +1,34 @@
1
+ module FedoraMigrate
2
+ class FileConfigurator < ActiveFedora::FileConfigurator
3
+
4
+ def fedora3_config
5
+ load_fedora3_config
6
+ @fedora_config
7
+ end
8
+
9
+ def load_fedora3_config
10
+ return @fedora_config unless @fedora_config.empty?
11
+ @fedora_config_path = get_config_path(:fedora3)
12
+ Logger.info("loading fedora config from #{::File.expand_path(@fedora_config_path)}")
13
+
14
+ begin
15
+ config_erb = ERB.new(IO.read(@fedora_config_path)).result(binding)
16
+ rescue Exception => e
17
+ raise("fedora.yml was found, but could not be parsed with ERB. \n#{$!.inspect}")
18
+ end
19
+
20
+ begin
21
+ fedora_yml = YAML.load(config_erb)
22
+ rescue Psych::SyntaxError => e
23
+ raise "fedora.yml was found, but could not be parsed. " \
24
+ "Error #{e.message}"
25
+ end
26
+
27
+ config = fedora_yml.symbolize_keys
28
+
29
+ cfg = config[ActiveFedora.environment.to_sym] || {}
30
+ @fedora_config = cfg.kind_of?(Array) ? cfg.map(&:symbolize_keys) : cfg.symbolize_keys
31
+ end
32
+
33
+ end
34
+ end
data/lib/fedora_migrate/hooks.rb ADDED
@@ -0,0 +1,11 @@
1
+ module FedoraMigrate
2
+ module Hooks
3
+
4
+ def before_object_migration
5
+ end
6
+
7
+ def after_object_migration
8
+ end
9
+
10
+ end
11
+ end
data/lib/fedora_migrate/logger.rb ADDED
@@ -0,0 +1,36 @@
1
+ module FedoraMigrate
2
+ class Logger
3
+
4
+ class << self
5
+
6
+ def method_missing method_name, *arguments, &block
7
+ logger.send(method_name, *arguments, &block)
8
+ rescue
9
+ super
10
+ end
11
+
12
+ def respond_to?(method_name, include_private = false)
13
+ logger.respond_to? method_name
14
+ end
15
+
16
+ def info msg
17
+ super("FedoraMigrate INFO: ##{caller_locations(1,1)[0].label} " + msg)
18
+ end
19
+
20
+ def warn msg
21
+ super("FedoraMigrate WARN: ##{caller_locations(1,1)[0].label} " + msg)
22
+ end
23
+
24
+ def fatal msg
25
+ super("FedoraMigrate FATAL: ##{caller_locations(1,1)[0].label} " + msg)
26
+ end
27
+
28
+ private
29
+
30
+ def logger
31
+ ActiveFedora::Base.logger || ::Logger.new(STDOUT)
32
+ end
33
+
34
+ end
35
+ end
36
+ end