fedora_2_to_3_pid_renamer 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +10 -6
- data/lib/fedora_2_to_3_pid_renamer/config.rb +3 -0
- data/lib/fedora_2_to_3_pid_renamer/manager.rb +19 -4
- data/lib/fedora_2_to_3_pid_renamer/manipulator/base.rb +45 -0
- data/lib/fedora_2_to_3_pid_renamer/manipulator/text.rb +19 -0
- data/lib/fedora_2_to_3_pid_renamer/manipulator/xml.rb +38 -0
- data/lib/fedora_2_to_3_pid_renamer/manipulator.rb +5 -47
- data/lib/fedora_2_to_3_pid_renamer/version.rb +9 -1
- data/lib/fedora_2_to_3_pid_renamer.rb +1 -1
- data/test/data/cmodel-1.deployments.txt +7 -0
- data/test/data/cmodel-1.members.txt +30 -0
- data/test/units/fedora_2_to_3_pid_renamer/manager_test.rb +28 -5
- data/test/units/fedora_2_to_3_pid_renamer/manipulator/text_test.rb +37 -0
- data/test/units/fedora_2_to_3_pid_renamer/{manipulator_test.rb → manipulator/xml_test.rb} +16 -16
- metadata +13 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a0abd69d649aee20584ec54a1efd3d72f204204a
|
4
|
+
data.tar.gz: c15acd2a1a50a7e985568dda8bb575165ae72cd9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 47470889083bc18f1aa7b543f042af0f629d75930c1bc286fb64cd23351c40064eff2f25e909db3da6fe21a7d998e74908e74621534cfdc30cb448f48e130977
|
7
|
+
data.tar.gz: df95f326d5b43825ec06787381b9e209a97606d3fb0a1619d1b3d34303a9b746692c019c48d77e7379f41817bb9dea2d4dfef3c2d8eec24307fb7a980f4cff77
|
data/README.md
CHANGED
@@ -12,10 +12,10 @@ the process involves running an Analyzer, that outputs a series of files.
|
|
12
12
|
Among these files are some XML files that describe the data objects that
|
13
13
|
are in the existing Fedora 2 database. These files need to be manipulated
|
14
14
|
so as to modify the Fedora 2 objects so that they will be suitable to be inserted
|
15
|
-
into a Fedora 3 database.
|
15
|
+
into a Fedora 3 database. Analyser also creates a set of cmodel-n.deployments.txt
|
16
|
+
files that also need to be manipulated.
|
16
17
|
|
17
|
-
This app will carry out the manipulation of the
|
18
|
-
Analyser
|
18
|
+
This app will carry out the manipulation of the files generated by the Analyser
|
19
19
|
|
20
20
|
## Installation
|
21
21
|
|
@@ -29,8 +29,10 @@ To use this app, you must first create a config.yml file. For example:
|
|
29
29
|
|
30
30
|
```
|
31
31
|
changes:
|
32
|
-
CModel1:
|
33
|
-
CModel1-SDep1:
|
32
|
+
CModel1: book
|
33
|
+
CModel1-SDep1: book-SDep1
|
34
|
+
CModel2: thesis
|
35
|
+
CModel2-SDep1: thesis-SDep1
|
34
36
|
|
35
37
|
changeme: foo
|
36
38
|
|
@@ -67,12 +69,14 @@ namespace.
|
|
67
69
|
|
68
70
|
Defines the paths to the input and output folders.
|
69
71
|
|
70
|
-
The input folder should contain the
|
72
|
+
The input folder should contain the files generated by the Analyser
|
71
73
|
|
72
74
|
An output folder is used so that Analyser files are not directly modified by
|
73
75
|
this process. The files can be deleted and the process repeated any number
|
74
76
|
of times until the output is satisfactory.
|
75
77
|
|
78
|
+
Only files that are changed by the process will be copied to the output folder.
|
79
|
+
|
76
80
|
### locations
|
77
81
|
|
78
82
|
The locations in the XML files, where the content should be modified. Each
|
@@ -1,4 +1,5 @@
|
|
1
|
-
|
1
|
+
# Manager manages the process of modifying a group of XML files as defined via
|
2
|
+
# a config object
|
2
3
|
module Fedora2To3PidRenamer
|
3
4
|
class Manager
|
4
5
|
|
@@ -17,15 +18,29 @@ module Fedora2To3PidRenamer
|
|
17
18
|
create_output_folder
|
18
19
|
Dir.glob(input_file_pattern).each do |input_file|
|
19
20
|
file_name = File.basename input_file
|
21
|
+
manipulator = manipulator_for(file_name)
|
22
|
+
next unless manipulator
|
23
|
+
|
20
24
|
input = File.read input_file
|
21
25
|
output_file = File.join config.output_folder, file_name
|
22
|
-
|
23
|
-
|
26
|
+
|
27
|
+
File.write output_file, manipulator.output_for(input, config)
|
24
28
|
end
|
25
29
|
end
|
26
30
|
|
27
31
|
def input_file_pattern
|
28
|
-
File.join config.input_folder, '
|
32
|
+
File.join config.input_folder, '*'
|
33
|
+
end
|
34
|
+
|
35
|
+
def manipulator_for(file_name)
|
36
|
+
case file_name
|
37
|
+
when /\.xml$/
|
38
|
+
Manipulator::Xml
|
39
|
+
when /cmodel-\d+\.deployments.txt/
|
40
|
+
Manipulator::Text
|
41
|
+
else
|
42
|
+
nil
|
43
|
+
end
|
29
44
|
end
|
30
45
|
end
|
31
46
|
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
module Fedora2To3PidRenamer::Manipulator
|
4
|
+
class Base
|
5
|
+
attr_reader :source, :config
|
6
|
+
|
7
|
+
def self.output_for(*args)
|
8
|
+
manipulator = new(*args)
|
9
|
+
manipulator.run
|
10
|
+
manipulator.output
|
11
|
+
end
|
12
|
+
|
13
|
+
def initialize(source, config)
|
14
|
+
@source = source
|
15
|
+
@config = config
|
16
|
+
end
|
17
|
+
|
18
|
+
def run
|
19
|
+
raise 'run must be defined in sub-classes'
|
20
|
+
end
|
21
|
+
|
22
|
+
def output
|
23
|
+
raise 'output must be defined in sub-classes'
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
def change(text, before, after)
|
28
|
+
pattern = Regexp.new("changeme:#{before}(#{end_of_string_or_non_word})")
|
29
|
+
replacement = "#{changeme_replacement}:#{after}#{first_capture_group}"
|
30
|
+
text.gsub! pattern, replacement
|
31
|
+
end
|
32
|
+
|
33
|
+
def end_of_string_or_non_word
|
34
|
+
'$|\W'
|
35
|
+
end
|
36
|
+
|
37
|
+
def first_capture_group
|
38
|
+
'\1'
|
39
|
+
end
|
40
|
+
|
41
|
+
def changeme_replacement
|
42
|
+
config.changeme_replacement
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
|
2
|
+
module Fedora2To3PidRenamer::Manipulator
|
3
|
+
class Text < Base
|
4
|
+
|
5
|
+
alias_method :text, :source
|
6
|
+
|
7
|
+
def run
|
8
|
+
config.changes.each do |before, after|
|
9
|
+
change(text, before, after)
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
def output
|
14
|
+
text
|
15
|
+
end
|
16
|
+
|
17
|
+
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# Manipulator is the tool used to modify the XML files passed to it based on
|
2
|
+
# the settings defined in the config.
|
3
|
+
require 'nokogiri'
|
4
|
+
module Fedora2To3PidRenamer::Manipulator
|
5
|
+
class Xml < Base
|
6
|
+
|
7
|
+
alias_method :raw_xml, :source
|
8
|
+
|
9
|
+
def run
|
10
|
+
modify_text_at_each_location_in_config
|
11
|
+
end
|
12
|
+
|
13
|
+
def xml
|
14
|
+
@xml ||= Nokogiri::XML(raw_xml)
|
15
|
+
end
|
16
|
+
|
17
|
+
def output
|
18
|
+
xml.to_s
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
def modify_text_at_each_location_in_config
|
23
|
+
config.locations.each do |location|
|
24
|
+
|
25
|
+
node = xml.xpath(location, config.namespaces).first
|
26
|
+
next unless node
|
27
|
+
|
28
|
+
text = node.value
|
29
|
+
config.changes.each do |before, after|
|
30
|
+
change(text, before, after)
|
31
|
+
end
|
32
|
+
node.value = text
|
33
|
+
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
end
|
@@ -1,50 +1,8 @@
|
|
1
|
-
|
1
|
+
require_relative 'manipulator/base'
|
2
|
+
require_relative 'manipulator/text'
|
3
|
+
require_relative 'manipulator/xml'
|
2
4
|
module Fedora2To3PidRenamer
|
3
|
-
|
4
|
-
|
5
|
-
attr_accessor :raw_xml, :config
|
6
|
-
|
7
|
-
def self.output_for(raw_xml, config)
|
8
|
-
manipulator = new(raw_xml, config)
|
9
|
-
manipulator.run
|
10
|
-
manipulator.output
|
11
|
-
end
|
12
|
-
|
13
|
-
def initialize(raw_xml, config)
|
14
|
-
@raw_xml = raw_xml
|
15
|
-
@config = config
|
16
|
-
end
|
17
|
-
|
18
|
-
def run
|
19
|
-
modify_text_at_each_location_in_config
|
20
|
-
end
|
21
|
-
|
22
|
-
def xml
|
23
|
-
@xml ||= Nokogiri::XML(raw_xml)
|
24
|
-
end
|
25
|
-
|
26
|
-
def output
|
27
|
-
xml.to_s
|
28
|
-
end
|
29
|
-
|
30
|
-
private
|
31
|
-
def modify_text_at_each_location_in_config
|
32
|
-
config.locations.each do |location|
|
33
|
-
|
34
|
-
node = xml.xpath(location, config.namespaces).first
|
35
|
-
next unless node
|
36
|
-
|
37
|
-
text = node.value
|
38
|
-
config.changes.each do |before, after|
|
39
|
-
text.gsub! before, after
|
40
|
-
end
|
41
|
-
|
42
|
-
text.gsub! 'changeme:', (config.changeme_replacement + ':')
|
43
|
-
|
44
|
-
node.value = text
|
45
|
-
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
5
|
+
module Manipulator
|
6
|
+
|
49
7
|
end
|
50
8
|
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# The following objects will be assigned to cmodel-1
|
2
|
+
# <class-description>
|
3
|
+
# OrigContentModel
|
4
|
+
# 'UVA_MRSID_IMAGE'
|
5
|
+
# BDefPIDs
|
6
|
+
# 'demo:8'
|
7
|
+
# BMechPIDs
|
8
|
+
# 'demo:9'
|
9
|
+
# BindingKeyAssignments
|
10
|
+
# for demo:9
|
11
|
+
# 'MRSID=DS1'
|
12
|
+
# DatastreamIDs
|
13
|
+
# 'DIGIPROV1', 'DS1', 'SOURCE1', 'DESC1', 'RIGHTS1', 'TECH1'
|
14
|
+
# MIMETypes
|
15
|
+
# for DIGIPROV1, 'text/xml'
|
16
|
+
# for DS1, 'image/x-mrsid-image'
|
17
|
+
# for SOURCE1, 'text/xml'
|
18
|
+
# for DESC1, 'text/xml'
|
19
|
+
# for RIGHTS1, 'text/xml'
|
20
|
+
# for TECH1, 'text/xml'
|
21
|
+
# FormatURIs
|
22
|
+
# for DIGIPROV1, none
|
23
|
+
# for DS1, none
|
24
|
+
# for SOURCE1, none
|
25
|
+
# for DESC1, none
|
26
|
+
# for RIGHTS1, none
|
27
|
+
# for TECH1, none
|
28
|
+
# </class-description>
|
29
|
+
demo:11
|
30
|
+
demo:10
|
@@ -4,6 +4,8 @@ require 'fileutils'
|
|
4
4
|
module Fedora2To3PidRenamer
|
5
5
|
class ManagerTest < Minitest::Test
|
6
6
|
|
7
|
+
# Creates data/input and data/output and copies test files into
|
8
|
+
# data/input for start of test
|
7
9
|
def setup
|
8
10
|
FileUtils.mkdir data_file_path('input')
|
9
11
|
FileUtils.mkdir data_file_path('output')
|
@@ -14,8 +16,9 @@ module Fedora2To3PidRenamer
|
|
14
16
|
end
|
15
17
|
|
16
18
|
def teardown
|
17
|
-
|
18
|
-
FileUtils.remove_dir data_file_path('
|
19
|
+
force = true
|
20
|
+
FileUtils.remove_dir data_file_path('input'), force
|
21
|
+
FileUtils.remove_dir data_file_path('output'), force
|
19
22
|
end
|
20
23
|
|
21
24
|
def test_before
|
@@ -36,7 +39,7 @@ module Fedora2To3PidRenamer
|
|
36
39
|
|
37
40
|
def test_run_manipulation
|
38
41
|
manager.run_manipulation
|
39
|
-
|
42
|
+
target_file_to_be_modified.each do |file|
|
40
43
|
output_file = data_file_path(File.join('output', file))
|
41
44
|
assert File.exist?(output_file), "#{output_file} should exist"
|
42
45
|
end
|
@@ -44,7 +47,7 @@ module Fedora2To3PidRenamer
|
|
44
47
|
|
45
48
|
def test_run_manipulation_alters_content
|
46
49
|
manager.run_manipulation
|
47
|
-
xml = xml_load File.join('output',
|
50
|
+
xml = xml_load File.join('output', xml_files_to_be_modified.first)
|
48
51
|
xpath = config.locations.first
|
49
52
|
text = xml.xpath(xpath).text
|
50
53
|
assert_match config.changeme_replacement, text
|
@@ -55,9 +58,29 @@ module Fedora2To3PidRenamer
|
|
55
58
|
end
|
56
59
|
|
57
60
|
def target_files
|
61
|
+
target_file_to_be_modified + target_file_to_be_skipped
|
62
|
+
end
|
63
|
+
|
64
|
+
def xml_files_to_be_modified
|
58
65
|
[
|
59
66
|
'cmodel-1.xml',
|
60
|
-
'cmodel-1.deployment1.xml'
|
67
|
+
'cmodel-1.deployment1.xml'
|
68
|
+
]
|
69
|
+
end
|
70
|
+
|
71
|
+
def text_file_to_be_modified
|
72
|
+
[
|
73
|
+
'cmodel-1.deployments.txt'
|
74
|
+
]
|
75
|
+
end
|
76
|
+
|
77
|
+
def target_file_to_be_modified
|
78
|
+
text_file_to_be_modified + xml_files_to_be_modified
|
79
|
+
end
|
80
|
+
|
81
|
+
def target_file_to_be_skipped
|
82
|
+
[
|
83
|
+
'cmodel-1.members.txt'
|
61
84
|
]
|
62
85
|
end
|
63
86
|
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require_relative '../../../test_helper'
|
2
|
+
|
3
|
+
module Fedora2To3PidRenamer::Manipulator
|
4
|
+
class TextTest < Minitest::Test
|
5
|
+
|
6
|
+
def test_run
|
7
|
+
before = 'changeme:CModel1-SDep1'
|
8
|
+
after = [
|
9
|
+
config.changeme_replacement,
|
10
|
+
config.change_for('CModel1-SDep1')
|
11
|
+
].join(':')
|
12
|
+
|
13
|
+
|
14
|
+
assert_match before, text_manipulator.text
|
15
|
+
refute_match after, text_manipulator.text
|
16
|
+
|
17
|
+
text_manipulator.run
|
18
|
+
|
19
|
+
refute_match before, text_manipulator.text
|
20
|
+
assert_match after, text_manipulator.text
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_text
|
24
|
+
assert_equal file_text, text_manipulator.text
|
25
|
+
end
|
26
|
+
|
27
|
+
private
|
28
|
+
def text_manipulator
|
29
|
+
@text_manipulator ||= Text.new(file_text, config)
|
30
|
+
end
|
31
|
+
|
32
|
+
def file_text
|
33
|
+
@file_text ||= file_read 'cmodel-1.deployments.txt'
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
37
|
+
end
|
@@ -1,57 +1,57 @@
|
|
1
|
-
require_relative '
|
1
|
+
require_relative '../../../test_helper'
|
2
2
|
|
3
|
-
module Fedora2To3PidRenamer
|
4
|
-
class
|
3
|
+
module Fedora2To3PidRenamer::Manipulator
|
4
|
+
class XmlTest < Minitest::Test
|
5
5
|
|
6
6
|
def test_run
|
7
7
|
before, after = config.changes.first
|
8
|
-
assert_at_xpath_text_changes(before, after) {
|
8
|
+
assert_at_xpath_text_changes(before, after) { xml_manipulator.run }
|
9
9
|
end
|
10
10
|
|
11
11
|
def test_run_modifies_changeme_namespace
|
12
12
|
before = 'changeme:'
|
13
13
|
after = config.changeme_replacement + ':'
|
14
|
-
assert_at_xpath_text_changes(before, after) {
|
14
|
+
assert_at_xpath_text_changes(before, after) { xml_manipulator.run }
|
15
15
|
end
|
16
16
|
|
17
17
|
def test_output
|
18
|
-
assert_equal Nokogiri::XML(xml_raw).to_s,
|
18
|
+
assert_equal Nokogiri::XML(xml_raw).to_s, xml_manipulator.output
|
19
19
|
end
|
20
20
|
|
21
21
|
def test_run_alters_output
|
22
|
-
|
23
|
-
refute_equal Nokogiri::XML(xml_raw).to_s,
|
22
|
+
xml_manipulator.run
|
23
|
+
refute_equal Nokogiri::XML(xml_raw).to_s, xml_manipulator.output
|
24
24
|
end
|
25
25
|
|
26
26
|
def test_run_produces_well_formed_xml
|
27
|
-
|
28
|
-
doc = Nokogiri::XML
|
27
|
+
xml_manipulator.run
|
28
|
+
doc = Nokogiri::XML xml_manipulator.output
|
29
29
|
assert doc.errors.empty?, "Nokogiri should not find errors: #{doc.errors}"
|
30
30
|
end
|
31
31
|
|
32
32
|
def test_output_for
|
33
|
-
|
34
|
-
assert_equal
|
33
|
+
xml_manipulator.run
|
34
|
+
assert_equal xml_manipulator.output, Xml.output_for(xml_raw, config)
|
35
35
|
end
|
36
36
|
|
37
37
|
private
|
38
38
|
def assert_at_xpath_text_changes(before, after)
|
39
39
|
xpath = config.locations.first
|
40
40
|
|
41
|
-
text_before =
|
41
|
+
text_before = xml_manipulator.xml.xpath(xpath, config.namespaces).text
|
42
42
|
assert_match before, text_before
|
43
43
|
refute_match after, text_before
|
44
44
|
|
45
45
|
yield
|
46
46
|
|
47
|
-
text_after =
|
47
|
+
text_after = xml_manipulator.xml.xpath(xpath, config.namespaces).text
|
48
48
|
refute_match before, text_after
|
49
49
|
assert_match after, text_after
|
50
50
|
end
|
51
51
|
|
52
52
|
|
53
|
-
def
|
54
|
-
@
|
53
|
+
def xml_manipulator
|
54
|
+
@xml_manipulator ||= Xml.new(xml_raw, config)
|
55
55
|
end
|
56
56
|
|
57
57
|
def xml_raw
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fedora_2_to_3_pid_renamer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Nichols
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-09-
|
11
|
+
date: 2015-09-25 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Interates through a series of xml files and modifies their content based
|
14
14
|
on the setting defined in a configuration file
|
@@ -30,15 +30,21 @@ files:
|
|
30
30
|
- lib/fedora_2_to_3_pid_renamer/config.rb
|
31
31
|
- lib/fedora_2_to_3_pid_renamer/manager.rb
|
32
32
|
- lib/fedora_2_to_3_pid_renamer/manipulator.rb
|
33
|
+
- lib/fedora_2_to_3_pid_renamer/manipulator/base.rb
|
34
|
+
- lib/fedora_2_to_3_pid_renamer/manipulator/text.rb
|
35
|
+
- lib/fedora_2_to_3_pid_renamer/manipulator/xml.rb
|
33
36
|
- lib/fedora_2_to_3_pid_renamer/version.rb
|
34
37
|
- test/data/cmodel-1.deployment1.xml
|
38
|
+
- test/data/cmodel-1.deployments.txt
|
39
|
+
- test/data/cmodel-1.members.txt
|
35
40
|
- test/data/cmodel-1.xml
|
36
41
|
- test/data/config.yml
|
37
42
|
- test/data/simple.xml
|
38
43
|
- test/test_helper.rb
|
39
44
|
- test/units/fedora_2_to_3_pid_renamer/config_test.rb
|
40
45
|
- test/units/fedora_2_to_3_pid_renamer/manager_test.rb
|
41
|
-
- test/units/fedora_2_to_3_pid_renamer/
|
46
|
+
- test/units/fedora_2_to_3_pid_renamer/manipulator/text_test.rb
|
47
|
+
- test/units/fedora_2_to_3_pid_renamer/manipulator/xml_test.rb
|
42
48
|
- test/units/fedora_2_to_3_pid_renamer_test.rb
|
43
49
|
homepage: https://github.com/reggieb/fedora_2_to_3_pid_renamer
|
44
50
|
licenses: []
|
@@ -66,11 +72,14 @@ summary: A small Ruby app used to apply alternative pid names to configuration f
|
|
66
72
|
created during the migration of fedora 2 data to fedora 3
|
67
73
|
test_files:
|
68
74
|
- test/data/cmodel-1.deployment1.xml
|
75
|
+
- test/data/cmodel-1.deployments.txt
|
76
|
+
- test/data/cmodel-1.members.txt
|
69
77
|
- test/data/cmodel-1.xml
|
70
78
|
- test/data/config.yml
|
71
79
|
- test/data/simple.xml
|
72
80
|
- test/test_helper.rb
|
73
81
|
- test/units/fedora_2_to_3_pid_renamer/config_test.rb
|
74
82
|
- test/units/fedora_2_to_3_pid_renamer/manager_test.rb
|
75
|
-
- test/units/fedora_2_to_3_pid_renamer/
|
83
|
+
- test/units/fedora_2_to_3_pid_renamer/manipulator/text_test.rb
|
84
|
+
- test/units/fedora_2_to_3_pid_renamer/manipulator/xml_test.rb
|
76
85
|
- test/units/fedora_2_to_3_pid_renamer_test.rb
|