assembly-utils 1.4.5 → 1.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/.gitignore +2 -1
- data/.rubocop.yml +17 -0
- data/.rubocop_todo.yml +196 -0
- data/.travis.yml +8 -0
- data/Gemfile +1 -2
- data/README.md +7 -4
- data/Rakefile +2 -2
- data/assembly-utils.gemspec +14 -15
- data/config/boot.rb +1 -1
- data/config/connect_to_dor.rb +1 -1
- data/lib/assembly-utils.rb +30 -32
- data/lib/assembly-utils/utils.rb +320 -384
- data/lib/assembly-utils/version.rb +2 -2
- data/spec/spec_helper.rb +5 -11
- data/spec/utils_spec.rb +52 -52
- metadata +35 -46
checksums.yaml
CHANGED
@@ -1,15 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
5
|
-
data.tar.gz: !binary |-
|
6
|
-
MDNhMTdiOWQ5ZTliYmMzYTU1OWY4NzJkYzU2MzZkOTUwMzZiZmEwYQ==
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 3cab491b6f6fa7b7ef55be7efe3778c938f86021
|
4
|
+
data.tar.gz: 3dd67bee16cac2409e1838c69d9baa1404d6d748
|
7
5
|
SHA512:
|
8
|
-
metadata.gz:
|
9
|
-
|
10
|
-
ZmEzYjliYzkyODIzYzhlMTRmZjEyOWRmNDk1MDQzYTQwZmIyM2RkODAzZjUx
|
11
|
-
YmY0NTVmNzY1YWQ2MWVkNGIyOWM4ZmM2OTIxODdjYmZhZmVjZWI=
|
12
|
-
data.tar.gz: !binary |-
|
13
|
-
MDRmOGZkNTQ2OWI3ZTQ2YjA3YjIwNTZlODUyZGE1NWE4NDE0ZjI2Nzg3NWQ2
|
14
|
-
MDhjMmRmNmY4OGUwN2IzNmUwZTUyNDlkZWIzYjUyZGY2ZmFmMGY3Njg2ZTUw
|
15
|
-
ZmU3ZGY0YTA1MzYxMmEwMjE3MzI2ZDQ3MWJjM2VmNmVhOGM1NDA=
|
6
|
+
metadata.gz: 229e597cc957ad504ea2f90f3b6299908ef9b07853ab3e6fcfde235d7aa5cabeca6a6d3ffc5654699fe081b2c5a0189ff6e638400e3c9bee61ad35efcb385a28
|
7
|
+
data.tar.gz: 09f142c4c163d41ee9e6e62f1c2129317190a99d7c2c0c8ba41202dffcc2300570dba2d081f4bb9c004169f651e3dddc02f87543a76a18864c4841847cdddda9
|
data/.gitignore
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
inherit_from: .rubocop_todo.yml
|
2
|
+
|
3
|
+
# rspec expect{...} is conventional
|
4
|
+
Style/SpaceBeforeBlockBraces:
|
5
|
+
Exclude:
|
6
|
+
- 'spec/**/*_spec.rb'
|
7
|
+
|
8
|
+
# Allow for alignment
|
9
|
+
Style/SpaceAroundOperators:
|
10
|
+
MultiSpaceAllowedForOperators: true
|
11
|
+
|
12
|
+
Style/EmptyLineBetweenDefs:
|
13
|
+
AllowAdjacentOneLineDefs: true
|
14
|
+
|
15
|
+
# Configuration parameters: AllowURI, URISchemes.
|
16
|
+
Metrics/LineLength:
|
17
|
+
Max: 200
|
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,196 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2016-01-04 15:54:00 -0800 using RuboCop version 0.35.1.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 2
|
10
|
+
Lint/AmbiguousOperator:
|
11
|
+
Exclude:
|
12
|
+
- 'lib/assembly-utils/utils.rb'
|
13
|
+
|
14
|
+
# Offense count: 1
|
15
|
+
# Cop supports --auto-correct.
|
16
|
+
# Configuration parameters: AlignWith, SupportedStyles, AutoCorrect.
|
17
|
+
Lint/DefEndAlignment:
|
18
|
+
Enabled: false
|
19
|
+
|
20
|
+
# Offense count: 2
|
21
|
+
Lint/HandleExceptions:
|
22
|
+
Exclude:
|
23
|
+
- 'lib/assembly-utils/utils.rb'
|
24
|
+
|
25
|
+
# Offense count: 2
|
26
|
+
Lint/RescueException:
|
27
|
+
Exclude:
|
28
|
+
- 'lib/assembly-utils/utils.rb'
|
29
|
+
|
30
|
+
# Offense count: 1
|
31
|
+
# Cop supports --auto-correct.
|
32
|
+
# Configuration parameters: IgnoreEmptyBlocks.
|
33
|
+
Lint/UnusedBlockArgument:
|
34
|
+
Exclude:
|
35
|
+
- 'lib/assembly-utils/utils.rb'
|
36
|
+
|
37
|
+
# Offense count: 1
|
38
|
+
# Cop supports --auto-correct.
|
39
|
+
# Configuration parameters: AllowUnusedKeywordArguments, IgnoreEmptyMethods.
|
40
|
+
Lint/UnusedMethodArgument:
|
41
|
+
Exclude:
|
42
|
+
- 'lib/assembly-utils/utils.rb'
|
43
|
+
|
44
|
+
# Offense count: 2
|
45
|
+
Lint/UselessAssignment:
|
46
|
+
Exclude:
|
47
|
+
- 'lib/assembly-utils/utils.rb'
|
48
|
+
- 'spec/spec_helper.rb'
|
49
|
+
|
50
|
+
# Offense count: 6
|
51
|
+
Metrics/AbcSize:
|
52
|
+
Max: 59
|
53
|
+
|
54
|
+
# Offense count: 1
|
55
|
+
# Configuration parameters: CountComments.
|
56
|
+
Metrics/ClassLength:
|
57
|
+
Max: 385
|
58
|
+
|
59
|
+
# Offense count: 4
|
60
|
+
Metrics/CyclomaticComplexity:
|
61
|
+
Max: 19
|
62
|
+
|
63
|
+
# Offense count: 1
|
64
|
+
# Configuration parameters: AllowURI, URISchemes.
|
65
|
+
Metrics/LineLength:
|
66
|
+
Max: 226
|
67
|
+
|
68
|
+
# Offense count: 10
|
69
|
+
# Configuration parameters: CountComments.
|
70
|
+
Metrics/MethodLength:
|
71
|
+
Max: 44
|
72
|
+
|
73
|
+
# Offense count: 4
|
74
|
+
Metrics/PerceivedComplexity:
|
75
|
+
Max: 20
|
76
|
+
|
77
|
+
# Offense count: 4
|
78
|
+
# Cop supports --auto-correct.
|
79
|
+
# Configuration parameters: EnforcedHashRocketStyle, EnforcedColonStyle, EnforcedLastArgumentHashStyle, SupportedLastArgumentHashStyles.
|
80
|
+
Style/AlignHash:
|
81
|
+
Exclude:
|
82
|
+
- 'lib/assembly-utils/utils.rb'
|
83
|
+
|
84
|
+
# Offense count: 1
|
85
|
+
# Cop supports --auto-correct.
|
86
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles, ProceduralMethods, FunctionalMethods, IgnoredMethods.
|
87
|
+
Style/BlockDelimiters:
|
88
|
+
Exclude:
|
89
|
+
- 'lib/assembly-utils/utils.rb'
|
90
|
+
|
91
|
+
# Offense count: 4
|
92
|
+
# Cop supports --auto-correct.
|
93
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
94
|
+
Style/BracesAroundHashParameters:
|
95
|
+
Exclude:
|
96
|
+
- 'spec/utils_spec.rb'
|
97
|
+
|
98
|
+
# Offense count: 1
|
99
|
+
# Configuration parameters: Exclude.
|
100
|
+
Style/Documentation:
|
101
|
+
Exclude:
|
102
|
+
- 'spec/**/*'
|
103
|
+
- 'test/**/*'
|
104
|
+
- 'lib/assembly-utils.rb'
|
105
|
+
|
106
|
+
# Offense count: 6
|
107
|
+
# Cop supports --auto-correct.
|
108
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
109
|
+
Style/EmptyLinesAroundBlockBody:
|
110
|
+
Exclude:
|
111
|
+
- 'assembly-utils.gemspec'
|
112
|
+
- 'spec/spec_helper.rb'
|
113
|
+
- 'spec/utils_spec.rb'
|
114
|
+
|
115
|
+
# Offense count: 1
|
116
|
+
# Cop supports --auto-correct.
|
117
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
118
|
+
Style/EmptyLinesAroundClassBody:
|
119
|
+
Exclude:
|
120
|
+
- 'lib/assembly-utils/utils.rb'
|
121
|
+
|
122
|
+
# Offense count: 4
|
123
|
+
# Cop supports --auto-correct.
|
124
|
+
Style/EmptyLinesAroundMethodBody:
|
125
|
+
Exclude:
|
126
|
+
- 'lib/assembly-utils/utils.rb'
|
127
|
+
|
128
|
+
# Offense count: 1
|
129
|
+
# Configuration parameters: Exclude.
|
130
|
+
Style/FileName:
|
131
|
+
Exclude:
|
132
|
+
- 'lib/assembly-utils.rb'
|
133
|
+
|
134
|
+
# Offense count: 23
|
135
|
+
# Cop supports --auto-correct.
|
136
|
+
# Configuration parameters: SupportedStyles, UseHashRocketsWithSymbolValues.
|
137
|
+
Style/HashSyntax:
|
138
|
+
EnforcedStyle: hash_rockets
|
139
|
+
|
140
|
+
# Offense count: 3
|
141
|
+
# Configuration parameters: NamePrefix, NamePrefixBlacklist, NameWhitelist.
|
142
|
+
Style/PredicateName:
|
143
|
+
Exclude:
|
144
|
+
- 'lib/assembly-utils/utils.rb'
|
145
|
+
|
146
|
+
# Offense count: 2
|
147
|
+
# Cop supports --auto-correct.
|
148
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles, AllowInnerSlashes.
|
149
|
+
Style/RegexpLiteral:
|
150
|
+
Exclude:
|
151
|
+
- 'spec/utils_spec.rb'
|
152
|
+
|
153
|
+
# Offense count: 1
|
154
|
+
# Cop supports --auto-correct.
|
155
|
+
Style/RescueModifier:
|
156
|
+
Exclude:
|
157
|
+
- 'spec/utils_spec.rb'
|
158
|
+
|
159
|
+
# Offense count: 2
|
160
|
+
# Cop supports --auto-correct.
|
161
|
+
# Configuration parameters: AllowAsExpressionSeparator.
|
162
|
+
Style/Semicolon:
|
163
|
+
Exclude:
|
164
|
+
- 'lib/assembly-utils/utils.rb'
|
165
|
+
- 'spec/spec_helper.rb'
|
166
|
+
|
167
|
+
# Offense count: 5
|
168
|
+
# Cop supports --auto-correct.
|
169
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
170
|
+
Style/SignalException:
|
171
|
+
Exclude:
|
172
|
+
- 'lib/assembly-utils/utils.rb'
|
173
|
+
|
174
|
+
# Offense count: 2
|
175
|
+
# Cop supports --auto-correct.
|
176
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
177
|
+
Style/SpaceBeforeBlockBraces:
|
178
|
+
Enabled: false
|
179
|
+
|
180
|
+
# Offense count: 27
|
181
|
+
# Cop supports --auto-correct.
|
182
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles, EnforcedStyleForEmptyBraces, SpaceBeforeBlockParameters.
|
183
|
+
Style/SpaceInsideBlockBraces:
|
184
|
+
Enabled: false
|
185
|
+
|
186
|
+
# Offense count: 10
|
187
|
+
# Cop supports --auto-correct.
|
188
|
+
Style/SpaceInsideBrackets:
|
189
|
+
Exclude:
|
190
|
+
- 'lib/assembly-utils.rb'
|
191
|
+
|
192
|
+
# Offense count: 14
|
193
|
+
# Cop supports --auto-correct.
|
194
|
+
# Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces, SupportedStyles.
|
195
|
+
Style/SpaceInsideHashLiteralBraces:
|
196
|
+
Enabled: false
|
data/.travis.yml
ADDED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,9 @@
|
|
1
|
+
[Build Status](https://travis-ci.org/sul-dlss/assembly-utils)
|
2
|
+
[Dependency Status](https://gemnasium.com/sul-dlss/assembly-utils)
|
3
|
+
|
1
4
|
# Assembly Utils Gem
|
2
5
|
|
3
|
-
## Overview
|
6
|
+
## Overview
|
4
7
|
Ruby gem of methods useful for assembly and accessioning. Contains classes to
|
5
8
|
manipulate DOR objects for assembly and accessioning.
|
6
9
|
|
@@ -25,10 +28,10 @@ manipulate DOR objects for assembly and accessioning.
|
|
25
28
|
* 1.1.0 small updates to some methods to deprecate `apo_workflow` checking method
|
26
29
|
* 1.1.1 add dor-workflow-service gem and convenience method to auto reset
|
27
30
|
all objects in a specific state back to waiting
|
28
|
-
* 1.1.2 allow `reset_workflow_state` to accept a state parameter
|
31
|
+
* 1.1.2 allow `reset_workflow_state` to accept a state parameter
|
29
32
|
* 1.1.3 add a reindex method
|
30
33
|
* 1.1.4 fixed delete object method to remove from solr too; fix cleanup
|
31
|
-
object method to remove both old and new style druid trees
|
34
|
+
object method to remove both old and new style druid trees
|
32
35
|
* 1.1.5 add new claim druid method
|
33
36
|
* 1.1.6 add new step to remove workflows during `cleanup_object`
|
34
37
|
* 1.1.7 change ordering of cleanup steps so if deleting workflow fails, the
|
@@ -45,7 +48,7 @@ manipulate DOR objects for assembly and accessioning.
|
|
45
48
|
* 1.2.5-1.2.7 update gemfile to newer version of dor-services gem and other related/dependent gems
|
46
49
|
* 1.2.8 add a new constant for technical metadata filename
|
47
50
|
* 1.4.1-2 update gems and fix typos
|
48
|
-
|
51
|
+
* 1.4.6 rubocop fixes, and other refactoring
|
49
52
|
|
50
53
|
## Running tests
|
51
54
|
|
data/Rakefile
CHANGED
data/assembly-utils.gemspec
CHANGED
@@ -1,14 +1,14 @@
|
|
1
|
-
$LOAD_PATH.push File.expand_path(
|
1
|
+
$LOAD_PATH.push File.expand_path('../lib', __FILE__)
|
2
2
|
require 'assembly-utils/version'
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = 'assembly-utils'
|
6
6
|
s.version = Assembly::Utils::VERSION
|
7
|
-
s.authors = [
|
8
|
-
s.email = [
|
9
|
-
s.homepage =
|
10
|
-
s.summary =
|
11
|
-
s.description =
|
7
|
+
s.authors = ['Peter Mangiafico', 'Monty Hindman']
|
8
|
+
s.email = ['pmangiafico@stanford.edu']
|
9
|
+
s.homepage = ''
|
10
|
+
s.summary = 'Ruby gem of methods usesful for assembly and accessioning.'
|
11
|
+
s.description = 'Contains classes to manipulate DOR objects for assembly and accessioning'
|
12
12
|
|
13
13
|
s.rubyforge_project = 'assembly-utils'
|
14
14
|
|
@@ -20,17 +20,16 @@ Gem::Specification.new do |s|
|
|
20
20
|
s.add_dependency 'nokogiri'
|
21
21
|
s.add_dependency 'csv-mapper'
|
22
22
|
s.add_dependency 'fastercsv'
|
23
|
-
s.add_dependency 'druid-tools',
|
24
|
-
|
25
|
-
s.add_dependency 'dor-services',
|
23
|
+
s.add_dependency 'druid-tools', '>= 0.2.6'
|
24
|
+
|
25
|
+
s.add_dependency 'dor-services', '~> 5.3'
|
26
26
|
s.add_dependency 'dor-workflow-service', '>=1.3.1'
|
27
|
-
|
27
|
+
|
28
28
|
s.add_dependency 'activesupport'
|
29
29
|
s.add_dependency 'activeresource'
|
30
|
-
s.add_dependency 'addressable', '2.3.5' # avoids ERROR: lib/addressable/uri.rb:1659:in `normalized_fragment': can't modify frozen Addressable::URI (RuntimeError)
|
31
30
|
|
32
|
-
s.add_development_dependency
|
33
|
-
s.add_development_dependency
|
34
|
-
s.add_development_dependency
|
35
|
-
|
31
|
+
s.add_development_dependency 'rake'
|
32
|
+
s.add_development_dependency 'rspec', '~> 3.1'
|
33
|
+
s.add_development_dependency 'yard'
|
34
|
+
|
36
35
|
end
|
data/config/boot.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
require 'rubygems'
|
2
|
-
CERT_DIR = File.join(File.dirname(__FILE__),
|
2
|
+
CERT_DIR = File.join(File.dirname(__FILE__), '.', 'certs')
|
3
3
|
|
4
4
|
environment = ENV['ENVIRONMENT'] || ENV['ROBOT_ENVIRONMENT'] || ENV['RAILS_ENV'] || 'development'
|
5
5
|
project_root = File.expand_path(File.dirname(__FILE__) + '/..')
|
data/config/connect_to_dor.rb
CHANGED
data/lib/assembly-utils.rb
CHANGED
@@ -1,37 +1,35 @@
|
|
1
1
|
module Assembly
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
]
|
32
|
-
|
2
|
+
# Base PURL URL
|
3
|
+
PURL_BASE_URL = 'http://purl.stanford.edu'
|
4
|
+
|
5
|
+
# Default content metadata file present at root of each object directory
|
6
|
+
CONTENT_MD_FILE = 'contentMetadata.xml'
|
7
|
+
|
8
|
+
# Default descriptive metadata file present at root of each object directory
|
9
|
+
DESC_MD_FILE = 'descMetadata.xml'
|
10
|
+
|
11
|
+
# Default technical metadata file present at root of each object directory
|
12
|
+
TECHNICAL_MD_FILE = 'technicalMetadata.xml'
|
13
|
+
|
14
|
+
# Default DOR workspace directory
|
15
|
+
DOR_WORKSPACE = '/dor/workspace'
|
16
|
+
|
17
|
+
# Default assembly directory, can be overwritten by the value set in the project specific YAML configuration
|
18
|
+
ASSEMBLY_WORKSPACE = '/dor/assembly'
|
19
|
+
|
20
|
+
# Assembly workflow name
|
21
|
+
ASSEMBLY_WF = 'assemblyWF'
|
22
|
+
|
23
|
+
# Assembly workflow steps, used for cleanup
|
24
|
+
ASSEMBLY_WF_STEPS = [
|
25
|
+
[ 'start-assembly', 'completed' ],
|
26
|
+
[ 'jp2-create', 'waiting' ],
|
27
|
+
[ 'checksum-compute', 'waiting' ],
|
28
|
+
[ 'exif-collect', 'waiting' ],
|
29
|
+
[ 'accessioning-initiate', 'waiting' ]
|
30
|
+
]
|
33
31
|
end
|
34
32
|
|
35
33
|
require 'dor-services'
|
36
34
|
# auto-include all files in the assembly-utils sub-directory of lib
|
37
|
-
Dir[File.dirname(__FILE__) + '/assembly-utils/*.rb'].each {|file| require file unless file=='version.rb'}
|
35
|
+
Dir[File.dirname(__FILE__) + '/assembly-utils/*.rb'].each {|file| require file unless file == 'version.rb'}
|
data/lib/assembly-utils/utils.rb
CHANGED
@@ -4,19 +4,16 @@ require 'csv-mapper'
|
|
4
4
|
require 'druid-tools'
|
5
5
|
|
6
6
|
begin
|
7
|
-
require 'net/ssh/kerberos'
|
7
|
+
require 'net/ssh/kerberos'
|
8
8
|
rescue LoadError
|
9
9
|
end
|
10
10
|
|
11
11
|
module Assembly
|
12
|
-
|
13
|
-
|
14
12
|
# The Utils class contains methods to help with accessioning and assembly
|
15
13
|
class Utils
|
16
|
-
|
17
14
|
WFS = Dor::WorkflowService
|
18
15
|
REPO = 'dor'
|
19
|
-
|
16
|
+
|
20
17
|
# Get the staging directory tree given a druid, and optionally prepend a basepath.
|
21
18
|
# Deprecated and should not be needed anymore.
|
22
19
|
#
|
@@ -28,60 +25,45 @@ module Assembly
|
|
28
25
|
# Example:
|
29
26
|
# puts Assembly::Utils.get_staging_path('aa000aa0001','tmp')
|
30
27
|
# > "tmp/aa/000/aa/0001"
|
31
|
-
def self.get_staging_path(pid,base_path=nil)
|
32
|
-
d=DruidTools::Druid.new(pid,base_path)
|
33
|
-
|
34
|
-
return path
|
28
|
+
def self.get_staging_path(pid, base_path = nil)
|
29
|
+
d = DruidTools::Druid.new(pid, base_path)
|
30
|
+
File.dirname(d.path)
|
35
31
|
end
|
36
32
|
|
37
33
|
# Insert the specified workflow into the specified object.
|
38
34
|
#
|
39
35
|
# @param [String] pid druid pid (e.g. 'aa000aa0001')
|
40
36
|
# @param [String] workflow name (e.g. 'accessionWF')
|
41
|
-
# @param [String] repository name (e.g. 'dor') -- optional, defaults to dor
|
37
|
+
# @param [String] repository name (e.g. 'dor') -- optional, defaults to dor
|
42
38
|
#
|
43
39
|
# @return [boolean] indicates success of web service call
|
44
40
|
#
|
45
41
|
# Example:
|
46
42
|
# puts Assembly::Utils.insert_workflow('druid:aa000aa0001','accessionWF')
|
47
43
|
# > true
|
48
|
-
def self.insert_workflow(pid,workflow,repo='dor')
|
49
|
-
url
|
44
|
+
def self.insert_workflow(pid, workflow, repo = 'dor')
|
45
|
+
url = "#{Dor::Config.dor.service_root}/objects/#{pid}/apo_workflows/#{workflow}"
|
50
46
|
result = RestClient.post url, {}
|
51
|
-
|
47
|
+
[200, 201, 202, 204].include?(result.code) && result
|
52
48
|
end
|
53
|
-
|
49
|
+
|
54
50
|
# Claim a specific druid as already used to be sure it won't get used again.
|
55
51
|
# Not needed for normal purposes, only if you manually register something in Fedora Admin outside of DOR services gem.
|
56
52
|
#
|
57
53
|
# @param [String] pid druid pid (e.g. 'aa000aa0001')
|
58
|
-
#
|
59
54
|
# @return [boolean] indicates success of web service call
|
60
55
|
#
|
61
56
|
# Example:
|
62
57
|
# puts Assembly::Utils.claim_druid('aa000aa0001')
|
63
58
|
# > true
|
64
59
|
def self.claim_druid(pid)
|
65
|
-
sc
|
66
|
-
url
|
67
|
-
rcr
|
68
|
-
resp
|
69
|
-
|
60
|
+
sc = Dor::Config.suri
|
61
|
+
url = "#{sc.url}/suri2/namespaces/#{sc.id_namespace}"
|
62
|
+
rcr = RestClient::Resource.new(url, :user => sc.user, :password => sc.pass)
|
63
|
+
resp = rcr["identifiers/#{pid}"].put('')
|
64
|
+
resp.code == '204'
|
70
65
|
end
|
71
|
-
|
72
|
-
# Force a full re-index of the supplied druid in solr and fedora.
|
73
|
-
#
|
74
|
-
# @param [String] druid druid (e.g. 'druid:aa000aa0001')
|
75
|
-
#
|
76
|
-
# Example:
|
77
|
-
# puts Assembly::Utils.reindex('druid:aa000aa0001')
|
78
|
-
def self.reindex(druid)
|
79
|
-
obj = Dor.load_instance druid
|
80
|
-
solr_doc = obj.to_solr
|
81
|
-
Dor::SearchService.solr.add(solr_doc, :add_attributes => {:commitWithin => 1000}) unless obj.nil?
|
82
|
-
Dor.find(pid).update_index
|
83
|
-
end
|
84
|
-
|
66
|
+
|
85
67
|
# Export one or more objects given a single or array of pids, with output to the specified directory as FOXML files
|
86
68
|
#
|
87
69
|
# @param [Array] pids - an array of pids to export (can also pass a single pid as a string)
|
@@ -89,9 +71,9 @@ module Assembly
|
|
89
71
|
#
|
90
72
|
# Example:
|
91
73
|
# Assembly::Utils.export_objects(['druid:aa000aa0001','druid:bb000bb0001'],'/tmp')
|
92
|
-
def self.export_objects(pids,output_dir)
|
93
|
-
pids=[pids] if pids.class==String
|
94
|
-
pids.each {|pid| ActiveFedora::FixtureExporter.export_to_path(pid, output_dir)}
|
74
|
+
def self.export_objects(pids, output_dir)
|
75
|
+
pids = [pids] if pids.class == String
|
76
|
+
pids.each {|pid| ActiveFedora::FixtureExporter.export_to_path(pid, output_dir)}
|
95
77
|
end
|
96
78
|
|
97
79
|
# Import all of the FOXML files in the specified directory into Fedora
|
@@ -102,30 +84,28 @@ module Assembly
|
|
102
84
|
# Assembly::Utils.import_objects('/tmp')
|
103
85
|
def self.import_objects(source_dir)
|
104
86
|
Dir.chdir(source_dir)
|
105
|
-
files=Dir.glob('*.foxml.xml')
|
87
|
+
files = Dir.glob('*.foxml.xml')
|
106
88
|
files.each do |file|
|
107
|
-
pid = ActiveFedora::FixtureLoader.import_to_fedora(File.join(source_dir,file))
|
89
|
+
pid = ActiveFedora::FixtureLoader.import_to_fedora(File.join(source_dir, file))
|
108
90
|
ActiveFedora::FixtureLoader.index(pid)
|
109
91
|
end
|
110
92
|
end
|
111
|
-
|
112
|
-
# Get a list of druids that match the given array of source IDs.
|
93
|
+
|
94
|
+
# Get a list of druids that match the given array of source IDs.
|
113
95
|
# This method only works when this gem is used in a project that is configured to connect to DOR
|
114
96
|
#
|
115
97
|
# @param [Array] source_ids array of source ids to look up
|
116
|
-
#
|
117
98
|
# @return [array] druids
|
118
99
|
# Example:
|
119
|
-
#
|
120
100
|
# puts Assembly::Utils.get_druids_by_sourceid(['revs-01','revs-02'])
|
121
101
|
# > ['druid:aa000aa0001','druid:aa000aa0002']
|
122
102
|
def self.get_druids_by_sourceid(source_ids)
|
123
|
-
druids=[]
|
124
|
-
source_ids.each {|sid| druids
|
103
|
+
druids = []
|
104
|
+
source_ids.each {|sid| druids << Dor::SearchService.query_by_id(sid)}
|
125
105
|
druids.flatten
|
126
106
|
end
|
127
|
-
|
128
|
-
# Show the workflow status of specific steps in assembly and/or accession workflows for the provided druids.
|
107
|
+
|
108
|
+
# Show the workflow status of specific steps in assembly and/or accession workflows for the provided druids.
|
129
109
|
# This method only works when this gem is used in a project that is configured to connect to DOR
|
130
110
|
#
|
131
111
|
# @param [Hash] params parameters specified as a hash, using symbols for options:
|
@@ -137,41 +117,40 @@ module Assembly
|
|
137
117
|
#
|
138
118
|
# Example:
|
139
119
|
# Assembly::Utils.workflow_status(:druids=>['druid:aa000aa0001','druid:aa000aa0002'],:workflows=>[:assembly,:accession],:filename=>'output.csv')
|
140
|
-
def self.workflow_status(params={})
|
141
|
-
|
142
|
-
druids=params[:druids] || []
|
143
|
-
workflows=params[:workflows] || [:assembly]
|
144
|
-
filename=params[:filename] || ''
|
120
|
+
def self.workflow_status(params = {})
|
145
121
|
|
122
|
+
druids = params[:druids] || []
|
123
|
+
workflows = params[:workflows] || [:assembly]
|
124
|
+
filename = params[:filename] || ''
|
146
125
|
accession_steps = %w(content-metadata descriptive-metadata rights-metadata remediate-object shelve publish)
|
147
|
-
assembly_steps
|
126
|
+
assembly_steps = %w(jp2-create checksum-compute exif-collect accessioning-initiate)
|
148
127
|
|
149
|
-
puts
|
128
|
+
puts 'Generating report'
|
150
129
|
|
151
|
-
csv = CSV.open(filename,
|
152
|
-
|
153
|
-
header=[
|
154
|
-
header << assembly_steps
|
130
|
+
csv = CSV.open(filename, 'w') if filename != ''
|
131
|
+
|
132
|
+
header = ['druid']
|
133
|
+
header << assembly_steps if workflows.include?(:assembly)
|
155
134
|
header << accession_steps if workflows.include?(:accession)
|
156
135
|
csv << header.flatten if filename != ''
|
157
|
-
puts header.join(',')
|
158
|
-
|
136
|
+
puts header.join(',')
|
137
|
+
|
159
138
|
druids.each do |druid|
|
160
|
-
output=[druid]
|
161
|
-
assembly_steps.each
|
162
|
-
accession_steps.each {|step| output <<
|
139
|
+
output = [druid]
|
140
|
+
assembly_steps.each {|step| output << get_workflow_status(druid, 'assemblyWF', step )} if workflows.include?(:assembly)
|
141
|
+
accession_steps.each {|step| output << get_workflow_status(druid, 'accessionWF', step)} if workflows.include?(:accession)
|
163
142
|
csv << output if filename != ''
|
164
143
|
puts output.join(',')
|
165
144
|
end
|
166
|
-
|
145
|
+
|
167
146
|
if filename != ''
|
168
|
-
csv.close
|
147
|
+
csv.close
|
169
148
|
puts "Report generated in #{filename}"
|
170
149
|
end
|
171
150
|
|
172
151
|
end
|
173
152
|
|
174
|
-
# Show the workflow status of a specific step in a specific workflow for the provided druid.
|
153
|
+
# Show the workflow status of a specific step in a specific workflow for the provided druid.
|
175
154
|
# This method only works when this gem is used in a project that is configured to connect to DOR
|
176
155
|
#
|
177
156
|
# @param [string] druid a druid string
|
@@ -183,63 +162,60 @@ module Assembly
|
|
183
162
|
# Example:
|
184
163
|
# puts Assembly::Utils.get_workflow_status('druid:aa000aa0001','assemblyWF','jp2-create')
|
185
164
|
# > "completed"
|
186
|
-
def self.get_workflow_status(druid,workflow,step)
|
187
|
-
Dor::WorkflowService.get_workflow_status('dor', druid, workflow, step)
|
188
|
-
end
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
:
|
213
|
-
:
|
214
|
-
:
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
end
|
237
|
-
|
238
|
-
# Cleanup a single objects and associated files given a druid. WARNING: VERY DESTRUCTIVE.
|
165
|
+
def self.get_workflow_status(druid, workflow, step)
|
166
|
+
Dor::WorkflowService.get_workflow_status('dor', druid, workflow, step)
|
167
|
+
end
|
168
|
+
|
169
|
+
# Cleanup a list of objects and associated files given a list of druids. WARNING: VERY DESTRUCTIVE.
|
170
|
+
# This method only works when this gem is used in a project that is configured to connect to DOR
|
171
|
+
#
|
172
|
+
# @param [Hash] params parameters specified as a hash, using symbols for options:
|
173
|
+
# * :druids => array of druids to cleanup
|
174
|
+
# * :steps => an array of steps, specified as symbols, indicating steps to be run, options are:
|
175
|
+
# :stacks=This will remove all files from the stacks that were shelved for the objects
|
176
|
+
# :dor=This will delete objects from Fedora
|
177
|
+
# :stage=This will delete the staged content in the assembly workspace
|
178
|
+
# :symlinks=This will remove the symlink from the dor workspace
|
179
|
+
# :workflows=This will remove the assemblyWF and accessionWF workflows for this object
|
180
|
+
# * :dry_run => do not actually clean up (defaults to false)
|
181
|
+
#
|
182
|
+
# Example:
|
183
|
+
# Assembly::Utils.cleanup(:druids=>['druid:aa000aa0001','druid:aa000aa0002'],:steps=>[:stacks,:dor,:stage,:symlinks,:workflows])
|
184
|
+
def self.cleanup(params = {})
|
185
|
+
druids = params[:druids] || []
|
186
|
+
steps = params[:steps] || []
|
187
|
+
dry_run = params[:dry_run] || false
|
188
|
+
|
189
|
+
allowed_steps = {:stacks => 'This will remove all files from the stacks that were shelved for the objects',
|
190
|
+
:dor => 'This will delete objects from Fedora',
|
191
|
+
:stage => "This will delete the staged content in #{Assembly::ASSEMBLY_WORKSPACE}",
|
192
|
+
:symlinks => "This will remove the symlink from #{Assembly::DOR_WORKSPACE}",
|
193
|
+
:workflows => 'This will remove the accessionWF and assemblyWF workflows'}
|
194
|
+
|
195
|
+
num_steps = 0
|
196
|
+
|
197
|
+
puts 'THIS IS A DRY RUN' if dry_run
|
198
|
+
|
199
|
+
Assembly::Utils.confirm "Run on '#{ENV['ROBOT_ENVIRONMENT']}'? Any response other than 'y' or 'yes' will stop the cleanup now."
|
200
|
+
Assembly::Utils.confirm 'Are you really sure you want to run on production? CLEANUP IS NOT REVERSIBLE' if ENV['ROBOT_ENVIRONMENT'] == 'production'
|
201
|
+
|
202
|
+
steps.each do |step|
|
203
|
+
if allowed_steps.keys.include?(step)
|
204
|
+
Assembly::Utils.confirm "Run step '#{step}'? #{allowed_steps[step]}. Any response other than 'y' or 'yes' will stop the cleanup now."
|
205
|
+
num_steps += 1 # count the valid steps found and agreed to
|
206
|
+
end
|
207
|
+
end
|
208
|
+
raise 'no valid steps specified for cleanup' if num_steps == 0
|
209
|
+
raise 'no druids provided' if druids.size == 0
|
210
|
+
|
211
|
+
druids.each {|pid| Assembly::Utils.cleanup_object(pid, steps, dry_run)}
|
212
|
+
end
|
213
|
+
|
214
|
+
# Clean up a single object and its associated files given a druid. WARNING: VERY DESTRUCTIVE.
|
239
215
|
# This method only works when this gem is used in a project that is configured to connect to DOR
|
240
216
|
#
|
241
217
|
# @param [string] pid a druid
|
242
|
-
# @param [array] steps an array of steps, options below
|
218
|
+
# @param [array] steps an array of steps, options below
|
243
219
|
# :stacks=This will remove all files from the stacks that were shelved for the objects
|
244
220
|
# :dor=This will delete objects from Fedora
|
245
221
|
# :stage=This will delete the staged content in the assembly workspace
|
@@ -249,88 +225,85 @@ module Assembly
|
|
249
225
|
#
|
250
226
|
# Example:
|
251
227
|
# Assembly::Utils.cleanup_object('druid:aa000aa0001',[:stacks,:dor,:stage,:symlinks,:workflows])
|
252
|
-
def self.cleanup_object(pid,steps,dry_run=false)
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
228
|
+
def self.cleanup_object(pid, steps, dry_run = false)
|
229
|
+
# start up an SSH session if we are going to try and remove content from the stacks
|
230
|
+
ssh_session = Net::SSH.start(Dor::Config.stacks.host, Dor::Config.stacks.user, :auth_methods => %w(gssapi-with-mic publickey hostbased password keyboard-interactive)) if steps.include?(:stacks) && defined?(stacks_server)
|
231
|
+
|
232
|
+
druid_tree = DruidTools::Druid.new(pid).tree
|
233
|
+
puts "Cleaning up #{pid}"
|
234
|
+
if steps.include?(:dor)
|
235
|
+
puts "-- deleting #{pid} from Fedora #{ENV['ROBOT_ENVIRONMENT']}"
|
236
|
+
Assembly::Utils.unregister(pid) unless dry_run
|
237
|
+
end
|
238
|
+
if steps.include?(:symlinks)
|
239
|
+
path_to_symlinks = []
|
240
|
+
path_to_symlinks << File.join(Assembly::DOR_WORKSPACE, druid_tree)
|
241
|
+
path_to_symlinks << Assembly::Utils.get_staging_path(pid, Assembly::DOR_WORKSPACE)
|
242
|
+
path_to_symlinks.each do |path|
|
243
|
+
if File.directory?(path)
|
244
|
+
puts "-- deleting folder #{path} (WARNING: should have been a symlink)"
|
245
|
+
FileUtils.rm_rf path unless dry_run
|
246
|
+
elsif File.symlink?(path)
|
247
|
+
puts "-- deleting symlink #{path}"
|
248
|
+
File.delete(path) unless dry_run
|
249
|
+
else
|
250
|
+
puts "-- Skipping #{path}: not a folder or symlink"
|
251
|
+
end
|
252
|
+
end
|
253
|
+
end
|
254
|
+
if steps.include?(:stage)
|
255
|
+
path_to_content = Assembly::Utils.get_staging_path(pid, Assembly::ASSEMBLY_WORKSPACE)
|
256
|
+
puts "-- deleting folder #{path_to_content}"
|
257
|
+
FileUtils.rm_rf path_to_content if !dry_run && File.exist?(path_to_content)
|
258
|
+
end
|
259
|
+
if steps.include?(:stacks)
|
260
|
+
path_to_content = Dor::DigitalStacksService.stacks_storage_dir(pid)
|
261
|
+
puts "-- removing files from the stacks on #{stacks_server} at #{path_to_content}"
|
262
|
+
ssh_session.exec!("rm -fr #{path_to_content}") unless dry_run
|
263
|
+
end
|
264
|
+
if steps.include?(:workflows)
|
265
|
+
puts "-- deleting #{pid} accessionWF and assemblyWF workflows from Fedora #{ENV['ROBOT_ENVIRONMENT']}"
|
266
|
+
unless dry_run
|
267
|
+
Dor::WorkflowService.delete_workflow('dor', pid, 'accessionWF')
|
268
|
+
Dor::WorkflowService.delete_workflow('dor', pid, 'assemblyWF')
|
269
|
+
end
|
270
|
+
end
|
271
|
+
rescue Exception => e
|
272
|
+
puts "** cleaning up failed for #{pid} with #{e.message}"
|
273
|
+
ensure
|
274
|
+
ssh_session.close if ssh_session
|
275
|
+
end
|
276
|
+
|
302
277
|
# Delete an object from DOR.
|
303
278
|
# This method only works when this gem is used in a project that is configured to connect to DOR
|
304
279
|
#
|
305
|
-
# @param [string] pid the druid
|
280
|
+
# @param [string] pid the druid
|
306
281
|
#
|
307
282
|
# Example:
|
308
283
|
# Assembly::Utils.delete_from_dor('druid:aa000aa0001')
|
309
284
|
def self.delete_from_dor(pid)
|
310
|
-
|
311
285
|
Dor::Config.fedora.client["objects/#{pid}"].delete
|
312
286
|
Dor::SearchService.solr.delete_by_id(pid)
|
313
287
|
Dor::SearchService.solr.commit
|
314
|
-
|
315
288
|
end
|
316
|
-
|
289
|
+
|
317
290
|
# Quickly update rights metadata for any existing list of objects using default rights metadata pulled from the supplied APO
|
318
291
|
#
|
319
292
|
# @param [array] druids - an array of druids
|
320
293
|
# @param [string] apo_druid - the druid of the APO to pull rights metadata from
|
321
294
|
# @param [boolean] publish - defaults to false, if true, will publish each object after replacing datastreams (must be run on server with rights to do this)
|
322
|
-
#
|
295
|
+
#
|
323
296
|
# Example:
|
324
297
|
# druids=%w{druid:aa111aa1111 druid:bb222bb2222}
|
325
298
|
# apo_druid='druid:cc222cc2222'
|
326
|
-
# Assembly::Utils.update_rights_metadata(druids,apo_druid)
|
327
|
-
def self.update_rights_metadata(druids,apo_druid,publish=false)
|
299
|
+
# Assembly::Utils.update_rights_metadata(druids,apo_druid)
|
300
|
+
def self.update_rights_metadata(druids, apo_druid, publish = false)
|
328
301
|
apo = Dor::Item.find(apo_druid)
|
329
302
|
rights_md = apo.datastreams['defaultObjectRights']
|
330
|
-
|
303
|
+
replace_datastreams(druids, 'rightsMetadata', rights_md.content, publish)
|
331
304
|
end
|
332
|
-
|
333
|
-
# Replace a specific datastream for a series of objects in DOR with new content
|
305
|
+
|
306
|
+
# Replace a specific datastream for a series of objects in DOR with new content
|
334
307
|
#
|
335
308
|
# @param [array] druids - an array of druids
|
336
309
|
# @param [string] datastream_name - the name of the datastream to replace
|
@@ -342,31 +315,31 @@ module Assembly
|
|
342
315
|
# new_content='<xml><more nodes>this should be the whole datastream</more nodes></xml>'
|
343
316
|
# datastream='rightsMetadata'
|
344
317
|
# Assembly::Utils.replace_datastreams(druids,datastream,new_content)
|
345
|
-
def self.replace_datastreams(druids,datastream_name,new_content,publish=false)
|
318
|
+
def self.replace_datastreams(druids, datastream_name, new_content, publish = false)
|
346
319
|
druids.each do |druid|
|
347
320
|
obj = Dor::Item.find(druid)
|
348
|
-
ds = obj.datastreams[datastream_name]
|
321
|
+
ds = obj.datastreams[datastream_name]
|
349
322
|
if ds
|
350
|
-
ds.content = new_content
|
323
|
+
ds.content = new_content
|
351
324
|
ds.save
|
352
325
|
puts "replaced #{datastream_name} for #{druid}"
|
353
326
|
if publish
|
354
327
|
obj.publish_metadata
|
355
|
-
puts
|
328
|
+
puts '--object re-published'
|
356
329
|
end
|
357
330
|
else
|
358
|
-
puts "#{datastream_name} does not exist for #{druid}"
|
331
|
+
puts "#{datastream_name} does not exist for #{druid}"
|
359
332
|
end
|
360
|
-
end
|
361
|
-
end
|
333
|
+
end
|
334
|
+
end
|
362
335
|
|
363
|
-
# Republish a list of druids. Only works when run from a server with access rights to the stacks (e.g. lyberservices-prod)
|
336
|
+
# Republish a list of druids. Only works when run from a server with access rights to the stacks (e.g. lyberservices-prod)
|
364
337
|
#
|
365
338
|
# @param [array] druids - an array of druids
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
339
|
+
#
|
340
|
+
# Example:
|
341
|
+
# druids=%w{druid:aa111aa1111 druid:bb222bb2222}
|
342
|
+
# Assembly::Utils.republish(druids)
|
370
343
|
def self.republish(druids)
|
371
344
|
druids.each do |druid|
|
372
345
|
obj = Dor::Item.find(druid)
|
@@ -374,98 +347,89 @@ module Assembly
|
|
374
347
|
puts "republished #{druid}"
|
375
348
|
end
|
376
349
|
end
|
377
|
-
|
350
|
+
|
378
351
|
# Determines if the specified APO object contains a specified workflow defined in it
|
379
352
|
# DEPRECATED NOW THAT REIFIED WORKFLOWS ARE USED
|
380
353
|
# @param [string] druid - the druid of the APO to check
|
381
354
|
# @param [string] workflow - the name of the workflow to check
|
382
|
-
#
|
355
|
+
#
|
383
356
|
# @return [boolean] if workflow is defined in APO
|
384
|
-
#
|
357
|
+
#
|
385
358
|
# Example:
|
386
359
|
# Assembly::Utils.apo_workflow_defined?('druid:oo000oo0001','assembly')
|
387
360
|
# > true
|
388
|
-
def self.apo_workflow_defined?(druid,workflow)
|
389
|
-
puts
|
361
|
+
def self.apo_workflow_defined?(druid, workflow)
|
362
|
+
puts '************WARNING - THIS METHOD MAY NOT BE USEFUL ANYMORE SINCE WORKFLOWS ARE NO LONGER DEFINED IN THE APO**************'
|
390
363
|
obj = Dor::Item.find(druid)
|
391
364
|
raise 'object not an APO' if obj.identityMetadata.objectType.first != 'adminPolicy'
|
392
|
-
xml_doc=Nokogiri::XML(obj.administrativeMetadata.content)
|
365
|
+
xml_doc = Nokogiri::XML(obj.administrativeMetadata.content)
|
393
366
|
xml_doc.xpath("//#{workflow}").size == 1 || xml_doc.xpath("//*[@id='#{workflow}']").size == 1
|
394
367
|
end
|
395
|
-
|
368
|
+
|
396
369
|
# Determines if the specified object is an APO
|
397
370
|
# @param [string] druid - the druid of the APO to check
|
398
|
-
#
|
371
|
+
#
|
399
372
|
# @return [boolean] if object exists and is an APO
|
400
|
-
#
|
373
|
+
#
|
401
374
|
# Example:
|
402
375
|
# Assembly::Utils.is_apo?('druid:oo000oo0001')
|
403
376
|
# > true
|
404
377
|
def self.is_apo?(druid)
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
return false
|
410
|
-
end
|
378
|
+
obj = Dor::Item.find(druid)
|
379
|
+
obj.identityMetadata.objectType.first == 'adminPolicy'
|
380
|
+
rescue
|
381
|
+
return false
|
411
382
|
end
|
412
|
-
|
413
|
-
# Update a specific datastream for a series of objects in DOR by searching and replacing content
|
383
|
+
|
384
|
+
# Update a specific datastream for a series of objects in DOR by searching and replacing content
|
414
385
|
#
|
415
386
|
# @param [array] druids - an array of druids
|
416
387
|
# @param [string] datastream_name - the name of the datastream to replace
|
417
388
|
# @param [string] find_content - the content to find
|
418
389
|
# @param [string] replace_content - the content to replace the found content with
|
419
|
-
#
|
390
|
+
#
|
420
391
|
# Example:
|
421
|
-
# druids
|
422
|
-
# find_content='FooBarBaz'
|
423
|
-
# replace_content='Stanford Rules'
|
424
|
-
# datastream='rightsMetadata'
|
425
|
-
# Assembly::Utils.update_datastreams(druids,datastream,find_content,replace_content)
|
426
|
-
def self.update_datastreams(druids,datastream_name,find_content,replace_content)
|
392
|
+
# druids = %w{druid:aa111aa1111 druid:bb222bb2222}
|
393
|
+
# find_content = 'FooBarBaz'
|
394
|
+
# replace_content = 'Stanford Rules'
|
395
|
+
# datastream = 'rightsMetadata'
|
396
|
+
# Assembly::Utils.update_datastreams(druids, datastream, find_content, replace_content)
|
397
|
+
def self.update_datastreams(druids, datastream_name, find_content, replace_content)
|
427
398
|
druids.each do |druid|
|
428
399
|
obj = Dor::Item.find(druid)
|
429
400
|
ds = obj.datastreams[datastream_name]
|
430
401
|
if ds
|
431
|
-
updated_content=ds.content.gsub(find_content,replace_content)
|
402
|
+
updated_content = ds.content.gsub(find_content, replace_content)
|
432
403
|
ds.content = updated_content
|
433
404
|
ds.save
|
434
405
|
puts "updated #{datastream_name} for #{druid}"
|
435
406
|
else
|
436
|
-
puts "#{datastream_name} does not exist for #{druid}"
|
407
|
+
puts "#{datastream_name} does not exist for #{druid}"
|
437
408
|
end
|
438
|
-
end
|
439
|
-
end
|
409
|
+
end
|
410
|
+
end
|
440
411
|
|
441
412
|
# Unregister a DOR object, which includes deleting it and deleting all its workflows
|
442
|
-
#
|
443
413
|
# @param [string] pid of druid
|
444
|
-
#
|
445
|
-
# @return [boolean] if deletion succeed or not
|
414
|
+
# @return [boolean] if deletion succeeded or not
|
446
415
|
def self.unregister(pid)
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
rescue
|
453
|
-
return false
|
454
|
-
end
|
455
|
-
|
416
|
+
Assembly::Utils.delete_all_workflows pid
|
417
|
+
Assembly::Utils.delete_from_dor pid
|
418
|
+
true
|
419
|
+
rescue
|
420
|
+
return false
|
456
421
|
end
|
457
422
|
|
458
423
|
# Set the workflow step for the given PID to an error state
|
459
|
-
#
|
460
424
|
# @param [string] pid of druid
|
461
425
|
# @param [string] step to set to error
|
462
426
|
#
|
463
427
|
def self.set_workflow_step_to_error(pid, step)
|
464
428
|
wf_name = Assembly::ASSEMBLY_WF
|
465
429
|
msg = 'Integration testing'
|
466
|
-
params =
|
430
|
+
params = ['dor', pid, wf_name, step, msg]
|
467
431
|
resp = Dor::WorkflowService.update_workflow_error_status *params
|
468
|
-
raise
|
432
|
+
raise 'update_workflow_error_status() returned false.' unless resp == true
|
469
433
|
end
|
470
434
|
|
471
435
|
# Delete all workflows for the given PID. Destructive and should only be used when deleting an object from DOR.
|
@@ -473,23 +437,23 @@ module Assembly
|
|
473
437
|
#
|
474
438
|
# @param [string] pid of druid
|
475
439
|
# @param [String] repo repository dealing with the workflow. Default is 'dor'. Another option is 'sdr'
|
476
|
-
# e.g.
|
440
|
+
# e.g.
|
477
441
|
# Assembly::Utils.delete_all_workflows('druid:oo000oo0001')
|
478
|
-
def self.delete_all_workflows(pid, repo='dor')
|
479
|
-
Dor::WorkflowService.get_workflows(pid).each {|workflow| Dor::WorkflowService.delete_workflow(repo,pid,workflow)}
|
442
|
+
def self.delete_all_workflows(pid, repo = 'dor')
|
443
|
+
Dor::WorkflowService.get_workflows(pid).each {|workflow| Dor::WorkflowService.delete_workflow(repo, pid, workflow)}
|
480
444
|
end
|
481
445
|
|
482
446
|
# Reindex the supplied PID in solr.
|
483
447
|
#
|
484
448
|
# @param [string] pid of druid
|
485
|
-
# e.g.
|
486
|
-
# Assembly::Utils.reindex('druid:oo000oo0001')
|
449
|
+
# e.g.
|
450
|
+
# Assembly::Utils.reindex('druid:oo000oo0001')
|
487
451
|
def self.reindex(pid)
|
488
452
|
obj = Dor.load_instance pid
|
489
453
|
solr_doc = obj.to_solr
|
490
|
-
Dor::SearchService.solr.add(solr_doc, :add_attributes => {:commitWithin => 1000}) unless obj.nil?
|
454
|
+
Dor::SearchService.solr.add(solr_doc, :add_attributes => {:commitWithin => 1000}) unless obj.nil?
|
491
455
|
end
|
492
|
-
|
456
|
+
|
493
457
|
# Clear stray workflows - remove any workflow steps for orphaned objects.
|
494
458
|
# This method only works when this gem is used in a project that is configured to connect to DOR
|
495
459
|
def self.clear_stray_workflows
|
@@ -507,15 +471,14 @@ module Assembly
|
|
507
471
|
resp = wfs.update_workflow_error_status *params
|
508
472
|
puts "updated: resp=#{resp} params=#{params.inspect}"
|
509
473
|
end
|
510
|
-
end
|
474
|
+
end
|
511
475
|
end
|
512
476
|
|
513
477
|
# Check if the object is fully accessioned and ingested.
|
514
478
|
# This method only works when this gem is used in a project that is configured to connect to the workflow service.
|
515
479
|
#
|
516
480
|
# @param [string] pid the druid to operate on
|
517
|
-
#
|
518
|
-
# @return [boolean] if object is fully ingested
|
481
|
+
# @return [boolean] if object is fully ingested
|
519
482
|
# Example:
|
520
483
|
# Assembly::Utils.is_ingested?('druid:oo000oo0001')
|
521
484
|
# > false
|
@@ -527,7 +490,6 @@ module Assembly
|
|
527
490
|
# This method only works when this gem is used in a project that is configured to connect to the workflow service.
|
528
491
|
#
|
529
492
|
# @param [string] pid the druid to operate on
|
530
|
-
#
|
531
493
|
# @return [boolean] if object is currently in accessioning
|
532
494
|
# Example:
|
533
495
|
# Assembly::Utils.in_accessioning?('druid:oo000oo0001')
|
@@ -535,42 +497,39 @@ module Assembly
|
|
535
497
|
def self.in_accessioning?(pid)
|
536
498
|
WFS.get_active_lifecycle(REPO, pid, 'submitted') ? true : false
|
537
499
|
end
|
538
|
-
|
500
|
+
|
539
501
|
# Check if the object is on ingest hold
|
540
502
|
# This method only works when this gem is used in a project that is configured to connect to the workflow service.
|
541
503
|
#
|
542
504
|
# @param [string] pid the druid to operate on
|
543
|
-
#
|
544
505
|
# @return [boolean] if object is on ingest hold
|
545
506
|
# Example:
|
546
507
|
# Assembly::Utils.ingest_hold?('druid:oo000oo0001')
|
547
508
|
# > false
|
548
509
|
def self.ingest_hold?(pid)
|
549
|
-
WFS.get_workflow_status(REPO, pid, 'accessionWF','sdr-ingest-transfer') == 'hold'
|
510
|
+
WFS.get_workflow_status(REPO, pid, 'accessionWF', 'sdr-ingest-transfer') == 'hold'
|
550
511
|
end
|
551
|
-
|
512
|
+
|
552
513
|
# Check if the object is submitted
|
553
514
|
# This method only works when this gem is used in a project that is configured to connect to the workflow service.
|
554
515
|
#
|
555
516
|
# @param [string] pid the druid to operate on
|
556
|
-
#
|
557
|
-
# @return [boolean] if object is submitted
|
517
|
+
# @return [boolean] if object is submitted
|
558
518
|
# Example:
|
559
519
|
# Assembly::Utils.is_submitted?('druid:oo000oo0001')
|
560
|
-
# > false
|
520
|
+
# > false
|
561
521
|
def self.is_submitted?(pid)
|
562
|
-
WFS.get_lifecycle(REPO, pid, 'submitted')
|
522
|
+
WFS.get_lifecycle(REPO, pid, 'submitted').nil?
|
563
523
|
end
|
564
524
|
|
565
525
|
# Check if the updates are allowed on the object
|
566
526
|
# This method only works when this gem is used in a project that is configured to connect to the workflow service.
|
567
527
|
#
|
568
528
|
# @param [string] pid the druid to operate on
|
569
|
-
#
|
570
|
-
# @return [boolean] if object can be versioned and updated
|
529
|
+
# @return [boolean] if object can be versioned and updated
|
571
530
|
# Example:
|
572
531
|
# Assembly::Utils.updates_allowed?('druid:oo000oo0001')
|
573
|
-
# > false
|
532
|
+
# > false
|
574
533
|
def self.updates_allowed?(pid)
|
575
534
|
!self.in_accessioning?(pid) && self.is_ingested?(pid)
|
576
535
|
end
|
@@ -579,15 +538,14 @@ module Assembly
|
|
579
538
|
# This method only works when this gem is used in a project that is configured to connect to the workflow service.
|
580
539
|
#
|
581
540
|
# @param [string] pid the druid to operate on
|
582
|
-
#
|
583
541
|
# @return [boolean] if object requires versioning
|
584
542
|
# Example:
|
585
543
|
# Assembly::Utils.versioning_required?('druid:oo000oo0001')
|
586
|
-
# > false
|
544
|
+
# > false
|
587
545
|
def self.versioning_required?(pid)
|
588
|
-
!((!self.is_ingested?(pid) && self.ingest_hold?(pid)) || (!self.is_ingested?(pid) && !self.is_submitted?(pid)))
|
546
|
+
!((!self.is_ingested?(pid) && self.ingest_hold?(pid)) || (!self.is_ingested?(pid) && !self.is_submitted?(pid)))
|
589
547
|
end
|
590
|
-
|
548
|
+
|
591
549
|
# Reset the workflow states for a list of druids given a list of workflow names and steps.
|
592
550
|
# Provide a list of druids in an array, and a hash containing workflow names (e.g. 'assemblyWF' or 'accessionWF') as the keys, and arrays of steps
|
593
551
|
# as the corresponding values (e.g. ['checksum-compute','jp2-create']) and they will all be reset to "waiting".
|
@@ -599,25 +557,25 @@ module Assembly
|
|
599
557
|
# * :state => a string for the name of the state to reset to, defaults to 'waiting' (could be 'completed' for example)
|
600
558
|
#
|
601
559
|
# Example:
|
602
|
-
# druids=['druid:aa111aa1111','druid:bb222bb2222']
|
603
|
-
# steps={'assemblyWF'
|
604
|
-
# Assembly::Utils.reset_workflow_states(:druids=>druids
|
605
|
-
def self.reset_workflow_states(params={})
|
606
|
-
druids=params[:druids] || []
|
607
|
-
workflows=params[:steps]
|
608
|
-
state=params[:state]
|
560
|
+
# druids = ['druid:aa111aa1111', 'druid:bb222bb2222']
|
561
|
+
# steps = {'assemblyWF' => ['checksum-compute'], 'accessionWF' => ['content-metadata', 'descriptive-metadata']}
|
562
|
+
# Assembly::Utils.reset_workflow_states(:druids => druids, :steps => steps)
|
563
|
+
def self.reset_workflow_states(params = {})
|
564
|
+
druids = params[:druids] || []
|
565
|
+
workflows = params[:steps] || {}
|
566
|
+
state = params[:state] || 'waiting'
|
609
567
|
druids.each do |druid|
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
end
|
568
|
+
puts "** #{druid}"
|
569
|
+
begin
|
570
|
+
workflows.each do |workflow, steps|
|
571
|
+
steps.each do |step|
|
572
|
+
puts "Updating #{workflow}:#{step} to #{state}"
|
573
|
+
Dor::WorkflowService.update_workflow_status 'dor', druid, workflow, step, state
|
617
574
|
end
|
618
|
-
|
619
|
-
|
620
|
-
|
575
|
+
end
|
576
|
+
rescue Exception => e
|
577
|
+
puts "an error occurred trying to update workflows for #{druid} with message #{e.message}"
|
578
|
+
end
|
621
579
|
end
|
622
580
|
end
|
623
581
|
|
@@ -625,22 +583,20 @@ module Assembly
|
|
625
583
|
# Useful if you want to import a report from argo
|
626
584
|
#
|
627
585
|
# @param [string] filename of CSV that has a column called "druid"
|
628
|
-
#
|
629
586
|
# @return [array] array of druids
|
630
|
-
#
|
631
587
|
# Example:
|
632
|
-
# Assembly::Utils.read_druids_from_file('download.csv') # ['druid:xxxxx','druid:yyyyy']
|
588
|
+
# Assembly::Utils.read_druids_from_file('download.csv') # ['druid:xxxxx', 'druid:yyyyy']
|
633
589
|
def self.read_druids_from_file(csv_filename)
|
634
|
-
rows=CsvMapper.import(csv_filename) do read_attributes_from_file end
|
635
|
-
druids=[]
|
590
|
+
rows = CsvMapper.import(csv_filename) do read_attributes_from_file end
|
591
|
+
druids = []
|
636
592
|
rows.each do |row|
|
637
|
-
druid=row.druid
|
638
|
-
druid="druid:#{druid}" unless druid.include?('druid:')
|
593
|
+
druid = row.druid
|
594
|
+
druid = "druid:#{druid}" unless druid.include?('druid:')
|
639
595
|
druids << druid
|
640
596
|
end
|
641
|
-
|
597
|
+
druids
|
642
598
|
end
|
643
|
-
|
599
|
+
|
644
600
|
# Get a list of druids that have errored out in a particular workflow and step
|
645
601
|
#
|
646
602
|
# @param [string] workflow name
|
@@ -648,24 +604,21 @@ module Assembly
|
|
648
604
|
# @param [string] tag -- optional, if supplied, results will be filtered by the exact tag supplied; note this will dramatically slow down the response if there are many results
|
649
605
|
#
|
650
606
|
# @return [hash] hash of results, with key as a druid, and value as the error message
|
651
|
-
# e.g.
|
607
|
+
# e.g.
|
652
608
|
# result=Assembly::Utils.get_errored_objects_for_workstep('accessionWF','content-metadata','Project : Revs')
|
653
609
|
# => {"druid:qd556jq0580"=>"druid:qd556jq0580 - Item error; caused by #<Rubydora::FedoraInvalidRequest: Error modifying datastream contentMetadata for druid:qd556jq0580. See logger for details>"}
|
654
|
-
def self.get_errored_objects_for_workstep
|
655
|
-
result=Dor::WorkflowService.get_errored_objects_for_workstep workflow,step,'dor'
|
656
|
-
if tag == ''
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
filtered_result.merge!(druid=>error) if item.tags.include? tag
|
664
|
-
rescue
|
665
|
-
end
|
610
|
+
def self.get_errored_objects_for_workstep(workflow, step, tag = '')
|
611
|
+
result = Dor::WorkflowService.get_errored_objects_for_workstep workflow, step, 'dor'
|
612
|
+
return result if tag == ''
|
613
|
+
filtered_result = {}
|
614
|
+
result.each do |druid, error|
|
615
|
+
begin
|
616
|
+
item = Dor::Item.find(druid)
|
617
|
+
filtered_result.merge!(druid => error) if item.tags.include? tag
|
618
|
+
rescue
|
666
619
|
end
|
667
|
-
return filtered_result
|
668
620
|
end
|
621
|
+
filtered_result
|
669
622
|
end
|
670
623
|
|
671
624
|
# Reset any objects in a specific workflow step and state that have errored out back to waiting
|
@@ -675,15 +628,15 @@ module Assembly
|
|
675
628
|
# @param [string] tag -- optional, if supplied, results will be filtered by the exact tag supplied; note this will dramatically slow down the response if there are many results
|
676
629
|
#
|
677
630
|
# @return [hash] hash of results that have been reset, with key as a druid, and value as the error message
|
678
|
-
# e.g.
|
679
|
-
# result=Assembly::Utils.reset_errored_objects_for_workstep('accessionWF','content-metadata')
|
680
|
-
# => {"druid:qd556jq0580"=>"druid:qd556jq0580 - Item error; caused by #<Rubydora::FedoraInvalidRequest: Error modifying datastream contentMetadata for druid:qd556jq0580. See logger for details>"}
|
681
|
-
def self.reset_errored_objects_for_workstep
|
682
|
-
result=
|
683
|
-
druids=[]
|
684
|
-
result.each {|k,v| druids << k}
|
685
|
-
|
686
|
-
|
631
|
+
# e.g.
|
632
|
+
# result = Assembly::Utils.reset_errored_objects_for_workstep('accessionWF', 'content-metadata')
|
633
|
+
# => {"druid:qd556jq0580"=>"druid:qd556jq0580 - Item error; caused by #<Rubydora::FedoraInvalidRequest: Error modifying datastream contentMetadata for druid:qd556jq0580. See logger for details>"}
|
634
|
+
def self.reset_errored_objects_for_workstep(workflow, step, tag = '')
|
635
|
+
result = get_errored_objects_for_workstep workflow, step, tag
|
636
|
+
druids = []
|
637
|
+
result.each {|k, v| druids << k}
|
638
|
+
reset_workflow_states(:druids => druids, :steps => {workflow => [step]}) if druids.size > 0
|
639
|
+
result
|
687
640
|
end
|
688
641
|
|
689
642
|
# Read in a list of druids from a pre-assembly progress load file and load into an array.
|
@@ -693,114 +646,106 @@ module Assembly
|
|
693
646
|
#
|
694
647
|
# @return [array] list of druids
|
695
648
|
#
|
696
|
-
# Example:
|
697
|
-
# druids=Assembly::Utils.get_druids_from_log
|
649
|
+
# Example:
|
650
|
+
# druids = Assembly::Utils.get_druids_from_log '/dor/preassembly/sohp_accession_log.yaml'
|
698
651
|
# puts druids
|
699
|
-
# > ['aa000aa0001','aa000aa0002']
|
700
|
-
def self.get_druids_from_log(progress_log_file,completed=true)
|
701
|
-
|
652
|
+
# > ['aa000aa0001', 'aa000aa0002']
|
653
|
+
def self.get_druids_from_log(progress_log_file, completed = true)
|
654
|
+
druids = []
|
702
655
|
docs = YAML.load_stream(Assembly::Utils.read_file(progress_log_file))
|
703
656
|
docs = docs.documents if docs.respond_to? :documents
|
704
|
-
docs.each { |obj| druids << obj[:pid] if obj[:pre_assem_finished] == completed}
|
705
|
-
|
657
|
+
docs.each { |obj| druids << obj[:pid] if obj[:pre_assem_finished] == completed}
|
658
|
+
druids
|
706
659
|
end
|
707
660
|
|
708
661
|
# Read in a YAML configuration file from disk and return a hash
|
709
662
|
#
|
710
663
|
# @param [string] filename of YAML config file to read
|
711
|
-
#
|
712
664
|
# @return [hash] configuration contents as a hash
|
713
665
|
#
|
714
666
|
# Example:
|
715
667
|
# config_filename='/thumpers/dpgthumper2-smpl/SC1017_SOHP/sohp_prod_accession.yaml'
|
716
|
-
# config=Assembly::Utils.load_config(config_filename)
|
668
|
+
# config=Assembly::Utils.load_config(config_filename)
|
717
669
|
# puts config['progress_log_file']
|
718
|
-
# > "/dor/preassembly/sohp_accession_log.yaml"
|
670
|
+
# > "/dor/preassembly/sohp_accession_log.yaml"
|
719
671
|
def self.load_config(filename)
|
720
|
-
YAML.load(Assembly::Utils.read_file(filename))
|
672
|
+
YAML.load(Assembly::Utils.read_file(filename))
|
721
673
|
end
|
722
674
|
|
723
675
|
# Read in a file from disk
|
724
676
|
#
|
725
677
|
# @param [string] filename to read
|
726
|
-
#
|
727
678
|
# @return [string] file contents as a string
|
728
679
|
def self.read_file(filename)
|
729
|
-
|
680
|
+
File.readable?(filename) ? IO.read(filename) : ''
|
730
681
|
end
|
731
682
|
|
732
|
-
# Used by the completion_report and project_tag_report in the pre-assembly project
|
683
|
+
# Used by the completion_report and project_tag_report in the pre-assembly project
|
733
684
|
#
|
734
685
|
# @param [solr_document] doc a solr document result
|
735
686
|
# @param [boolean] check_status_in_dor indicates if we should check for the workflow states in dor or trust SOLR is up to date (defaults to false)
|
736
687
|
#
|
737
688
|
# @return [string] a comma delimited row for the report
|
738
|
-
def self.solr_doc_parser(doc,check_status_in_dor=false)
|
739
|
-
|
689
|
+
def self.solr_doc_parser(doc, check_status_in_dor = false)
|
740
690
|
druid = doc[:id]
|
741
691
|
|
742
692
|
if Solrizer::VERSION < '3.0'
|
743
693
|
label = doc[:objectLabel_t]
|
744
|
-
title=doc[:public_dc_title_t].nil? ? '' : doc[:public_dc_title_t].first
|
694
|
+
title = doc[:public_dc_title_t].nil? ? '' : doc[:public_dc_title_t].first
|
745
695
|
|
746
696
|
if check_status_in_dor
|
747
|
-
accessioned =
|
748
|
-
shelved
|
697
|
+
accessioned = get_workflow_status(druid, 'accessionWF', 'publish') == 'completed'
|
698
|
+
shelved = get_workflow_status(druid, 'accessionWF', 'shelve') == 'completed'
|
749
699
|
else
|
750
|
-
accessioned = doc[:wf_wps_facet].nil? ? false : doc[:wf_wps_facet].include?(
|
751
|
-
shelved
|
700
|
+
accessioned = doc[:wf_wps_facet].nil? ? false : doc[:wf_wps_facet].include?('accessionWF:publish:completed')
|
701
|
+
shelved = doc[:wf_wps_facet].nil? ? false : doc[:wf_wps_facet].include?('accessionWF:shelve:completed')
|
752
702
|
end
|
753
703
|
source_id = doc[:source_id_t]
|
754
|
-
files=doc[:content_file_t]
|
704
|
+
files = doc[:content_file_t]
|
755
705
|
else
|
756
706
|
label = doc[Solrizer.solr_name('objectLabel', :displayable)]
|
757
707
|
title = doc.fetch(Solrizer.solr_name('public_dc_title', :displayable), []).first || ''
|
758
708
|
|
759
709
|
if check_status_in_dor
|
760
|
-
accessioned =
|
761
|
-
shelved
|
710
|
+
accessioned = get_workflow_status(druid, 'accessionWF', 'publish') == 'completed'
|
711
|
+
shelved = get_workflow_status(druid, 'accessionWF', 'shelve') == 'completed'
|
762
712
|
else
|
763
|
-
accessioned = doc.fetch(Solrizer.solr_name('wf_wps', :symbol), []).include?(
|
764
|
-
shelved
|
713
|
+
accessioned = doc.fetch(Solrizer.solr_name('wf_wps', :symbol), []).include?('accessionWF:publish:completed')
|
714
|
+
shelved = doc.fetch(Solrizer.solr_name('wf_wps', :symbol), []).include?('accessionWF:shelve:completed')
|
765
715
|
end
|
766
716
|
source_id = doc[Solrizer.solr_name('source_id', :symbol)]
|
767
|
-
files=doc[Solrizer.solr_name('content_file', :symbol)]
|
717
|
+
files = doc[Solrizer.solr_name('content_file', :symbol)]
|
718
|
+
end
|
768
719
|
|
720
|
+
if files.nil?
|
721
|
+
file_type_list = ''
|
722
|
+
num_files = 0
|
723
|
+
else
|
724
|
+
num_files = files.size
|
725
|
+
# count the amount of each file type
|
726
|
+
file_types = Hash.new(0)
|
727
|
+
unless num_files == 0
|
728
|
+
files.each {|file| file_types[File.extname(file)] += 1}
|
729
|
+
file_type_list = file_types.map{|k, v| "#{k}=#{v}"}.join(' | ')
|
730
|
+
end
|
769
731
|
end
|
770
732
|
|
771
|
-
|
772
|
-
|
773
|
-
|
774
|
-
else
|
775
|
-
num_files = files.size
|
776
|
-
# count the amount of each file type
|
777
|
-
file_types=Hash.new(0)
|
778
|
-
unless num_files == 0
|
779
|
-
files.each {|file| file_types[File.extname(file)]+=1}
|
780
|
-
file_type_list=file_types.map{|k,v| "#{k}=#{v}"}.join(' | ')
|
781
|
-
end
|
782
|
-
end
|
783
|
-
|
784
|
-
purl_link = ""
|
785
|
-
val = druid.split(/:/).last
|
786
|
-
purl_link = File.join(Assembly::PURL_BASE_URL, val)
|
787
|
-
|
788
|
-
return [druid, label, title, source_id, accessioned, shelved, purl_link, num_files,file_type_list]
|
789
|
-
|
733
|
+
val = druid.split(/:/).last
|
734
|
+
purl_link = File.join(Assembly::PURL_BASE_URL, val)
|
735
|
+
[druid, label, title, source_id, accessioned, shelved, purl_link, num_files, file_type_list]
|
790
736
|
end
|
791
737
|
|
792
738
|
# Takes a hash data structure and recursively converts all hash keys from strings to symbols.
|
793
739
|
#
|
794
740
|
# @param [hash] h hash
|
795
|
-
#
|
796
741
|
# @return [hash] a hash with all keys converted from strings to symbols
|
797
742
|
#
|
798
743
|
# Example:
|
799
|
-
# Assembly::Utils.symbolize_keys({'dude'=>'is cool','i'=>'am too'})
|
800
|
-
# > {:dude=>"is cool", :i=>"am too"}
|
744
|
+
# Assembly::Utils.symbolize_keys({'dude' => 'is cool', 'i' => 'am too'})
|
745
|
+
# > {:dude => "is cool", :i => "am too"}
|
801
746
|
def self.symbolize_keys(h)
|
802
747
|
if h.instance_of? Hash
|
803
|
-
h.inject({}) { |hh,(k,v)| hh[k.to_sym] = symbolize_keys(v); hh }
|
748
|
+
h.inject({}) { |hh, (k, v)| hh[k.to_sym] = symbolize_keys(v); hh }
|
804
749
|
elsif h.instance_of? Array
|
805
750
|
h.map { |v| symbolize_keys(v) }
|
806
751
|
else
|
@@ -809,48 +754,39 @@ module Assembly
|
|
809
754
|
end
|
810
755
|
|
811
756
|
# Takes a hash and converts its string values to symbols -- not recursively.
|
812
|
-
#
|
813
757
|
# @param [hash] h hash
|
814
|
-
#
|
815
|
-
# @return [hash] a hash with all keys converted from strings to symbols
|
816
|
-
#
|
758
|
+
# @return [hash] a hash with all values converted from strings to symbols
|
817
759
|
# Example:
|
818
|
-
# Assembly::Utils.values_to_symbols!({'dude'=>'iscool','i'=>'amtoo'})
|
819
|
-
# > {
|
760
|
+
# Assembly::Utils.values_to_symbols!({'dude' => 'iscool', 'i' => 'amtoo'})
|
761
|
+
# > {'i' => :amtoo, 'dude' => :iscool}
|
820
762
|
def self.values_to_symbols!(h)
|
821
|
-
h.each { |k,v| h[k] = v.to_sym if v.class == String }
|
822
|
-
end
|
763
|
+
h.each { |k, v| h[k] = v.to_sym if v.class == String }
|
764
|
+
end
|
823
765
|
|
824
766
|
# Removes any duplicate tags within each druid
|
825
|
-
#
|
826
767
|
# @param [array] druids - an array of druids
|
827
768
|
def self.remove_duplicate_tags(druids)
|
828
769
|
druids.each do |druid|
|
829
770
|
i = Dor::Item.find(druid)
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
end
|
838
|
-
end
|
771
|
+
next unless i && i.tags.size > 1 # multiple tags
|
772
|
+
i.tags.each do |tag|
|
773
|
+
next unless (i.tags.select {|t| t == tag}).size > 1 # tag is duplicate
|
774
|
+
i.remove_tag(tag)
|
775
|
+
i.add_tag(tag)
|
776
|
+
puts "Saving #{druid} to remove duplicate tag='#{tag}'"
|
777
|
+
i.save
|
839
778
|
end
|
840
779
|
end
|
841
780
|
end
|
842
781
|
|
843
|
-
|
844
|
-
# Used by the cleanup to ask user for confirmation of each step. Any response other than 'yes' results in the raising of an error
|
845
|
-
#
|
782
|
+
# Used by the cleanup to ask user for confirmation of each step. Any response other than 'yes' raises an error
|
846
783
|
# @param [string] message the message to show to a user
|
847
784
|
#
|
848
785
|
def self.confirm(message)
|
849
786
|
puts message
|
850
|
-
response=gets.chomp.downcase
|
851
|
-
raise
|
787
|
+
response = gets.chomp.downcase
|
788
|
+
raise 'Exiting' if response != 'y' && response != 'yes'
|
852
789
|
end
|
853
790
|
|
854
791
|
end
|
855
|
-
|
856
792
|
end
|