preservation 0.2.2 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/PITCHME.md +3 -11
- data/lib/preservation/builder.rb +1 -1
- data/lib/preservation/conversion.rb +2 -2
- data/lib/preservation/report/transfer.rb +58 -9
- data/lib/preservation/transfer/pure.rb +48 -0
- data/lib/preservation/version.rb +1 -1
- data/preservation.gemspec +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 922ab4347457976c6f59c174922cec3699ec186b
|
4
|
+
data.tar.gz: 52fcc64ecab218ffa17f270d69d954d81c779142
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9dba19729c2c899a7e9e1399f66e95eaaaf3ede7d15adfce8d3f7b8c43d0ff932e014e261bcd3aec961335f8b41ee96f3f942a1e7a131b953ccd2662a1b44029
|
7
|
+
data.tar.gz: 42fd26b22096093d0f199b69622f8d4e92f100f7ad2d259cd43b27eead7357ca5fa4054204568439211f7ee3674f8af7e43ea07f6663cf365f9223ca0240e2f2
|
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,11 @@ This project adheres to [Semantic Versioning](http://semver.org/).
|
|
4
4
|
|
5
5
|
## Unreleased
|
6
6
|
|
7
|
+
## 0.3.0 - 2016-09-30
|
8
|
+
### Added
|
9
|
+
- Transfer - prepare batches of datasets.
|
10
|
+
- Reporting - pending transfers.
|
11
|
+
|
7
12
|
## 0.2.2 - 2016-09-28
|
8
13
|
### Fixed
|
9
14
|
- Transfer - related work as simple array in metadata.
|
data/PITCHME.md
CHANGED
@@ -84,17 +84,9 @@ Preservation::Storage.cleanup
|
|
84
84
|
"positive selection"
|
85
85
|
],
|
86
86
|
"dcterms.license": "CC BY",
|
87
|
-
"
|
88
|
-
|
89
|
-
|
90
|
-
"type": "Journal article",
|
91
|
-
"dc.identifier": "http://dx.doi.org/10.1136/ebmed-2014-110127"
|
92
|
-
},
|
93
|
-
{
|
94
|
-
"dc.title": "The 2014 Ebola virus disease outbreak in West Africa",
|
95
|
-
"type": "Journal article",
|
96
|
-
"dc.identifier": "http://dx.doi.org/10.1099/vir.0.067199-0"
|
97
|
-
}
|
87
|
+
"dc.relation": [
|
88
|
+
"http://dx.doi.org/10.1136/ebmed-2014-110127",
|
89
|
+
"http://dx.doi.org/10.1099/vir.0.067199-0"
|
98
90
|
]
|
99
91
|
}
|
100
92
|
]
|
data/lib/preservation/builder.rb
CHANGED
@@ -31,7 +31,7 @@ module Preservation
|
|
31
31
|
|
32
32
|
# Build directory name
|
33
33
|
#
|
34
|
-
# @param
|
34
|
+
# @param metadata_record [Hash]
|
35
35
|
# @param directory_name_scheme [Symbol]
|
36
36
|
# @return [String]
|
37
37
|
def self.build_directory_name(metadata_record, directory_name_scheme)
|
@@ -5,7 +5,7 @@ module Preservation
|
|
5
5
|
module Conversion
|
6
6
|
# Binary to hexadecimal
|
7
7
|
#
|
8
|
-
# @param [Binary String]
|
8
|
+
# @param s [Binary String]
|
9
9
|
# @return [Hexadecimal String]
|
10
10
|
def self.bin_to_hex(s)
|
11
11
|
s.each_byte.map { |b| b.to_s(16) }.join
|
@@ -13,7 +13,7 @@ module Preservation
|
|
13
13
|
|
14
14
|
# Hexadecimal to binary
|
15
15
|
#
|
16
|
-
# @param [Hexadecimal String]
|
16
|
+
# @param s [Hexadecimal String]
|
17
17
|
# @return [Binary String]
|
18
18
|
def self.hex_to_bin(s)
|
19
19
|
s.scan(/../).map { |x| x.hex.chr }.join
|
@@ -10,6 +10,7 @@ module Preservation
|
|
10
10
|
#
|
11
11
|
# @param status_to_find [String]
|
12
12
|
# @param status_presence [Boolean]
|
13
|
+
# @return [Array<Hash>]
|
13
14
|
def self.status(status_to_find: nil, status_presence: true)
|
14
15
|
if status_presence === true
|
15
16
|
status_presence = '='
|
@@ -22,7 +23,10 @@ module Preservation
|
|
22
23
|
records = []
|
23
24
|
db.results_as_hash = true
|
24
25
|
db.execute( query, [ status_to_find ] ) do |row|
|
25
|
-
|
26
|
+
bin_path = Preservation::Conversion.hex_to_bin row['hex_path']
|
27
|
+
if !bin_path.nil? && !bin_path.empty?
|
28
|
+
records << row_to_hash(row)
|
29
|
+
end
|
26
30
|
end
|
27
31
|
|
28
32
|
records
|
@@ -53,22 +57,61 @@ module Preservation
|
|
53
57
|
db.get_first_value( query, [status_to_find] )
|
54
58
|
end
|
55
59
|
|
60
|
+
# Pending transfers
|
61
|
+
#
|
62
|
+
# @return [Hash]
|
63
|
+
def self.pending
|
64
|
+
# Get the directories
|
65
|
+
dirs = Dir.entries Preservation.ingest_path
|
66
|
+
a = []
|
67
|
+
# For each directory, if it isn't in the db, add it to list
|
68
|
+
dirs.each do |dir|
|
69
|
+
next if !in_db?(dir)
|
70
|
+
o = {}
|
71
|
+
o['path'] = dir
|
72
|
+
o['path_timestamp'] = File.mtime "#{Preservation.ingest_path}/#{dir}"
|
73
|
+
a << o
|
74
|
+
end
|
75
|
+
a
|
76
|
+
end
|
77
|
+
|
78
|
+
# Is there a pending transfer with this path?
|
79
|
+
#
|
80
|
+
# @return [Boolean]
|
81
|
+
def self.pending?(path_to_find)
|
82
|
+
is_pending = false
|
83
|
+
pending.each do |i|
|
84
|
+
if i['path'] == path_to_find
|
85
|
+
is_pending = true
|
86
|
+
break
|
87
|
+
end
|
88
|
+
end
|
89
|
+
is_pending
|
90
|
+
end
|
91
|
+
|
92
|
+
|
56
93
|
# Compilation of statistics and data, with focus on exceptions
|
57
94
|
#
|
58
95
|
# @return [Hash]
|
59
96
|
def self.exception
|
60
|
-
|
61
|
-
|
97
|
+
incomplete_result = status(status_to_find: 'COMPLETE', status_presence: false)
|
98
|
+
failed_result = status(status_to_find: 'FAILED', status_presence: true)
|
99
|
+
pending_result = pending
|
100
|
+
current_result = current
|
101
|
+
complete_count_result = complete_count
|
62
102
|
report = {}
|
63
|
-
report['
|
103
|
+
report['pending'] = {}
|
104
|
+
report['pending']['count'] = pending_result.count
|
105
|
+
report['pending']['data'] = pending_result if !pending_result.empty?
|
106
|
+
report['current'] = current_result if !current_result.empty?
|
64
107
|
report['failed'] = {}
|
65
|
-
report['failed']['count'] =
|
66
|
-
report['failed']['data'] =
|
108
|
+
report['failed']['count'] = failed_result.count
|
109
|
+
report['failed']['data'] = failed_result if !failed_result.empty?
|
67
110
|
report['incomplete'] = {}
|
68
|
-
report['incomplete']['count'] =
|
69
|
-
report['incomplete']['data'] =
|
111
|
+
report['incomplete']['count'] = incomplete_result.count
|
112
|
+
report['incomplete']['data'] = incomplete_result if !incomplete_result.empty?
|
70
113
|
report['complete'] = {}
|
71
|
-
report['complete']['count'] =
|
114
|
+
report['complete']['count'] = complete_count_result if complete_count_result
|
72
115
|
report
|
73
116
|
end
|
74
117
|
|
@@ -119,6 +162,8 @@ module Preservation
|
|
119
162
|
preserved
|
120
163
|
end
|
121
164
|
|
165
|
+
private
|
166
|
+
|
122
167
|
# Db
|
123
168
|
#
|
124
169
|
# @return [SQLite3::Database]
|
@@ -144,6 +189,10 @@ module Preservation
|
|
144
189
|
o['current'] = current if current
|
145
190
|
o['id'] = id if id
|
146
191
|
o['uuid'] = uuid if !uuid.nil? && !uuid.empty?
|
192
|
+
path = "#{Preservation.ingest_path}/#{bin_path}"
|
193
|
+
if File.exist? path
|
194
|
+
o['path_timestamp'] = File.mtime path
|
195
|
+
end
|
147
196
|
o
|
148
197
|
end
|
149
198
|
|
@@ -29,9 +29,12 @@ module Preservation
|
|
29
29
|
# @param uuid [String] uuid to preserve
|
30
30
|
# @param dir_scheme [Symbol] how to make directory name
|
31
31
|
# @param delay [Integer] days to wait (after modification date) before preserving
|
32
|
+
# @return [Boolean] indicates presence of metadata description file
|
32
33
|
def prepare_dataset(uuid: nil,
|
33
34
|
dir_scheme: :uuid,
|
34
35
|
delay: 0)
|
36
|
+
success = false
|
37
|
+
|
35
38
|
if uuid.nil?
|
36
39
|
@logger.error 'Missing ' + uuid
|
37
40
|
exit
|
@@ -49,6 +52,7 @@ module Preservation
|
|
49
52
|
@logger.error 'No metadata for ' + uuid
|
50
53
|
exit
|
51
54
|
end
|
55
|
+
|
52
56
|
# configurable to become more human-readable
|
53
57
|
dir_name = Preservation::Builder.build_directory_name(d, dir_scheme)
|
54
58
|
|
@@ -115,6 +119,7 @@ module Preservation
|
|
115
119
|
# puts pretty
|
116
120
|
File.write(metadata_filename,pretty)
|
117
121
|
@logger.info 'Created ' + metadata_filename
|
122
|
+
success = true
|
118
123
|
else
|
119
124
|
@logger.info 'Skipping ' + dir_name + ', Pure UUID ' + d['uuid'] +
|
120
125
|
' because ' + metadata_filename + ' exists'
|
@@ -122,6 +127,49 @@ module Preservation
|
|
122
127
|
else
|
123
128
|
@logger.info 'Skipping ' + dir_name + ', Pure UUID ' + d['uuid']
|
124
129
|
end
|
130
|
+
success
|
131
|
+
end
|
132
|
+
|
133
|
+
# For multiple datasets, if necessary, fetch the metadata,
|
134
|
+
# prepare a directory in the ingest path and populate it with the files and
|
135
|
+
# JSON description file.
|
136
|
+
#
|
137
|
+
# @param max [Integer] maximum to prepare, omit to set no maximum
|
138
|
+
# @param dir_scheme [Symbol] how to make directory name
|
139
|
+
# @param delay [Integer] days to wait (after modification date) before preserving
|
140
|
+
def prepare_dataset_batch(max: nil,
|
141
|
+
dir_scheme: :uuid,
|
142
|
+
delay: 30)
|
143
|
+
collection = Puree::Collection.new resource: :dataset,
|
144
|
+
base_url: @base_url,
|
145
|
+
username: @username,
|
146
|
+
password: @password,
|
147
|
+
basic_auth: @basic_auth
|
148
|
+
count = collection.count
|
149
|
+
|
150
|
+
max = count if max.nil?
|
151
|
+
|
152
|
+
batch_size = 10
|
153
|
+
num_prepared = 0
|
154
|
+
0.step(count, batch_size) do |n|
|
155
|
+
|
156
|
+
minimal_metadata = collection.find limit: batch_size,
|
157
|
+
offset: n,
|
158
|
+
full: false
|
159
|
+
uuids = []
|
160
|
+
minimal_metadata.each do |i|
|
161
|
+
uuids << i['uuid']
|
162
|
+
end
|
163
|
+
|
164
|
+
uuids.each do |uuid|
|
165
|
+
success = prepare_dataset uuid: uuid,
|
166
|
+
dir_scheme: dir_scheme.to_sym,
|
167
|
+
delay: delay
|
168
|
+
|
169
|
+
num_prepared += 1 if success
|
170
|
+
exit if num_prepared == max
|
171
|
+
end
|
172
|
+
end
|
125
173
|
end
|
126
174
|
|
127
175
|
private
|
data/lib/preservation/version.rb
CHANGED
data/preservation.gemspec
CHANGED
@@ -21,6 +21,6 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.required_ruby_version = '~> 2.1'
|
22
22
|
|
23
23
|
spec.add_runtime_dependency 'free_disk_space', '~> 1.0'
|
24
|
-
spec.add_runtime_dependency 'puree', '~> 0.
|
24
|
+
spec.add_runtime_dependency 'puree', '~> 0.19'
|
25
25
|
spec.add_runtime_dependency 'sqlite3', '~> 1.3'
|
26
26
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: preservation
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adrian Albin-Clark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-09-
|
11
|
+
date: 2016-09-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: free_disk_space
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '0.
|
33
|
+
version: '0.19'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '0.
|
40
|
+
version: '0.19'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: sqlite3
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|