preservation 0.2.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/PITCHME.md +3 -11
- data/lib/preservation/builder.rb +1 -1
- data/lib/preservation/conversion.rb +2 -2
- data/lib/preservation/report/transfer.rb +58 -9
- data/lib/preservation/transfer/pure.rb +48 -0
- data/lib/preservation/version.rb +1 -1
- data/preservation.gemspec +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 922ab4347457976c6f59c174922cec3699ec186b
|
4
|
+
data.tar.gz: 52fcc64ecab218ffa17f270d69d954d81c779142
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9dba19729c2c899a7e9e1399f66e95eaaaf3ede7d15adfce8d3f7b8c43d0ff932e014e261bcd3aec961335f8b41ee96f3f942a1e7a131b953ccd2662a1b44029
|
7
|
+
data.tar.gz: 42fd26b22096093d0f199b69622f8d4e92f100f7ad2d259cd43b27eead7357ca5fa4054204568439211f7ee3674f8af7e43ea07f6663cf365f9223ca0240e2f2
|
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,11 @@ This project adheres to [Semantic Versioning](http://semver.org/).
|
|
4
4
|
|
5
5
|
## Unreleased
|
6
6
|
|
7
|
+
## 0.3.0 - 2016-09-30
|
8
|
+
### Added
|
9
|
+
- Transfer - prepare batches of datasets.
|
10
|
+
- Reporting - pending transfers.
|
11
|
+
|
7
12
|
## 0.2.2 - 2016-09-28
|
8
13
|
### Fixed
|
9
14
|
- Transfer - related work as simple array in metadata.
|
data/PITCHME.md
CHANGED
@@ -84,17 +84,9 @@ Preservation::Storage.cleanup
|
|
84
84
|
"positive selection"
|
85
85
|
],
|
86
86
|
"dcterms.license": "CC BY",
|
87
|
-
"
|
88
|
-
|
89
|
-
|
90
|
-
"type": "Journal article",
|
91
|
-
"dc.identifier": "http://dx.doi.org/10.1136/ebmed-2014-110127"
|
92
|
-
},
|
93
|
-
{
|
94
|
-
"dc.title": "The 2014 Ebola virus disease outbreak in West Africa",
|
95
|
-
"type": "Journal article",
|
96
|
-
"dc.identifier": "http://dx.doi.org/10.1099/vir.0.067199-0"
|
97
|
-
}
|
87
|
+
"dc.relation": [
|
88
|
+
"http://dx.doi.org/10.1136/ebmed-2014-110127",
|
89
|
+
"http://dx.doi.org/10.1099/vir.0.067199-0"
|
98
90
|
]
|
99
91
|
}
|
100
92
|
]
|
data/lib/preservation/builder.rb
CHANGED
@@ -31,7 +31,7 @@ module Preservation
|
|
31
31
|
|
32
32
|
# Build directory name
|
33
33
|
#
|
34
|
-
# @param
|
34
|
+
# @param metadata_record [Hash]
|
35
35
|
# @param directory_name_scheme [Symbol]
|
36
36
|
# @return [String]
|
37
37
|
def self.build_directory_name(metadata_record, directory_name_scheme)
|
@@ -5,7 +5,7 @@ module Preservation
|
|
5
5
|
module Conversion
|
6
6
|
# Binary to hexadecimal
|
7
7
|
#
|
8
|
-
# @param [Binary String]
|
8
|
+
# @param s [Binary String]
|
9
9
|
# @return [Hexadecimal String]
|
10
10
|
def self.bin_to_hex(s)
|
11
11
|
s.each_byte.map { |b| b.to_s(16) }.join
|
@@ -13,7 +13,7 @@ module Preservation
|
|
13
13
|
|
14
14
|
# Hexadecimal to binary
|
15
15
|
#
|
16
|
-
# @param [Hexadecimal String]
|
16
|
+
# @param s [Hexadecimal String]
|
17
17
|
# @return [Binary String]
|
18
18
|
def self.hex_to_bin(s)
|
19
19
|
s.scan(/../).map { |x| x.hex.chr }.join
|
@@ -10,6 +10,7 @@ module Preservation
|
|
10
10
|
#
|
11
11
|
# @param status_to_find [String]
|
12
12
|
# @param status_presence [Boolean]
|
13
|
+
# @return [Array<Hash>]
|
13
14
|
def self.status(status_to_find: nil, status_presence: true)
|
14
15
|
if status_presence === true
|
15
16
|
status_presence = '='
|
@@ -22,7 +23,10 @@ module Preservation
|
|
22
23
|
records = []
|
23
24
|
db.results_as_hash = true
|
24
25
|
db.execute( query, [ status_to_find ] ) do |row|
|
25
|
-
|
26
|
+
bin_path = Preservation::Conversion.hex_to_bin row['hex_path']
|
27
|
+
if !bin_path.nil? && !bin_path.empty?
|
28
|
+
records << row_to_hash(row)
|
29
|
+
end
|
26
30
|
end
|
27
31
|
|
28
32
|
records
|
@@ -53,22 +57,61 @@ module Preservation
|
|
53
57
|
db.get_first_value( query, [status_to_find] )
|
54
58
|
end
|
55
59
|
|
60
|
+
# Pending transfers
|
61
|
+
#
|
62
|
+
# @return [Hash]
|
63
|
+
def self.pending
|
64
|
+
# Get the directories
|
65
|
+
dirs = Dir.entries Preservation.ingest_path
|
66
|
+
a = []
|
67
|
+
# For each directory, if it isn't in the db, add it to list
|
68
|
+
dirs.each do |dir|
|
69
|
+
next if !in_db?(dir)
|
70
|
+
o = {}
|
71
|
+
o['path'] = dir
|
72
|
+
o['path_timestamp'] = File.mtime "#{Preservation.ingest_path}/#{dir}"
|
73
|
+
a << o
|
74
|
+
end
|
75
|
+
a
|
76
|
+
end
|
77
|
+
|
78
|
+
# Is there a pending transfer with this path?
|
79
|
+
#
|
80
|
+
# @return [Boolean]
|
81
|
+
def self.pending?(path_to_find)
|
82
|
+
is_pending = false
|
83
|
+
pending.each do |i|
|
84
|
+
if i['path'] == path_to_find
|
85
|
+
is_pending = true
|
86
|
+
break
|
87
|
+
end
|
88
|
+
end
|
89
|
+
is_pending
|
90
|
+
end
|
91
|
+
|
92
|
+
|
56
93
|
# Compilation of statistics and data, with focus on exceptions
|
57
94
|
#
|
58
95
|
# @return [Hash]
|
59
96
|
def self.exception
|
60
|
-
|
61
|
-
|
97
|
+
incomplete_result = status(status_to_find: 'COMPLETE', status_presence: false)
|
98
|
+
failed_result = status(status_to_find: 'FAILED', status_presence: true)
|
99
|
+
pending_result = pending
|
100
|
+
current_result = current
|
101
|
+
complete_count_result = complete_count
|
62
102
|
report = {}
|
63
|
-
report['
|
103
|
+
report['pending'] = {}
|
104
|
+
report['pending']['count'] = pending_result.count
|
105
|
+
report['pending']['data'] = pending_result if !pending_result.empty?
|
106
|
+
report['current'] = current_result if !current_result.empty?
|
64
107
|
report['failed'] = {}
|
65
|
-
report['failed']['count'] =
|
66
|
-
report['failed']['data'] =
|
108
|
+
report['failed']['count'] = failed_result.count
|
109
|
+
report['failed']['data'] = failed_result if !failed_result.empty?
|
67
110
|
report['incomplete'] = {}
|
68
|
-
report['incomplete']['count'] =
|
69
|
-
report['incomplete']['data'] =
|
111
|
+
report['incomplete']['count'] = incomplete_result.count
|
112
|
+
report['incomplete']['data'] = incomplete_result if !incomplete_result.empty?
|
70
113
|
report['complete'] = {}
|
71
|
-
report['complete']['count'] =
|
114
|
+
report['complete']['count'] = complete_count_result if complete_count_result
|
72
115
|
report
|
73
116
|
end
|
74
117
|
|
@@ -119,6 +162,8 @@ module Preservation
|
|
119
162
|
preserved
|
120
163
|
end
|
121
164
|
|
165
|
+
private
|
166
|
+
|
122
167
|
# Db
|
123
168
|
#
|
124
169
|
# @return [SQLite3::Database]
|
@@ -144,6 +189,10 @@ module Preservation
|
|
144
189
|
o['current'] = current if current
|
145
190
|
o['id'] = id if id
|
146
191
|
o['uuid'] = uuid if !uuid.nil? && !uuid.empty?
|
192
|
+
path = "#{Preservation.ingest_path}/#{bin_path}"
|
193
|
+
if File.exist? path
|
194
|
+
o['path_timestamp'] = File.mtime path
|
195
|
+
end
|
147
196
|
o
|
148
197
|
end
|
149
198
|
|
@@ -29,9 +29,12 @@ module Preservation
|
|
29
29
|
# @param uuid [String] uuid to preserve
|
30
30
|
# @param dir_scheme [Symbol] how to make directory name
|
31
31
|
# @param delay [Integer] days to wait (after modification date) before preserving
|
32
|
+
# @return [Boolean] indicates presence of metadata description file
|
32
33
|
def prepare_dataset(uuid: nil,
|
33
34
|
dir_scheme: :uuid,
|
34
35
|
delay: 0)
|
36
|
+
success = false
|
37
|
+
|
35
38
|
if uuid.nil?
|
36
39
|
@logger.error 'Missing ' + uuid
|
37
40
|
exit
|
@@ -49,6 +52,7 @@ module Preservation
|
|
49
52
|
@logger.error 'No metadata for ' + uuid
|
50
53
|
exit
|
51
54
|
end
|
55
|
+
|
52
56
|
# configurable to become more human-readable
|
53
57
|
dir_name = Preservation::Builder.build_directory_name(d, dir_scheme)
|
54
58
|
|
@@ -115,6 +119,7 @@ module Preservation
|
|
115
119
|
# puts pretty
|
116
120
|
File.write(metadata_filename,pretty)
|
117
121
|
@logger.info 'Created ' + metadata_filename
|
122
|
+
success = true
|
118
123
|
else
|
119
124
|
@logger.info 'Skipping ' + dir_name + ', Pure UUID ' + d['uuid'] +
|
120
125
|
' because ' + metadata_filename + ' exists'
|
@@ -122,6 +127,49 @@ module Preservation
|
|
122
127
|
else
|
123
128
|
@logger.info 'Skipping ' + dir_name + ', Pure UUID ' + d['uuid']
|
124
129
|
end
|
130
|
+
success
|
131
|
+
end
|
132
|
+
|
133
|
+
# For multiple datasets, if necessary, fetch the metadata,
|
134
|
+
# prepare a directory in the ingest path and populate it with the files and
|
135
|
+
# JSON description file.
|
136
|
+
#
|
137
|
+
# @param max [Integer] maximum to prepare, omit to set no maximum
|
138
|
+
# @param dir_scheme [Symbol] how to make directory name
|
139
|
+
# @param delay [Integer] days to wait (after modification date) before preserving
|
140
|
+
def prepare_dataset_batch(max: nil,
|
141
|
+
dir_scheme: :uuid,
|
142
|
+
delay: 30)
|
143
|
+
collection = Puree::Collection.new resource: :dataset,
|
144
|
+
base_url: @base_url,
|
145
|
+
username: @username,
|
146
|
+
password: @password,
|
147
|
+
basic_auth: @basic_auth
|
148
|
+
count = collection.count
|
149
|
+
|
150
|
+
max = count if max.nil?
|
151
|
+
|
152
|
+
batch_size = 10
|
153
|
+
num_prepared = 0
|
154
|
+
0.step(count, batch_size) do |n|
|
155
|
+
|
156
|
+
minimal_metadata = collection.find limit: batch_size,
|
157
|
+
offset: n,
|
158
|
+
full: false
|
159
|
+
uuids = []
|
160
|
+
minimal_metadata.each do |i|
|
161
|
+
uuids << i['uuid']
|
162
|
+
end
|
163
|
+
|
164
|
+
uuids.each do |uuid|
|
165
|
+
success = prepare_dataset uuid: uuid,
|
166
|
+
dir_scheme: dir_scheme.to_sym,
|
167
|
+
delay: delay
|
168
|
+
|
169
|
+
num_prepared += 1 if success
|
170
|
+
exit if num_prepared == max
|
171
|
+
end
|
172
|
+
end
|
125
173
|
end
|
126
174
|
|
127
175
|
private
|
data/lib/preservation/version.rb
CHANGED
data/preservation.gemspec
CHANGED
@@ -21,6 +21,6 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.required_ruby_version = '~> 2.1'
|
22
22
|
|
23
23
|
spec.add_runtime_dependency 'free_disk_space', '~> 1.0'
|
24
|
-
spec.add_runtime_dependency 'puree', '~> 0.
|
24
|
+
spec.add_runtime_dependency 'puree', '~> 0.19'
|
25
25
|
spec.add_runtime_dependency 'sqlite3', '~> 1.3'
|
26
26
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: preservation
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adrian Albin-Clark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-09-
|
11
|
+
date: 2016-09-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: free_disk_space
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '0.
|
33
|
+
version: '0.19'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '0.
|
40
|
+
version: '0.19'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: sqlite3
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|