preservation 0.2.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8a7fb8d9bbb3ee395f23b470f0fba14641bba7d5
4
- data.tar.gz: c2452352ba383321304b6a8c030bf514d8956113
3
+ metadata.gz: 922ab4347457976c6f59c174922cec3699ec186b
4
+ data.tar.gz: 52fcc64ecab218ffa17f270d69d954d81c779142
5
5
  SHA512:
6
- metadata.gz: 33dddbb76619f3b188644c1d7c0fba431e3559967fdc307cc363e6f8fe7054c4fb363f2604980bced5ee9477f4d7fb1511ce2e5a8d46ec17d788d8bbde27e6e7
7
- data.tar.gz: a268d12ff6c6e6955ebd678c479ece0433ba341032480ff87993ec35c549ba2af4fdb9a5b3171468bb3dd3306d0d71b5cfda0ab1067100c20577085e4ab37366
6
+ metadata.gz: 9dba19729c2c899a7e9e1399f66e95eaaaf3ede7d15adfce8d3f7b8c43d0ff932e014e261bcd3aec961335f8b41ee96f3f942a1e7a131b953ccd2662a1b44029
7
+ data.tar.gz: 42fd26b22096093d0f199b69622f8d4e92f100f7ad2d259cd43b27eead7357ca5fa4054204568439211f7ee3674f8af7e43ea07f6663cf365f9223ca0240e2f2
@@ -4,6 +4,11 @@ This project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
5
  ## Unreleased
6
6
 
7
+ ## 0.3.0 - 2016-09-30
8
+ ### Added
9
+ - Transfer - prepare batches of datasets.
10
+ - Reporting - pending transfers.
11
+
7
12
  ## 0.2.2 - 2016-09-28
8
13
  ### Fixed
9
14
  - Transfer - related work as simple array in metadata.
data/PITCHME.md CHANGED
@@ -84,17 +84,9 @@ Preservation::Storage.cleanup
84
84
  "positive selection"
85
85
  ],
86
86
  "dcterms.license": "CC BY",
87
- "related": [
88
- {
89
- "dc.title": "The unprecedented scale of the West African Ebola virus disease outbreak is due to environmental and sociological factors, not special attributes of the currently circulating strain of the virus",
90
- "type": "Journal article",
91
- "dc.identifier": "http://dx.doi.org/10.1136/ebmed-2014-110127"
92
- },
93
- {
94
- "dc.title": "The 2014 Ebola virus disease outbreak in West Africa",
95
- "type": "Journal article",
96
- "dc.identifier": "http://dx.doi.org/10.1099/vir.0.067199-0"
97
- }
87
+ "dc.relation": [
88
+ "http://dx.doi.org/10.1136/ebmed-2014-110127",
89
+ "http://dx.doi.org/10.1099/vir.0.067199-0"
98
90
  ]
99
91
  }
100
92
  ]
@@ -31,7 +31,7 @@ module Preservation
31
31
 
32
32
  # Build directory name
33
33
  #
34
- # @param metadata record [Hash]
34
+ # @param metadata_record [Hash]
35
35
  # @param directory_name_scheme [Symbol]
36
36
  # @return [String]
37
37
  def self.build_directory_name(metadata_record, directory_name_scheme)
@@ -5,7 +5,7 @@ module Preservation
5
5
  module Conversion
6
6
  # Binary to hexadecimal
7
7
  #
8
- # @param [Binary String]
8
+ # @param s [Binary String]
9
9
  # @return [Hexadecimal String]
10
10
  def self.bin_to_hex(s)
11
11
  s.each_byte.map { |b| b.to_s(16) }.join
@@ -13,7 +13,7 @@ module Preservation
13
13
 
14
14
  # Hexadecimal to binary
15
15
  #
16
- # @param [Hexadecimal String]
16
+ # @param s [Hexadecimal String]
17
17
  # @return [Binary String]
18
18
  def self.hex_to_bin(s)
19
19
  s.scan(/../).map { |x| x.hex.chr }.join
@@ -10,6 +10,7 @@ module Preservation
10
10
  #
11
11
  # @param status_to_find [String]
12
12
  # @param status_presence [Boolean]
13
+ # @return [Array<Hash>]
13
14
  def self.status(status_to_find: nil, status_presence: true)
14
15
  if status_presence === true
15
16
  status_presence = '='
@@ -22,7 +23,10 @@ module Preservation
22
23
  records = []
23
24
  db.results_as_hash = true
24
25
  db.execute( query, [ status_to_find ] ) do |row|
25
- records << row_to_hash(row)
26
+ bin_path = Preservation::Conversion.hex_to_bin row['hex_path']
27
+ if !bin_path.nil? && !bin_path.empty?
28
+ records << row_to_hash(row)
29
+ end
26
30
  end
27
31
 
28
32
  records
@@ -53,22 +57,61 @@ module Preservation
53
57
  db.get_first_value( query, [status_to_find] )
54
58
  end
55
59
 
60
+ # Pending transfers
61
+ #
62
+ # @return [Array<Hash>] one hash per entry, with 'path' and 'path_timestamp' keys
63
+ def self.pending
64
+ # Get the directories
65
+ dirs = Dir.entries Preservation.ingest_path
66
+ a = []
67
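+ # For each directory, if it isn't in the db, add it to list (NOTE(review): the guard below skips entries that are NOT in the db - confirm which behaviour is intended)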
68
+ dirs.each do |dir|
69
+ next if !in_db?(dir)
70
+ o = {}
71
+ o['path'] = dir
72
+ o['path_timestamp'] = File.mtime "#{Preservation.ingest_path}/#{dir}"
73
+ a << o
74
+ end
75
+ a
76
+ end
77
+
78
+ # Is there a pending transfer with this path?
79
+ #
80
+ # @return [Boolean]
81
+ def self.pending?(path_to_find)
82
+ is_pending = false
83
+ pending.each do |i|
84
+ if i['path'] == path_to_find
85
+ is_pending = true
86
+ break
87
+ end
88
+ end
89
+ is_pending
90
+ end
91
+
92
+
56
93
  # Compilation of statistics and data, with focus on exceptions
57
94
  #
58
95
  # @return [Hash]
59
96
  def self.exception
60
- incomplete = status(status_to_find: 'COMPLETE', status_presence: false)
61
- failed = status(status_to_find: 'FAILED', status_presence: true)
97
+ incomplete_result = status(status_to_find: 'COMPLETE', status_presence: false)
98
+ failed_result = status(status_to_find: 'FAILED', status_presence: true)
99
+ pending_result = pending
100
+ current_result = current
101
+ complete_count_result = complete_count
62
102
  report = {}
63
- report['current'] = current if !current.empty?
103
+ report['pending'] = {}
104
+ report['pending']['count'] = pending_result.count
105
+ report['pending']['data'] = pending_result if !pending_result.empty?
106
+ report['current'] = current_result if !current_result.empty?
64
107
  report['failed'] = {}
65
- report['failed']['count'] = failed.count
66
- report['failed']['data'] = failed if !failed.empty?
108
+ report['failed']['count'] = failed_result.count
109
+ report['failed']['data'] = failed_result if !failed_result.empty?
67
110
  report['incomplete'] = {}
68
- report['incomplete']['count'] = incomplete.count
69
- report['incomplete']['data'] = incomplete if !incomplete.empty?
111
+ report['incomplete']['count'] = incomplete_result.count
112
+ report['incomplete']['data'] = incomplete_result if !incomplete_result.empty?
70
113
  report['complete'] = {}
71
- report['complete']['count'] = complete_count if complete_count
114
+ report['complete']['count'] = complete_count_result if complete_count_result
72
115
  report
73
116
  end
74
117
 
@@ -119,6 +162,8 @@ module Preservation
119
162
  preserved
120
163
  end
121
164
 
165
+ private
166
+
122
167
  # Db
123
168
  #
124
169
  # @return [SQLite3::Database]
@@ -144,6 +189,10 @@ module Preservation
144
189
  o['current'] = current if current
145
190
  o['id'] = id if id
146
191
  o['uuid'] = uuid if !uuid.nil? && !uuid.empty?
192
+ path = "#{Preservation.ingest_path}/#{bin_path}"
193
+ if File.exist? path
194
+ o['path_timestamp'] = File.mtime path
195
+ end
147
196
  o
148
197
  end
149
198
 
@@ -29,9 +29,12 @@ module Preservation
29
29
  # @param uuid [String] uuid to preserve
30
30
  # @param dir_scheme [Symbol] how to make directory name
31
31
  # @param delay [Integer] days to wait (after modification date) before preserving
32
+ # @return [Boolean] true only if this call created the metadata description file (false when skipped, including when the file already exists)
32
33
  def prepare_dataset(uuid: nil,
33
34
  dir_scheme: :uuid,
34
35
  delay: 0)
36
+ success = false
37
+
35
38
  if uuid.nil?
36
39
  @logger.error 'Missing ' + uuid
37
40
  exit
@@ -49,6 +52,7 @@ module Preservation
49
52
  @logger.error 'No metadata for ' + uuid
50
53
  exit
51
54
  end
55
+
52
56
  # configurable to become more human-readable
53
57
  dir_name = Preservation::Builder.build_directory_name(d, dir_scheme)
54
58
 
@@ -115,6 +119,7 @@ module Preservation
115
119
  # puts pretty
116
120
  File.write(metadata_filename,pretty)
117
121
  @logger.info 'Created ' + metadata_filename
122
+ success = true
118
123
  else
119
124
  @logger.info 'Skipping ' + dir_name + ', Pure UUID ' + d['uuid'] +
120
125
  ' because ' + metadata_filename + ' exists'
@@ -122,6 +127,49 @@ module Preservation
122
127
  else
123
128
  @logger.info 'Skipping ' + dir_name + ', Pure UUID ' + d['uuid']
124
129
  end
130
+ success
131
+ end
132
+
133
+ # For multiple datasets, if necessary, fetch the metadata,
134
+ # prepare a directory in the ingest path and populate it with the files and
135
+ # JSON description file.
136
+ #
137
+ # @param max [Integer] maximum to prepare, omit to set no maximum
138
+ # @param dir_scheme [Symbol] how to make directory name
139
+ # @param delay [Integer] days to wait (after modification date) before preserving
140
+ def prepare_dataset_batch(max: nil,
141
+ dir_scheme: :uuid,
142
+ delay: 30)
143
+ collection = Puree::Collection.new resource: :dataset,
144
+ base_url: @base_url,
145
+ username: @username,
146
+ password: @password,
147
+ basic_auth: @basic_auth
148
+ count = collection.count
149
+
150
+ max = count if max.nil?
151
+
152
+ batch_size = 10
153
+ num_prepared = 0
154
+ 0.step(count, batch_size) do |n|
155
+
156
+ minimal_metadata = collection.find limit: batch_size,
157
+ offset: n,
158
+ full: false
159
+ uuids = []
160
+ minimal_metadata.each do |i|
161
+ uuids << i['uuid']
162
+ end
163
+
164
+ uuids.each do |uuid|
165
+ success = prepare_dataset uuid: uuid,
166
+ dir_scheme: dir_scheme.to_sym,
167
+ delay: delay
168
+
169
+ num_prepared += 1 if success
170
+ exit if num_prepared == max
171
+ end
172
+ end
125
173
  end
126
174
 
127
175
  private
@@ -1,5 +1,5 @@
1
1
  module Preservation
2
2
  # Semantic version number
3
3
  #
4
- VERSION = "0.2.2"
4
+ VERSION = "0.4.0"
5
5
  end
@@ -21,6 +21,6 @@ Gem::Specification.new do |spec|
21
21
  spec.required_ruby_version = '~> 2.1'
22
22
 
23
23
  spec.add_runtime_dependency 'free_disk_space', '~> 1.0'
24
- spec.add_runtime_dependency 'puree', '~> 0.17'
24
+ spec.add_runtime_dependency 'puree', '~> 0.19'
25
25
  spec.add_runtime_dependency 'sqlite3', '~> 1.3'
26
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: preservation
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adrian Albin-Clark
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-09-28 00:00:00.000000000 Z
11
+ date: 2016-09-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: free_disk_space
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '0.17'
33
+ version: '0.19'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '0.17'
40
+ version: '0.19'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: sqlite3
43
43
  requirement: !ruby/object:Gem::Requirement