research_metadata_batch 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -1
- data/Gemfile.lock +3 -3
- data/README.md +26 -22
- data/lib/research_metadata_batch/base.rb +33 -47
- data/lib/research_metadata_batch/research_output.rb +1 -1
- data/lib/research_metadata_batch/{custom.rb → shared.rb} +7 -24
- data/lib/research_metadata_batch/version.rb +1 -1
- data/research_metadata_batch.gemspec +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9d27b60c2067c5d5faae6b2680f50186e4087c146bdb076f65955dfd05172397
|
4
|
+
data.tar.gz: 25c9789f3452dc091c2b180eac05f9038ff63f10152eaee9cc4a4fd68be2e862
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 103ffbdd89ab5f0056608f07c3f30fb388f294db3ec882cb5e3b4e729c83b0bfedac16ef5047b17b785c7988f065cab55fb32ca4311064bf50879fc1321dc70e
|
7
|
+
data.tar.gz: 15e0e3496b5194d4a15b1f07a9c8e69d64412e41622a352d62fc5f82dbc16d8dad621494c81e09b01b5c8213a19dbeb53221621e802a4616ecb1ad7b1df19ecf
|
data/CHANGELOG.md
CHANGED
@@ -2,9 +2,17 @@
|
|
2
2
|
All notable changes to this project will be documented in this file.
|
3
3
|
This project adheres to [Semantic Versioning](http://semver.org/).
|
4
4
|
|
5
|
+
## 0.3.0 - 2018-10-12
|
6
|
+
### Added
|
7
|
+
- Support for GET and POST parameters.
|
8
|
+
- Simplified logging.
|
9
|
+
|
10
|
+
### Changed
|
11
|
+
- Record values matching rather than validation.
|
12
|
+
|
5
13
|
## 0.2.0 - 2018-10-05
|
6
14
|
### Changed
|
7
|
-
- Record validation
|
15
|
+
- Record validation.
|
8
16
|
|
9
17
|
## 0.1.0 - 2018-09-27
|
10
18
|
### Added
|
data/Gemfile.lock
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
research_metadata_batch (0.2.0)
|
5
|
-
puree (~> 2.
|
4
|
+
research_metadata_batch (0.3.0)
|
5
|
+
puree (~> 2.3)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
@@ -24,7 +24,7 @@ GEM
|
|
24
24
|
nokogiri (1.8.4)
|
25
25
|
mini_portile2 (~> 2.3.0)
|
26
26
|
public_suffix (3.0.3)
|
27
|
-
puree (2.
|
27
|
+
puree (2.3.0)
|
28
28
|
http (~> 2.0)
|
29
29
|
nokogiri (~> 1.6)
|
30
30
|
unf (0.1.4)
|
data/README.md
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
# Research Metadata Batch
|
2
|
-
|
3
|
-
defined in user-defined applications.
|
2
|
+
Batch processing for the Pure Research Information System.
|
4
3
|
|
5
4
|
## Status
|
6
5
|
|
@@ -36,21 +35,16 @@ pure_config = {
|
|
36
35
|
ResearchMetadataBatch::Dataset.new(pure_config: pure_config).process
|
37
36
|
```
|
38
37
|
|
39
|
-
##
|
40
|
-
Require this gem, then open up the base class {ResearchMetadataBatch::Base} as below. Implement methods from
|
41
|
-
{ResearchMetadataBatch::Custom} as inherited methods, including any secondary initialisation using the
|
42
|
-
``init`` method.
|
43
|
-
|
44
|
-
|
45
|
-
For resource-specific customisation, open up a resource class e.g. {ResearchMetadataBatch::Dataset}. Implement methods from
|
46
|
-
{ResearchMetadataBatch::Custom} as resource-specific methods.
|
47
|
-
|
38
|
+
## Example application
|
48
39
|
This example uses Amazon Web Services.
|
49
40
|
|
50
|
-
###
|
41
|
+
### shared.rb
|
42
|
+
Implement methods from {ResearchMetadataBatch::Shared}.
|
51
43
|
```ruby
|
52
|
-
|
53
|
-
|
44
|
+
require 'aws-sdk-s3'
|
45
|
+
|
46
|
+
module App
|
47
|
+
module Shared
|
54
48
|
def init(aws_config:)
|
55
49
|
aws_credentials = Aws::Credentials.new aws_config[:access_key_id],
|
56
50
|
aws_config[:secret_access_key]
|
@@ -66,19 +60,21 @@ module ResearchMetadataBatch
|
|
66
60
|
end
|
67
61
|
```
|
68
62
|
|
69
|
-
###
|
63
|
+
### research_output.rb
|
70
64
|
```ruby
|
71
|
-
|
72
|
-
|
73
|
-
|
65
|
+
require_relative 'shared'
|
66
|
+
|
67
|
+
module App
|
68
|
+
class ResearchOutput < ResearchMetadataBatch::ResearchOutput
|
69
|
+
include App::Shared
|
74
70
|
end
|
75
71
|
end
|
76
72
|
```
|
77
73
|
|
78
|
-
###
|
74
|
+
### script.rb
|
79
75
|
```ruby
|
80
76
|
require 'research_metadata_batch'
|
81
|
-
|
77
|
+
require_relative 'research_output'
|
82
78
|
|
83
79
|
pure_config = {
|
84
80
|
url: ENV['PURE_URL'],
|
@@ -101,7 +97,15 @@ config = {
|
|
101
97
|
log_file: log_file
|
102
98
|
}
|
103
99
|
|
104
|
-
batch =
|
100
|
+
batch = App::ResearchOutput.new config
|
105
101
|
batch.init aws_config: aws_config
|
106
|
-
|
102
|
+
params = {
|
103
|
+
size: 50,
|
104
|
+
typeUri: [
|
105
|
+
'/dk/atira/pure/researchoutput/researchoutputtypes/contributiontojournal/article',
|
106
|
+
'/dk/atira/pure/researchoutput/researchoutputtypes/contributiontoconference/paper'
|
107
|
+
]
|
108
|
+
}
|
109
|
+
batch.process params: params
|
110
|
+
|
107
111
|
```
|
@@ -1,12 +1,12 @@
|
|
1
1
|
require 'logger'
|
2
2
|
require 'puree'
|
3
|
-
require_relative 'custom'
|
3
|
+
require_relative 'shared'
|
4
4
|
|
5
5
|
module ResearchMetadataBatch
|
6
6
|
# @note Not to be used directly
|
7
7
|
class Base
|
8
|
-
include ResearchMetadataBatch::Custom
|
9
|
-
|
8
|
+
include ResearchMetadataBatch::Shared
|
9
|
+
# @param pure_config [Hash]
|
10
10
|
# @option pure_config [String] :url
|
11
11
|
# @option pure_config [String] :username
|
12
12
|
# @option pure_config [String] :password
|
@@ -21,22 +21,18 @@ module ResearchMetadataBatch
|
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
+
# @param params [Hash] Combined GET and POST parameters for all records
|
24
25
|
# @param max [Fixnum] Number of records to act upon. Omit to act upon as many as possible.
|
25
|
-
# @param
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
preflight
|
36
|
-
@logger.info preflight_success_log_message
|
37
|
-
rescue => error
|
38
|
-
@logger.info preflight_error_log_message(error)
|
39
|
-
end
|
26
|
+
# @param delay [Fixnum] Delay in seconds between batches.
|
27
|
+
def process(params: {}, max: nil, delay: 0)
|
28
|
+
offset = params[:offset]
|
29
|
+
records_available = resource_count params
|
30
|
+
@logger.info "PURE_RECORDS_AVAILABLE=#{records_available}"
|
31
|
+
begin
|
32
|
+
preflight_msg = preflight
|
33
|
+
@logger.info "PREFLIGHT=#{preflight_msg}" if preflight_msg
|
34
|
+
rescue => error
|
35
|
+
@logger.error "PREFLIGHT=#{error}"
|
40
36
|
end
|
41
37
|
|
42
38
|
if max
|
@@ -47,7 +43,7 @@ module ResearchMetadataBatch
|
|
47
43
|
qty_to_find = records_available
|
48
44
|
end
|
49
45
|
|
50
|
-
if offset < 0 || offset > records_available - 1
|
46
|
+
if !offset || offset < 0 || offset > records_available - 1
|
51
47
|
offset = 0
|
52
48
|
end
|
53
49
|
|
@@ -57,35 +53,27 @@ module ResearchMetadataBatch
|
|
57
53
|
while position < records_available
|
58
54
|
# extract from Pure
|
59
55
|
begin
|
60
|
-
|
61
|
-
|
62
|
-
|
56
|
+
params[:offset] = position
|
57
|
+
result = resource_batch params
|
58
|
+
rescue => error
|
59
|
+
@logger.error "METADATA_EXTRACTION=#{error}"
|
63
60
|
sleep 10
|
64
61
|
redo
|
65
62
|
end
|
66
63
|
|
67
64
|
result.each do |i|
|
68
65
|
|
69
|
-
|
70
|
-
|
71
|
-
@logger.warn "#{log_message_prefix(position, i.uuid)} - VALIDATION_ERROR=#{record_validation_error}"
|
66
|
+
unless valid? i
|
67
|
+
@logger.info "#{log_message_prefix(position, i.uuid)} : VALID=false"
|
72
68
|
position += 1
|
73
69
|
next
|
74
70
|
end
|
75
71
|
|
76
72
|
begin
|
77
|
-
|
78
|
-
|
79
|
-
else
|
80
|
-
act_msg = mock_act i
|
81
|
-
end
|
82
|
-
if act_msg
|
83
|
-
@logger.info "#{log_message_prefix(position, i.uuid)} - #{act_success_log_message(i, act_msg)}"
|
84
|
-
else
|
85
|
-
@logger.info "#{log_message_prefix(position, i.uuid)}"
|
86
|
-
end
|
73
|
+
act_msg = act i
|
74
|
+
@logger.info "#{log_message_prefix(position, i.uuid)} : #{act_msg}"
|
87
75
|
rescue => error
|
88
|
-
@logger.error "#{log_message_prefix(position, i.uuid)}
|
76
|
+
@logger.error "#{log_message_prefix(position, i.uuid)} : #{error}"
|
89
77
|
end
|
90
78
|
|
91
79
|
position += 1
|
@@ -98,37 +86,35 @@ module ResearchMetadataBatch
|
|
98
86
|
|
99
87
|
# handle error response
|
100
88
|
if result.empty?
|
101
|
-
@logger.error "PURE_RECORD=#{position}
|
89
|
+
@logger.error "PURE_RECORD=#{position} : METADATA_EXTRACTION=No data"
|
102
90
|
position += 1
|
103
91
|
end
|
104
92
|
|
105
93
|
sleep delay
|
106
94
|
end
|
107
95
|
|
108
|
-
@logger.info "
|
96
|
+
@logger.info "PURE_RECORDS_AVAILABLE=#{records_available}"
|
109
97
|
|
110
98
|
end
|
111
99
|
|
112
100
|
private
|
113
101
|
|
114
|
-
def act(model)
|
115
|
-
puts model.inspect
|
116
|
-
end
|
117
|
-
|
118
102
|
# @return [String]
|
119
103
|
def log_message_prefix(pure_record, pure_uuid)
|
120
|
-
"PURE_RECORD=#{pure_record}
|
104
|
+
"PURE_RECORD=#{pure_record} : PURE_UUID=#{pure_uuid}"
|
121
105
|
end
|
122
106
|
|
123
|
-
def resource_count
|
107
|
+
def resource_count(params)
|
108
|
+
params = params.dup
|
124
109
|
resource_class = "Puree::Extractor::#{Puree::Util::String.titleize(@resource_type)}"
|
125
|
-
Object.const_get(resource_class).new(@pure_config).count
|
110
|
+
Object.const_get(resource_class).new(@pure_config).count(params)
|
126
111
|
end
|
127
112
|
|
128
|
-
def resource_batch(
|
113
|
+
def resource_batch(params)
|
114
|
+
params = params.dup
|
129
115
|
resource_method = "#{@resource_type}s".to_sym
|
130
116
|
client = Puree::REST::Client.new(@pure_config).send resource_method
|
131
|
-
response = client.
|
117
|
+
response = client.all_complex params: params
|
132
118
|
Puree::XMLExtractor::Collection.send resource_method, response.to_s
|
133
119
|
end
|
134
120
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
module ResearchMetadataBatch
|
2
2
|
|
3
3
|
# @note These methods (except init) are used internally by {ResearchMetadataBatch::Base#process} and have been left public for documentation purposes only
|
4
|
-
module Custom
|
4
|
+
module Shared
|
5
5
|
|
6
6
|
# Second stage initialisation, perhaps third party services.
|
7
7
|
# @param args [Hash]
|
@@ -9,37 +9,20 @@ module ResearchMetadataBatch
|
|
9
9
|
end
|
10
10
|
|
11
11
|
# Anything to be done at the start of a batch run
|
12
|
+
# @return [String, nil] Optionally, return something to indicate what has been done.
|
12
13
|
def preflight
|
13
14
|
end
|
14
15
|
|
15
|
-
# Message when preflight method completes
|
16
|
-
# @return [String]
|
17
|
-
def preflight_success_log_message
|
18
|
-
end
|
19
|
-
|
20
|
-
# Message when preflight method does not complete
|
21
|
-
# @return [String]
|
22
|
-
def preflight_error_log_message(error)
|
23
|
-
end
|
24
|
-
|
25
16
|
# Do something with model metadata
|
26
17
|
# @return [String, nil] Optionally, return something transaction-specific, such as a code/ID from an external service.
|
27
18
|
def act(model)
|
19
|
+
puts model.inspect
|
28
20
|
end
|
29
21
|
|
30
|
-
#
|
31
|
-
# @return [
|
32
|
-
def
|
33
|
-
|
34
|
-
|
35
|
-
# Fake doing something with model metadata
|
36
|
-
# @return [String, nil]
|
37
|
-
def mock_act(model)
|
38
|
-
end
|
39
|
-
|
40
|
-
# Check for values in metadata
|
41
|
-
# @return [String, nil]
|
42
|
-
def validate_record(model)
|
22
|
+
# Check for values in metadata.
|
23
|
+
# @return [Boolean]
|
24
|
+
def valid?(model)
|
25
|
+
true
|
43
26
|
end
|
44
27
|
|
45
28
|
end
|
@@ -14,7 +14,7 @@ Gem::Specification.new do |spec|
|
|
14
14
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
15
15
|
spec.require_paths = ["lib"]
|
16
16
|
spec.required_ruby_version = '~> 2.1'
|
17
|
-
spec.add_dependency 'puree', '~> 2.
|
17
|
+
spec.add_dependency 'puree', '~> 2.3'
|
18
18
|
spec.metadata = {
|
19
19
|
"source_code_uri" => "https://github.com/lulibrary/research_metadata_batch"
|
20
20
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: research_metadata_batch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adrian Albin-Clark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-10-
|
11
|
+
date: 2018-10-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: puree
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '2.
|
19
|
+
version: '2.3'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '2.
|
26
|
+
version: '2.3'
|
27
27
|
description:
|
28
28
|
email:
|
29
29
|
- a.albin-clark@lancaster.ac.uk
|
@@ -39,7 +39,6 @@ files:
|
|
39
39
|
- README.md
|
40
40
|
- lib/research_metadata_batch.rb
|
41
41
|
- lib/research_metadata_batch/base.rb
|
42
|
-
- lib/research_metadata_batch/custom.rb
|
43
42
|
- lib/research_metadata_batch/dataset.rb
|
44
43
|
- lib/research_metadata_batch/event.rb
|
45
44
|
- lib/research_metadata_batch/external_organisation.rb
|
@@ -50,6 +49,7 @@ files:
|
|
50
49
|
- lib/research_metadata_batch/publisher.rb
|
51
50
|
- lib/research_metadata_batch/research_metadata_batch.rb
|
52
51
|
- lib/research_metadata_batch/research_output.rb
|
52
|
+
- lib/research_metadata_batch/shared.rb
|
53
53
|
- lib/research_metadata_batch/version.rb
|
54
54
|
- research_metadata_batch.gemspec
|
55
55
|
homepage:
|